veloren_voxygen/ui/graphic/mod.rs
1mod pixel_art;
2pub mod renderer;
3
4pub use renderer::{SampleStrat, Transform};
5
6use crate::{
7 render::{Renderer, Texture, UiTextureBindGroup, UiUploadBatchId},
8 ui::KeyedJobs,
9};
10use common::{figure::Segment, slowjob::SlowJobPool};
11use common_base::prof_span;
12use guillotiere::{SimpleAtlasAllocator, size2};
13use hashbrown::{HashMap, hash_map::Entry};
14use image::{DynamicImage, RgbaImage};
15use slab::Slab;
16use std::{borrow::Cow, hash::Hash, sync::Arc};
17use tracing::{error, warn};
18use vek::*;
19
#[derive(Clone)]
pub enum Graphic {
    /// NOTE: The second argument is an optional border color. If this is set,
    /// we force the image into its own texture and use the border color
    /// whenever we sample beyond the image extent. This can be useful, for
    /// example, for the map and minimap, which both rotate and may be
    /// non-square (meaning if we want to display the whole map and render to a
    /// square, we may render out of bounds unless we perform proper
    /// clipping).
    // TODO: probably convert this type to `RgbaImage`.
    Image(Arc<DynamicImage>, Option<Rgba<f32>>),
    /// A voxel model; rasterized at the requested display resolution when
    /// cached (see `prepare_graphic`), using the given transform and sampling
    /// strategy.
    // Note: none of the users keep this Arc currently
    Voxel(Arc<Segment>, Transform, SampleStrat),
    /// Empty graphic; nothing is prepared or uploaded for this variant.
    // TODO: Re-evaluate whether we need this (especially outside conrod context)
    Blank,
}
36
/// Rotation to apply to a graphic when displaying it (graphics are stored on
/// the GPU unrotated; rotation is applied when computing the sample
/// rectangle in `cache_res`).
#[derive(Clone, Copy, Debug)]
pub enum Rotation {
    /// No rotation.
    None,
    /// Rotated 90 degrees clockwise.
    Cw90,
    /// Rotated 180 degrees clockwise.
    Cw180,
    /// Rotated 270 degrees clockwise.
    Cw270,
    /// Orientation of source rectangle that always faces true north.
    /// Simple hack to get around Conrod not having space for proper
    /// rotation data (though it should be possible to add in other ways).
    SourceNorth,
    /// Orientation of target rectangle that always faces true north.
    /// Simple hack to get around Conrod not having space for proper
    /// rotation data (though it should be possible to add in other ways).
    TargetNorth,
}
52
/// Images larger than this are stored in individual textures.
/// Fraction of the total graphic cache (atlas) size; compared against each
/// image dimension in `cache_res` to decide atlas vs dedicated texture.
const ATLAS_CUTOFF_FRAC: f32 = 0.2;
/// Multiplied by current window size to obtain the atlas texture size (then
/// clamped to the GPU max texture size; see `atlas_size`).
const GRAPHIC_CACHE_RELATIVE_SIZE: u32 = 1;
58
/// Handle to a graphic stored in the `GraphicCache`; returned by
/// `GraphicCache::add_graphic`.
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
pub struct Id(u32);
61
/// Index of a texture in the `GraphicCache`'s texture slab; used by
/// rendering code to fetch the texture/bind group via `get_tex`.
// TODO these can become invalid when clearing the cache
#[derive(PartialEq, Eq, Hash, Copy, Clone)]
pub struct TexId(usize);
65
/// Where a cached graphic currently lives on the GPU: packed into one of the
/// shared atlas textures, or in its own dedicated texture (used for graphics
/// with border colors and for large graphics).
enum CachedDetails {
    Atlas {
        // Index of the atlas this is cached in.
        atlas_idx: usize,
        // Whether this cached copy is up to date with the graphic (cleared
        // when the graphic is swapped out via `replace_graphic`).
        valid: bool,
        // Where in the cache texture this is.
        aabr: Aabr<u16>,
    },
    Texture {
        // Index of the (unique, non-atlas) texture this is cached in.
        index: usize,
        // Whether this texture is valid (see `Atlas::valid` above).
        valid: bool,
    },
}
82
83impl CachedDetails {
84 /// Get information about this cache entry: texture index,
85 /// whether the entry is valid, and its bounding box in the referenced
86 /// texture.
87 fn info(
88 &self,
89 atlases: &[(SimpleAtlasAllocator, usize)],
90 textures: &Slab<(Arc<Texture>, UiTextureBindGroup, UiUploadBatchId)>,
91 ) -> (usize, bool, Aabr<u16>) {
92 match *self {
93 CachedDetails::Atlas {
94 atlas_idx,
95 valid,
96 aabr,
97 } => (atlases[atlas_idx].1, valid, aabr),
98 CachedDetails::Texture { index, valid } => {
99 (index, valid, Aabr {
100 min: Vec2::zero(),
101 // NOTE (as cast): We don't accept images larger than u16::MAX (rejected in
102 // `cache_res`) (and probably would not be able to create a texture this
103 // large).
104 //
105 // Note texture should always match the cached dimensions.
106 max: textures[index].0.get_dimensions().xy().map(|e| e as u16),
107 })
108 },
109 }
110 }
111
112 /// Invalidate this cache entry.
113 fn invalidate(&mut self) {
114 match self {
115 Self::Atlas { ref mut valid, .. } => {
116 *valid = false;
117 },
118 Self::Texture { ref mut valid, .. } => {
119 *valid = false;
120 },
121 }
122 }
123
124 fn set_valid(&mut self) {
125 match self {
126 Self::Atlas { ref mut valid, .. } => {
127 *valid = true;
128 },
129 Self::Texture { ref mut valid, .. } => {
130 *valid = true;
131 },
132 }
133 }
134}
135
/// Requirements that a particular graphic has with respect to the atlas
/// allocation or independent texture it will be stored in.
///
/// If this matches between an old graphic and a new one which is replacing it,
/// we can reuse any of the corresponding locations where it is cached in
/// textures on the GPU. That is we can invalidate such textures and upload the
/// new graphic there, rather than needing to allocate a new texture (or new
/// location in an atlas).
#[derive(PartialEq)]
enum TextureRequirements {
    /// These are uploaded to the GPU in the original resolution of the image
    /// supplied by the `Graphic` and any scaling is done during sampling in
    /// the UI fragment shader.
    Fixed {
        size: Vec2<u16>,
        /// Graphics with a border color specified are placed into their own
        /// individual textures so that the border color can be set
        /// there. (Note: this is partially a theoretical description as
        /// border color options are limited in the current graphics API).
        border_color: Option<Rgba<f32>>,
    },
    /// These are rasterized to the exact resolution that they will be displayed
    /// at and then uploaded to the GPU. This corresponds to
    /// `Graphic::Voxel`. There may be multiple copies on the GPU if
    /// different resolutions are requested.
    ///
    /// It is expected that the requested sizes will generally not differ when
    /// switching out a graphic. Thus, dependent cached entries should always
    /// be invalidated (rather than removed) since those cached locations will
    /// be reusable if the requested size is the same.
    Dependent,
}
168
/// These solely determine how a place in an atlas will be found or how a
/// texture will be created to place the image for a graphic.
struct TextureParameters {
    /// Dimensions of the texture/atlas region, in pixels.
    size: Vec2<u16>,
    /// Border color sampled outside the image extent (forces a dedicated
    /// texture; see `Graphic::Image`).
    border_color: Option<Rgba<f32>>,
}
175
/// Key used to refer to an instance of a graphic that has been uploaded to the
/// GPU.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct CacheKey {
    /// Id of the graphic this cache entry was produced from.
    graphic_id: Id,
    /// This is `Some` for `TextureRequirements::Dependent` (the rasterized
    /// size is part of the identity of the cached instance).
    size: Option<Vec2<u16>>,
}
184
185impl TextureRequirements {
186 fn from_graphic(graphic: &Graphic) -> Option<Self> {
187 match graphic {
188 Graphic::Image(image, border_color) => {
189 // Image sizes over u16::MAX are not supported (and we would probably not be
190 // able to create a texture large enough to hold them on the GPU anyway)!
191 let image_dims = match (u16::try_from(image.width()), u16::try_from(image.height()))
192 {
193 (Ok(x), Ok(y)) if x != 0 && y != 0 => Vec2::new(x, y),
194 _ => {
195 error!(
196 "Image dimensions greater than u16::MAX are not supported! Supplied \
197 image size: ({}, {}).",
198 image.width(),
199 image.height(),
200 );
201 // TODO: reasonable to return None on this error case? We could potentially
202 // validate images sizes on add_graphic/replace_graphic?
203 return None;
204 },
205 };
206
207 Some(Self::Fixed {
208 size: image_dims,
209 border_color: *border_color,
210 })
211 },
212 Graphic::Voxel(_, _, _) => Some(Self::Dependent),
213 Graphic::Blank => None,
214 }
215 }
216
217 #[expect(clippy::wrong_self_convention)] // type is spiritually Copy
218 fn to_key_and_tex_parameters(
219 self,
220 graphic_id: Id,
221 requested_size: Vec2<u16>,
222 ) -> (CacheKey, TextureParameters) {
223 // NOTE: Any external parameters which influence the value of the returned
224 // `TextureParameters` must be included in the `CacheKey`. Otherwise,
225 // invalidation and subsequent re-use of cache locations based on the
226 // value of `self` would be wrong.
227 let (size, border_color, key_size) = match self {
228 Self::Fixed { size, border_color } => (size, border_color, None),
229 Self::Dependent => (requested_size, None, Some(requested_size)),
230 };
231 (
232 CacheKey {
233 graphic_id,
234 size: key_size,
235 },
236 TextureParameters { size, border_color },
237 )
238 }
239}
240
// Caches graphics, only deallocates when changing screen resolution (completely
// cleared)
pub struct GraphicCache {
    // TODO replace with slotmap
    /// All registered graphics, addressed by the `Id` handed out on insertion.
    graphic_map: HashMap<Id, Graphic>,
    /// Next id to use when a new graphic is added
    next_id: u32,

    /// Atlases with the index of their texture in the textures slab.
    atlases: Vec<(SimpleAtlasAllocator, usize)>,
    /// Third tuple element is a list of pending premultiply + upload operations
    /// for this frame. The purpose of this is to collect all the operations
    /// together so that a single renderpass is performed for each target
    /// texture.
    textures: Slab<(Arc<Texture>, UiTextureBindGroup, UiUploadBatchId)>,
    /// The location and details of graphics cached on the GPU.
    ///
    /// Graphic::Voxel images include the dimensions they were rasterized at in
    /// the key. Other images are scaled as part of sampling them on the
    /// GPU.
    cache_map: HashMap<CacheKey, CachedDetails>,

    /// Pool of (optionally background) jobs used to rasterize voxel graphics.
    keyed_jobs: KeyedJobs<CacheKey, RgbaImage>,
}
265
impl GraphicCache {
    /// Create a new cache with a single empty atlas sized from the current
    /// renderer resolution.
    pub fn new(renderer: &mut Renderer) -> Self {
        let (atlas, (tex, bind)) = create_atlas_texture(renderer);

        let mut textures = Slab::new();
        let tex_id = textures.insert((tex, bind, UiUploadBatchId::default()));

        Self {
            graphic_map: HashMap::default(),
            next_id: 0,
            atlases: vec![(atlas, tex_id)],
            textures,
            cache_map: HashMap::default(),
            keyed_jobs: KeyedJobs::new("IMAGE_PROCESSING"),
        }
    }

    /// Register a new graphic and return the `Id` used to refer to it.
    ///
    /// Nothing is uploaded to the GPU until the graphic is first requested
    /// via `cache_res`.
    pub fn add_graphic(&mut self, graphic: Graphic) -> Id {
        let id = self.next_id;
        // NOTE(review): wrapping_add means ids can eventually collide with
        // still-live ids after u32::MAX insertions — presumably never hit in
        // practice.
        self.next_id = id.wrapping_add(1);

        let id = Id(id);
        self.graphic_map.insert(id, graphic);

        id
    }

    /// Replace the graphic stored under `id`, invalidating or evicting any
    /// GPU copies of the old graphic as appropriate.
    pub fn replace_graphic(&mut self, id: Id, graphic: Graphic) {
        let (old, new) = match self.graphic_map.entry(id) {
            Entry::Occupied(o) => {
                let slot_mut = o.into_mut();
                let old = core::mem::replace(slot_mut, graphic);
                (old, slot_mut)
            },
            Entry::Vacant(v) => {
                // This was not an update, so no need to cleanup caches.
                v.insert(graphic);
                return;
            },
        };

        // If the texture requirements match, the existing cached locations can
        // be reused (after re-upload); otherwise they must be removed.
        let old_requirements = TextureRequirements::from_graphic(&old);
        let new_requirements = TextureRequirements::from_graphic(new);
        let should_invalidate = old_requirements == new_requirements && old_requirements.is_some();

        // Invalidate if possible or remove from caches.
        // Maybe make this more efficient if replace graphic is used more often
        // (especially since we should know the exact key for non-voxel
        // graphics).
        //
        // NOTE: at the time of writing, replace_graphic is only used for voxel minimap
        // updates and item image reloading.
        if should_invalidate {
            self.cache_map.iter_mut().for_each(|(key, details)| {
                if key.graphic_id == id {
                    details.invalidate();
                }
            });
        } else {
            let _ = self.cache_map.extract_if(|key, details| {
                if key.graphic_id == id {
                    match details {
                        // NOTE: if replace_graphic is used continuously for small images (i.e.
                        // images placed into an atlas) of different sizes, that can use up our
                        // atlas space since spots in the atlas can't be reused. (this scenario is
                        // now possible with scaling being done during sampling rather than placing
                        // resized version into the atlas). This is expected to not occur in all
                        // practical cases we plan to support here (i.e. the size of the replacement
                        // image will always be the same).
                        CachedDetails::Atlas { .. } => {},
                        // Dedicated textures can be freed immediately.
                        CachedDetails::Texture { index, .. } => {
                            self.textures.remove(*index);
                        },
                    };
                    true
                } else {
                    false
                }
            });
        }
    }

    /// Look up the graphic registered under `id`, if any.
    pub fn get_graphic(&self, id: Id) -> Option<&Graphic> { self.graphic_map.get(&id) }

    /// Used to acquire textures for rendering
    ///
    /// # Panics
    ///
    /// Panics if `id` does not refer to a live texture (e.g. after
    /// `clear_cache`; see the TODO on `TexId`).
    pub fn get_tex(&self, id: TexId) -> (&Texture, &UiTextureBindGroup) {
        let (tex, bind, _upload_batch) = self.textures.get(id.0).expect("Invalid TexId used");
        (tex, bind)
    }

    /// Source dimensions of the graphic as it would be displayed with the
    /// given rotation, or `None` when no meaningful dimensions exist.
    pub fn get_graphic_dims(&self, (id, rot): (Id, Rotation)) -> Option<(u32, u32)> {
        use image::GenericImageView;
        self.get_graphic(id)
            .and_then(|graphic| match graphic {
                Graphic::Image(image, _) => Some(image.dimensions()),
                Graphic::Voxel(segment, _, _) => {
                    use common::vol::SizedVol;
                    let size = segment.size();
                    // TODO: HACK because they can be rotated arbitrarily, remove
                    // (and they can be rasterized at arbitrary resolution)
                    // (might need to return None here?)
                    Some((size.x, size.z))
                },
                Graphic::Blank => None,
            })
            .and_then(|(w, h)| match rot {
                Rotation::None | Rotation::Cw180 => Some((w, h)),
                Rotation::Cw90 | Rotation::Cw270 => Some((h, w)),
                // TODO: need dims for these?
                Rotation::SourceNorth | Rotation::TargetNorth => None,
            })
    }

    /// Drop all cached GPU data and start over with a single fresh atlas
    /// (e.g. after a resolution change changes the atlas size).
    pub fn clear_cache(&mut self, renderer: &mut Renderer) {
        self.cache_map.clear();

        let (atlas, (tex, bind)) = create_atlas_texture(renderer);
        let mut textures = Slab::new();
        let tex_id = textures.insert((tex, bind, UiUploadBatchId::default()));
        self.atlases = vec![(atlas, tex_id)];
        self.textures = textures;
    }

    /// Source rectangle should be from 0 to 1, and represents a bounding box
    /// for the source image of the graphic.
    ///
    /// Returns the (rotated, scaled) sample rectangle, the per-axis
    /// screen-pixels-per-sample-pixel scale, and the id of the texture to
    /// sample from; uploading the graphic to the GPU first if needed.
    ///
    /// # Panics
    ///
    /// Panics if one of the lengths in requested_dims is zero.
    pub fn cache_res(
        &mut self,
        renderer: &mut Renderer,
        pool: Option<&SlowJobPool>,
        graphic_id: Id,
        // TODO: if we aren't resizing here we can potentially upload the image earlier... (as long
        // as this doesn't lead to uploading too much unused stuff). (currently not sure whether it
        // would be an overall gain to pursue this.)
        requested_dims: Vec2<u16>,
        source: Aabr<f64>,
        rotation: Rotation,
    ) -> Option<((Aabr<f64>, Vec2<f32>), TexId)> {
        assert!(requested_dims.map(|e| e != 0).reduce_and());
        let requested_dims_upright = match rotation {
            // The image is stored on the GPU with no rotation, so we need to swap the dimensions
            // here to get the resolution that the image will be displayed at but re-oriented into
            // the "upright" space that the image is stored in and sampled from (this can be bit
            // confusing initially / hard to explain).
            Rotation::Cw90 | Rotation::Cw270 => requested_dims.yx(),
            Rotation::None | Rotation::Cw180 => requested_dims,
            Rotation::SourceNorth => requested_dims,
            Rotation::TargetNorth => requested_dims,
        };

        // Rotate aabr according to requested rotation.
        let rotated_aabr = |Aabr { min, max }| match rotation {
            Rotation::None | Rotation::SourceNorth | Rotation::TargetNorth => Aabr { min, max },
            Rotation::Cw90 => Aabr {
                min: Vec2::new(min.x, max.y),
                max: Vec2::new(max.x, min.y),
            },
            Rotation::Cw180 => Aabr { min: max, max: min },
            Rotation::Cw270 => Aabr {
                min: Vec2::new(max.x, min.y),
                max: Vec2::new(min.x, max.y),
            },
        };
        // Scale aabr according to provided source rectangle.
        let scaled_aabr = |aabr: Aabr<_>| {
            let size: Vec2<f64> = aabr.size().into();
            Aabr {
                min: size.mul_add(source.min, aabr.min),
                max: size.mul_add(source.max, aabr.min),
            }
        };
        // Apply all transformations.
        // TODO: Verify rotation is being applied correctly.
        let transformed_aabr_and_scale = |aabr| {
            let scaled = scaled_aabr(aabr);
            // Calculate how many displayed pixels there are for each pixel in the source
            // image. We need this to calculate where to sample in the shader to
            // retain crisp pixel borders when scaling the image.
            let scale = requested_dims_upright.map2(
                Vec2::from(scaled.size()),
                |screen_pixels, sample_pixels: f64| screen_pixels as f32 / sample_pixels as f32,
            );
            let transformed = rotated_aabr(scaled);
            (transformed, scale)
        };

        // Split borrows so that `self.keyed_jobs` can still be borrowed
        // mutably below while these fields are borrowed.
        let Self {
            textures,
            atlases,
            cache_map,
            graphic_map,
            ..
        } = self;

        let graphic = match graphic_map.get(&graphic_id) {
            Some(g) => g,
            None => {
                warn!(
                    ?graphic_id,
                    "A graphic was requested via an id which is not in use"
                );
                return None;
            },
        };

        let requirements = TextureRequirements::from_graphic(graphic)?;
        let (key, texture_parameters) =
            requirements.to_key_and_tex_parameters(graphic_id, requested_dims_upright);

        let details = match cache_map.entry(key) {
            Entry::Occupied(mut details) => {
                let details = details.get_mut();
                let (idx, valid, aabr) = details.info(atlases, textures);

                // Check if the cached version has been invalidated by replacing the underlying
                // graphic
                if !valid {
                    // Create image
                    let (image, gpu_premul) = prepare_graphic(
                        graphic,
                        key,
                        requested_dims_upright,
                        &mut self.keyed_jobs,
                        pool,
                    )?;
                    // Ensure we don't have any bugs causing the size used to determine if the
                    // cached version is reusable to not match the size of the image produced by
                    // prepare_graphic.
                    assert_eq!(
                        image.dimensions(),
                        texture_parameters.size.map(u32::from).into_tuple()
                    );
                    // Transfer to the gpu
                    let (ref texture, _, ref mut upload_batch) = &mut textures[idx];
                    upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
                    details.set_valid();
                }

                return Some((
                    transformed_aabr_and_scale(aabr.map(|e| e as f64)),
                    TexId(idx),
                ));
            },
            Entry::Vacant(details) => details,
        };

        // Construct image in an optional threadpool.
        let (image, gpu_premul) = prepare_graphic(
            graphic,
            key,
            requested_dims_upright,
            &mut self.keyed_jobs,
            pool,
        )?;
        // Assert dimensions of image from `prepare_graphic` are as expected!
        assert_eq!(
            image.dimensions(),
            texture_parameters.size.map(u32::from).into_tuple()
        );
        // Image dimensions in the format used by the allocator crate.
        let image_dims_size2d = size2(
            i32::from(texture_parameters.size.x),
            i32::from(texture_parameters.size.y),
        );

        // Now we allocate space on the gpu (either in an atlas or an independent
        // texture) and upload the image to that location.

        let atlas_size = atlas_size(renderer);
        // Graphics that request a border color or which are over a particular size
        // compared to the atlas size are sent to their own textures.
        let can_place_in_atlas = texture_parameters.border_color.is_none()
            && atlas_size
                .map2(texture_parameters.size, |a, d| {
                    a as f32 * ATLAS_CUTOFF_FRAC >= d as f32
                })
                .reduce_and();
        let location = if can_place_in_atlas {
            // Fit into an atlas
            let mut loc = None;
            for (atlas_idx, &mut (ref mut atlas, texture_idx)) in atlases.iter_mut().enumerate() {
                if let Some(rectangle) = atlas.allocate(image_dims_size2d) {
                    let aabr = aabr_from_alloc_rect(rectangle);
                    loc = Some(CachedDetails::Atlas {
                        atlas_idx,
                        valid: true,
                        aabr,
                    });
                    let (ref texture, _, ref mut upload_batch) = &mut textures[texture_idx];
                    upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
                    break;
                }
            }

            match loc {
                Some(loc) => loc,
                // Create a new atlas
                None => {
                    let (mut atlas, (tex, bind)) = create_atlas_texture(renderer);
                    // Allocation into a fresh atlas must succeed since the image passed the
                    // `ATLAS_CUTOFF_FRAC` check above.
                    let aabr = atlas
                        .allocate(image_dims_size2d)
                        .map(aabr_from_alloc_rect)
                        .unwrap();
                    // NOTE: All mutations happen only after the texture creation succeeds!
                    let tex_idx = textures.insert((tex, bind, UiUploadBatchId::default()));
                    let atlas_idx = atlases.len();
                    atlases.push((atlas, tex_idx));
                    let (ref texture, _, ref mut upload_batch) = &mut textures[tex_idx];
                    upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
                    CachedDetails::Atlas {
                        atlas_idx,
                        valid: true,
                        aabr,
                    }
                },
            }
        } else {
            // Create a texture just for this
            let (tex, bind, upload_batch) =
                create_image(renderer, &image, texture_parameters, gpu_premul);
            // NOTE: All mutations happen only after the texture creation and upload
            // initiation succeeds! (completing the upload does not have any
            // failure cases afaik)
            let index = textures.insert((tex, bind, upload_batch));
            CachedDetails::Texture { index, valid: true }
        };

        // Extract information from cache entry.
        let (idx, _, aabr) = location.info(atlases, textures);

        // Insert into cached map
        details.insert(location);

        Some((
            transformed_aabr_and_scale(aabr.map(|e| e as f64)),
            TexId(idx),
        ))
    }
}
608
/// Prepare the graphic into the form that will be uploaded to the GPU.
///
/// For voxel graphics, draws the graphic at the specified dimensions.
///
/// Alpha premultiplication is necessary so that images can be linearly
/// filtered on the GPU. Premultiplication can either occur here or on the GPU
/// depending on the size of the image and other factors. If premultiplication
/// on the GPU is needed the returned bool will be `true`.
fn prepare_graphic<'graphic>(
    graphic: &'graphic Graphic,
    cache_key: CacheKey,
    dims: Vec2<u16>,
    keyed_jobs: &mut KeyedJobs<CacheKey, RgbaImage>,
    pool: Option<&SlowJobPool>,
) -> Option<(Cow<'graphic, RgbaImage>, bool)> {
    prof_span!("prepare_graphic");
    match graphic {
        // Nothing to upload for a blank graphic.
        Graphic::Blank => None,
        Graphic::Image(image, _border_color) => {
            // Image will be rescaled when sampling from it on the GPU so we don't
            // need to resize it here.
            //
            // TODO: We could potentially push premultiplication even earlier (e.g. to the
            // time of loading images or packaging veloren for distribution).
            let mut rgba_cow = image.as_rgba8().map_or_else(
                || {
                    // TODO: we may want to require loading in as the rgba8 format so we don't have
                    // to perform conversion here. On the other hand, we can take advantage of
                    // certain formats to know that alpha premultiplication doesn't need to be
                    // performed (but we would probably just want to store that with the loaded
                    // rgba8 format).
                    Cow::Owned(image.to_rgba8())
                },
                Cow::Borrowed,
            );
            // NOTE: We do premultiplication on the main thread since if it would be
            // expensive enough to do in the background we would just do it on
            // the GPU. Could still use `rayon` to parallelize this work, if
            // needed.
            let premultiply_strategy = PremultiplyStrategy::determine(&rgba_cow);
            let needs_gpu_premultiply = match premultiply_strategy {
                PremultiplyStrategy::UseGpu => true,
                PremultiplyStrategy::NotNeeded => false,
                PremultiplyStrategy::UseCpu => {
                    // NOTE: to_mut will clone the image if it was Cow::Borrowed
                    premultiply_alpha(rgba_cow.to_mut());
                    false
                },
            };

            Some((rgba_cow, needs_gpu_premultiply))
        },
        // Voxel graphics are rasterized (possibly on a background thread via
        // `keyed_jobs`) and premultiplied on the CPU.
        Graphic::Voxel(segment, trans, sample_strat) => keyed_jobs
            .spawn(pool, cache_key, || {
                let segment = Arc::clone(segment);
                let (trans, sample_strat) = (*trans, *sample_strat);
                move |_| {
                    // TODO: for now we always use CPU premultiplication for these, may want to
                    // re-evaluate this after zoomy worldgen branch is merged (and it is more clear
                    // when these jobs go to the background thread pool or not).

                    // Render voxel model at requested resolution
                    let mut image = renderer::draw_vox(&segment, dims, trans, sample_strat);
                    premultiply_alpha(&mut image);
                    image
                }
            })
            .map(|(_, v)| (Cow::Owned(v), false)),
    }
}
679
680fn atlas_size(renderer: &Renderer) -> Vec2<u32> {
681 let max_texture_size = renderer.max_texture_size();
682
683 renderer
684 .resolution()
685 .map(|e| (e * GRAPHIC_CACHE_RELATIVE_SIZE).clamp(512, max_texture_size))
686}
687
/// This creates a texture suitable for sampling from during the UI pass and
/// rendering to during alpha premultiplication upload passes.
///
/// `address_mode`, if provided, overrides the default `ClampToEdge` sampler
/// addressing (used for border-color graphics; see `create_image`).
fn create_image_texture(
    renderer: &mut Renderer,
    size: Vec2<u32>,
    address_mode: Option<wgpu::AddressMode>,
) -> (Arc<Texture>, UiTextureBindGroup) {
    // TODO: Right now we have to manually clear images to workaround AMD DX bug,
    // for this we use Queue::write_texture which needs this usage. I think this
    // may be fixed in newer wgpu versions that auto-clear the texture.
    let workaround_usage = wgpu::TextureUsages::COPY_DST;
    let tex_info = wgpu::TextureDescriptor {
        label: None,
        size: wgpu::Extent3d {
            width: size.x,
            height: size.y,
            depth_or_array_layers: 1,
        },
        mip_level_count: 1,
        sample_count: 1,
        dimension: wgpu::TextureDimension::D2,
        format: wgpu::TextureFormat::Rgba8UnormSrgb,
        usage: wgpu::TextureUsages::RENDER_ATTACHMENT // GPU premultiply
            | wgpu::TextureUsages::COPY_DST // CPU premultiply
            | wgpu::TextureUsages::TEXTURE_BINDING // using image in ui rendering
            | workaround_usage,
        view_formats: &[],
    };
    let view_info = wgpu::TextureViewDescriptor {
        format: Some(tex_info.format),
        dimension: Some(wgpu::TextureViewDimension::D2),
        ..Default::default()
    };
    let address_mode = address_mode.unwrap_or(wgpu::AddressMode::ClampToEdge);
    let sampler_info = wgpu::SamplerDescriptor {
        address_mode_u: address_mode,
        address_mode_v: address_mode,
        mag_filter: wgpu::FilterMode::Linear,
        min_filter: wgpu::FilterMode::Linear,
        ..Default::default()
    };
    let tex = renderer.create_texture_raw(&tex_info, &view_info, &sampler_info);
    let bind = renderer.ui_bind_texture(&tex);
    (Arc::new(tex), bind)
}
733
734fn create_atlas_texture(
735 renderer: &mut Renderer,
736) -> (SimpleAtlasAllocator, (Arc<Texture>, UiTextureBindGroup)) {
737 let size = atlas_size(renderer);
738 // Note: here we assume the max texture size is under i32::MAX.
739 let atlas = SimpleAtlasAllocator::new(size2(size.x as i32, size.y as i32));
740 let (tex, bind) = create_image_texture(renderer, size, None);
741 (atlas, (tex, bind))
742}
743
744fn aabr_from_alloc_rect(rect: guillotiere::Rectangle) -> Aabr<u16> {
745 let (min, max) = (rect.min, rect.max);
746 // Note: here we assume the max texture size (and thus the maximum size of the
747 // atlas) is under `u16::MAX`.
748 Aabr {
749 min: Vec2::new(min.x as u16, min.y as u16),
750 max: Vec2::new(max.x as u16, max.y as u16),
751 }
752}
753
/// Upload `image` into the rectangle `aabr` of `target_texture`.
///
/// If `premultiply_on_gpu` is set, the pixels are routed through the batched
/// GPU premultiply pass (updating `upload_batch`); otherwise the pixels
/// (already premultiplied or not needing it) are copied directly.
fn upload_image(
    renderer: &mut Renderer,
    target_texture: &Arc<Texture>,
    upload_batch: &mut UiUploadBatchId,
    image: &RgbaImage,
    aabr: Aabr<u16>,
    premultiply_on_gpu: bool,
) {
    // Check that this image and the target aabr are the same size (otherwise there
    // is a bug in this module).
    debug_assert_eq!(aabr.map(u32::from).size().into_tuple(), image.dimensions());
    if premultiply_on_gpu {
        *upload_batch =
            renderer.ui_premultiply_upload(target_texture, *upload_batch, image, aabr.min);
    } else {
        let aabr = aabr.map(u32::from);
        let offset = aabr.min.into_array();
        let size = aabr.size().into_array();
        // upload directly
        renderer.update_texture(
            target_texture,
            offset,
            size,
            // NOTE: Rgba texture, so each pixel is 4 bytes, ergo this cannot fail.
            // We make the cast parameters explicit for clarity.
            bytemuck::cast_slice::<u8, [u8; 4]>(
                &(&**image)[..size[0] as usize * size[1] as usize * 4],
            ),
        )
    }
}
785
// This is used for border_color.is_some() images (ie the map image).
/// Create a dedicated texture for a single graphic and upload `image` to it,
/// returning the texture, its bind group, and the upload batch id.
fn create_image(
    renderer: &mut Renderer,
    image: &RgbaImage,
    texture_parameters: TextureParameters,
    premultiply_on_gpu: bool,
) -> (Arc<Texture>, UiTextureBindGroup, UiUploadBatchId) {
    let (tex, bind) = create_image_texture(
        renderer,
        texture_parameters.size.map(u32::from),
        texture_parameters
            .border_color
            // TODO: either use the desktop only border color or just emulate this
            //.map(|c| c.into_array().into()),
            .map(|_| wgpu::AddressMode::ClampToBorder),
    );
    let mut upload_batch = UiUploadBatchId::default();
    // The image fills the whole dedicated texture.
    let aabr = Aabr {
        min: Vec2::zero(),
        max: texture_parameters.size,
    };
    upload_image(
        renderer,
        &tex,
        &mut upload_batch,
        image,
        aabr,
        premultiply_on_gpu,
    );
    (tex, bind, upload_batch)
}
817
818// CPU-side alpha premultiplication implementation.
819
/// Lookup tables used by the CPU alpha premultiplication path (see
/// `premultiply_alpha`), precomputed once via `Default`.
pub struct PremultiplyLookupTable {
    // Per-alpha multiplier, scaled by `u16::MAX + 1` (see `Default` impl).
    alpha: [u16; 256],
    // This is for both colors that are always below the linear transform threshold (of the
    // transform between linear/non-linear srgb) and colors that start above the threshold when
    // transforming into linear srgb and then fall below it after being multiplied by alpha (before
    // being transformed out of linear srgb).
    color: [u16; 256],
}
828
impl Default for PremultiplyLookupTable {
    /// Precompute the alpha and color lookup tables used by
    /// `premultiply_alpha`.
    fn default() -> Self {
        #[rustfmt::skip]
        // Accurate sRGB -> linear transform for a single 8-bit channel.
        fn accurate_to_linear(c: u8) -> f32 {
            let c = c as f32 / 255.0;
            // https://en.wikipedia.org/wiki/SRGB#Transformation
            if c <= 0.04045 {
                c / 12.92
            } else {
                // 0.055 * 255 ~= 14 (presumably the source of the `14` offsets
                // used in `premultiply_alpha` — TODO confirm)
                ((c + 0.055) / 1.055).powf(2.4)
            }
        }

        use core::array;
        let alpha = array::from_fn(|alpha| {
            // NOTE: u16::MAX + 1 here relies on the max alpha being short-circuited (and
            // not using this table). We multiply by this factor since it is a
            // power of 2, which means later demultiplying it will optimize to a
            // bitshift.
            (((alpha as f32 / 255.0).powf(1.0 / 2.4) * (u16::MAX as f32 + 1.0)) + 0.5) as u16
        });
        let color = array::from_fn(|color| {
            (if color <= 10 {
                // <= 10 means the transform is linear!
                color as f32 / 255.0
            } else {
                // Here the transform into linear srgb isn't linear but the transform out of it is.
                //
                // This is transform into and out of linear srgb with the theoretical alpha
                // multiplication factored out.
                accurate_to_linear(color as u8) * 12.92
            }
            // take advantage of the precision offered by u16
            * (1 << 13) as f32
            // round to the nearest integer when the cast truncates
            + 0.5) as u16
        });
        Self { alpha, color }
    }
}
870
871fn premultiply_alpha(image: &mut RgbaImage) {
872 lazy_static::lazy_static! {
873 static ref LOOKUP: PremultiplyLookupTable = Default::default();
874 }
875 let lookup = &*LOOKUP;
876 // TODO: Apparently it is possible for ImageBuffer raw vec to have more pixels
877 // than the dimensions of the actual image (I don't think we actually have
878 // this occuring but we should probably fix other spots that use the raw
879 // buffer). See:
880 // https://github.com/image-rs/image/blob/a1ce569afd476e881acafdf9e7a5bce294d0db9a/src/buffer.rs#L664
881 let dims = image.dimensions();
882 let image_buffer_len = dims.0 as usize * dims.1 as usize * 4;
883 let (arrays, end) = (&mut **image)[..image_buffer_len].as_chunks_mut::<{ 4 * 4 }>();
884 // Rgba8 has 4 bytes per pixel there should be no remainder when dividing by 4.
885 let (end, _) = end.as_chunks_mut::<4>();
886 end.iter_mut().for_each(|pixel| {
887 let alpha = pixel[3];
888 if alpha == 0 {
889 *pixel = [0; 4];
890 return;
891 } else if alpha == 255 {
892 return;
893 };
894
895 for color in &mut pixel[..3] {
896 let predicted = ((lookup.alpha[alpha as usize] as u32) * (*color as u32 + 14) + 32433)
897 / (u16::MAX as u32 + 1);
898 let multiplied_color = (if predicted < 9 + 14 {
899 (lookup.color[*color as usize] as u32 * alpha as u32 + 4096) >> 13
900 } else {
901 predicted - 14
902 }) as u8;
903 *color = multiplied_color;
904 }
905 });
906 arrays.iter_mut().for_each(|pixelx4| {
907 // Short-circuit for alpha == 0 or 255
908 // This adds ~7 us (worst case) for a 256x256 image.
909 // Best case is decreased to 20 us total time.
910 if pixelx4[3] == pixelx4[7] && pixelx4[3] == pixelx4[11] && pixelx4[3] == pixelx4[15] {
911 if pixelx4[3] == 0 {
912 *pixelx4 = [0; 16];
913 return;
914 } else if pixelx4[3] == u8::MAX {
915 return;
916 }
917 }
918
919 // Lookup transformed alpha values for each pixel first.
920 // Putting this here seems to make things slightly faster.
921 let factors = [
922 lookup.alpha[pixelx4[3] as usize],
923 lookup.alpha[pixelx4[7] as usize],
924 lookup.alpha[pixelx4[11] as usize],
925 lookup.alpha[pixelx4[15] as usize],
926 ];
927 for pixel_index in 0..4 {
928 let alpha_factor = factors[pixel_index];
929 let alpha = pixelx4[pixel_index * 4 + 3];
930 // Putting this code outside the loop makes things take ~25% less time.
931 let color_factors = [
932 lookup.color[pixelx4[pixel_index * 4 + 0] as usize] as u32 * alpha as u32 + 4096,
933 lookup.color[pixelx4[pixel_index * 4 + 1] as usize] as u32 * alpha as u32 + 4096,
934 lookup.color[pixelx4[pixel_index * 4 + 2] as usize] as u32 * alpha as u32 + 4096,
935 ];
936 for i in 0..3 {
937 let color = &mut pixelx4[pixel_index * 4 + i];
938 // Loosely based on transform to linear and back (above threshold) (this is
939 // where use of 14 comes from).
940 // `32433` selected via trial and error to reduce the number of mismatches.
941 // `/ (u16::MAX as u32 + 1)` transforms back to `u8` precision (we add 1 so it
942 // will be a division by a power of 2 which optimizes well).
943 let predicted =
944 ((alpha_factor as u32) * (*color as u32 + 14) + 32328) / (u16::MAX as u32 + 1);
945 let multiplied_color = (if predicted < 9 + 14 {
946 // Here we handle two cases:
947 // 1. When the transform starts and ends as linear.
948 // 2. When the color is over the linear threshold for the transform into linear
949 // space but below this threshold when transforming back out (due to being
950 // multiplied with a small alpha).
951 // (in both cases the result is linearly related to alpha and we can encode how
952 // it is related to the color in a lookup table)
953 // NOTE: 212 is the largest color value used here (when alpha isn't 0)
954 color_factors[i] >> 13
955 } else {
956 predicted - 14
957 }) as u8;
958 *color = multiplied_color;
959 }
960 }
961 });
962}
963
/// Strategy for how alpha premultiplication will be applied to an image.
///
/// Selected by [`PremultiplyStrategy::determine`] based on image size and,
/// for mid-sized images, on how many pixel chunks would hit the fast
/// short-circuit paths of the CPU implementation.
enum PremultiplyStrategy {
    /// Premultiply on the CPU (see `premultiply_alpha`); cheapest for small
    /// images or images dominated by fully transparent/opaque chunks.
    UseCpu,
    /// Premultiply via the GPU path; presumably amortizes better for large
    /// images despite per-image API overhead (texture/view/bind-group
    /// creation — see the cost notes in `determine`).
    UseGpu,
    // Image is fully opaque.
    /// Image is fully opaque (all alpha values are 255), so premultiplication
    /// would be a no-op and can be skipped entirely.
    NotNeeded,
}
971
972impl PremultiplyStrategy {
973 #[rustfmt::skip] // please don't format comment with 'ns/pixel' to a separate line from the value
974 fn determine(image: &RgbaImage) -> Self {
975 // TODO: Would be useful to re-time this after a wgpu update.
976 //
977 // Thresholds below are based on the timing measurements of the CPU based premultiplication
978 // vs ovehead of interacting with the GPU API to perform premultiplication on the GPU.
979 // These timings are quite circumstantial and could vary between machines, wgpu updates,
980 // and changes to the structure of the GPU based path.
981 //
982 // GPU path costs (For calculations I used `57.6 us` as a roughly reasonable estimate of
983 // total time here but that can vary lower and higher. Everything is a bit imprecise here
984 // so I won't list individual timings. The key takeaway is that this can be made more
985 // efficient by avoidiing the create/drop of a texture, texture view, and bind group for
986 // each image. Also, if we didn't need a separate render pass for each target image that
987 // would be helpful as well. Using compute passes and passing data in as a raw buffer may
988 // help with both of these but initial attempts with that ran into issues (e.g. when we get
989 // the ability to have non-srgb views of srgb textures that will be useful)):
990 // * create/drop texture
991 // * create/drop texture view
992 // * create/drop bind group
993 // * run render pass (NOTE: if many images are processed at once with the same target
994 // texture this portion of the cost can be split between them)
995 //
996 // CPU path costs:
997 // * clone image (0.17 ns/pixel (benchmark) - 0.73 ns/pixel (in voxygen))
998 // * run premultiplication (0.305 ns/pixel (when shortcircuits are always hit) -
999 // 3.81 ns/pixel (with random alpha))
1000 //
1001 // Shared costs include:
1002 // * write_texture
1003 // * (optional) check for fraction of shortcircuit blocks in image (0.223 ns/pixel)
1004 //
1005 // `ALWAYS_CPU_THRESHOLD` is roughly:
1006 // ("cost of GPU path" + "shortcircuit count cost") / "worst case cost of CPU path per pixel"
1007 //
1008 // `ALWAYS_GPU_THRESHOLD` is NOT: "cost of GPU path" / "best case cost of CPU path per pixel"
1009 // since the cost of checking for whether the CPU path is better at this quantity of pixels
1010 // becomes more than the on the amount of overhead we are willing to add to the worst case
1011 // scenario where we run the short-circuit count check and end up using the GPU path. The
1012 // currently selected value of 200x200 adds at most about ~20% of the cost of the GPU path.
1013 // (TODO: maybe we could have the check bail out early if the results aren't looking
1014 // favorable for the CPU path and/or sample a random subset of the pixels).
1015 //
1016 // `CHECKED_THRESHOLD` is roughly: "cost of GPU path / "best case cost of CPU path per pixel"
1017 const ALWAYS_CPU_THRESHOLD: usize = 120 * 120;
1018 const ALWAYS_GPU_THRESHOLD: usize = 200 * 200;
1019 const CHECKED_THRESHOLD: usize = 240 * 240;
1020
1021 let dims = image.dimensions();
1022 let pixel_count = dims.0 as usize * dims.1 as usize;
1023 if pixel_count <= ALWAYS_CPU_THRESHOLD {
1024 Self::UseCpu
1025 } else if pixel_count > ALWAYS_GPU_THRESHOLD {
1026 Self::UseGpu
1027 } else if let Some(fraction) = fraction_shortcircuit_blocks(image) {
1028 // This seems correct...?
1029 // TODO: I think we technically can exit the fraction checking early if we know the
1030 // total fraction value will be over: (threshold - ALWAYS_CPU_THRESHOLD) /
1031 // (CHECKED_THRESHOLD - ALWAYS_CPU_THRESHOLD).
1032 let threshold = fraction * CHECKED_THRESHOLD as f32
1033 + (1.0 - fraction) * ALWAYS_CPU_THRESHOLD as f32;
1034 if pixel_count as f32 <= threshold {
1035 Self::UseCpu
1036 } else {
1037 Self::UseGpu
1038 }
1039 } else {
1040 Self::NotNeeded
1041 }
1042 }
1043}
1044
1045/// Useful to estimates cost of premultiplying alpha in the provided image via
1046/// the CPU method.
1047///
1048/// Computes the fraction of 4 pixel chunks that are fully translucent or
1049/// opaque. Returns `None` if no premultiplication is needed (i.e. all alpha
1050/// values are 255).
1051fn fraction_shortcircuit_blocks(image: &RgbaImage) -> Option<f32> {
1052 let dims = image.dimensions();
1053 let pixel_count = dims.0 as usize * dims.1 as usize;
1054 let (arrays, end) = (&**image)[..pixel_count * 4].as_chunks::<{ 4 * 4 }>();
1055
1056 // Rgba8 has 4 bytes per pixel there should be no remainder when dividing by 4.
1057 let (end, _) = end.as_chunks::<4>();
1058 let end_is_opaque = end.iter().all(|pixel| pixel[3] == 255);
1059
1060 // 14.6 us for 256x256 image
1061 let num_chunks = arrays.len();
1062 let mut num_translucent = 0;
1063 let mut num_opaque = 0;
1064 arrays.iter().for_each(|pixelx4| {
1065 let v = u128::from_ne_bytes(*pixelx4);
1066 let alpha_mask = 0x000000FF_000000FF_000000FF_000000FF;
1067 let masked = v & alpha_mask;
1068 if masked == 0 {
1069 num_translucent += 1;
1070 } else if masked == alpha_mask {
1071 num_opaque += 1;
1072 }
1073 });
1074
1075 if num_chunks == num_opaque && num_translucent == 0 && end_is_opaque {
1076 None
1077 } else {
1078 Some((num_translucent as f32 + num_opaque as f32) / num_chunks as f32)
1079 }
1080}