veloren_voxygen/ui/graphic/
mod.rs

1mod pixel_art;
2pub mod renderer;
3
4pub use renderer::{SampleStrat, Transform};
5
6use crate::{
7    render::{Renderer, Texture, UiTextureBindGroup, UiUploadBatchId},
8    ui::KeyedJobs,
9};
10use common::{figure::Segment, slowjob::SlowJobPool};
11use common_base::prof_span;
12use guillotiere::{SimpleAtlasAllocator, size2};
13use hashbrown::{HashMap, hash_map::Entry};
14use image::{DynamicImage, RgbaImage};
15use slab::Slab;
16use std::{borrow::Cow, hash::Hash, sync::Arc};
17use tracing::{error, warn};
18use vek::*;
19
/// A source graphic that can be registered with the `GraphicCache` and later
/// cached on the GPU.
#[derive(Clone)]
pub enum Graphic {
    /// An image that is uploaded at its original resolution; any scaling is
    /// done when sampling it on the GPU.
    ///
    /// NOTE: The second argument is an optional border color.  If this is set,
    /// we force the image into its own texture and use the border color
    /// whenever we sample beyond the image extent. This can be useful, for
    /// example, for the map and minimap, which both rotate and may be
    /// non-square (meaning if we want to display the whole map and render to a
    /// square, we may render out of bounds unless we perform proper
    /// clipping).
    // TODO: probably convert this type to `RgbaImage`.
    Image(Arc<DynamicImage>, Option<Rgba<f32>>),
    /// A voxel model that is rasterized at the requested resolution before
    /// being uploaded.
    // Note: none of the users keep this Arc currently
    Voxel(Arc<Segment>, Transform, SampleStrat),
    /// A graphic with no content; nothing is ever cached for it.
    // TODO: Re-evaluate whether we need this (especially outside conrod context)
    Blank,
}
36
/// How a graphic should be rotated when it is displayed.
///
/// The quarter-turn variants are handled when caching a graphic (the
/// requested dimensions and the returned source rectangle are adjusted
/// accordingly); the remaining variants are passed through unchanged there.
#[derive(Clone, Copy, Debug)]
pub enum Rotation {
    None,
    Cw90,
    Cw180,
    Cw270,
    /// Orientation of source rectangle that always faces true north.
    /// Simple hack to get around Conrod not having space for proper
    /// rotation data (though it should be possible to add in other ways).
    SourceNorth,
    /// Orientation of target rectangle that always faces true north.
    /// Simple hack to get around Conrod not having space for proper
    /// rotation data (though it should be possible to add in other ways).
    TargetNorth,
    /// Orientation of object that always faces the character's orientation
    /// adjusted by the minimap's rotation (only if the minimap is being
    /// rotated based on camera position). Simple hack to get around Conrod
    /// not having space for proper rotation data (though it should be
    /// possible to add in other ways).
    TargetPlayer,
}
58
/// Images larger than this are stored in individual textures rather than in
/// an atlas. Expressed as a fraction of the atlas size and checked separately
/// for each axis.
const ATLAS_CUTOFF_FRAC: f32 = 0.2;
/// The atlas size is the current window resolution multiplied by this (and
/// then clamped, see `atlas_size`).
const GRAPHIC_CACHE_RELATIVE_SIZE: u32 = 1;
64
/// Identifier of a graphic registered with the `GraphicCache` (handed out by
/// `GraphicCache::add_graphic`).
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
pub struct Id(u32);
67
/// Identifier of a GPU texture owned by the `GraphicCache`; wraps the index
/// of the texture in the cache's texture slab.
// TODO these can become invalid when clearing the cache
#[derive(PartialEq, Eq, Hash, Copy, Clone)]
pub struct TexId(usize);
71
/// Where a graphic has been cached on the GPU and whether that cached copy is
/// still up to date.
enum CachedDetails {
    /// The graphic occupies a sub-rectangle of a shared atlas texture.
    Atlas {
        // Index of the atlas this is cached in.
        atlas_idx: usize,
        // Whether this texture is valid.
        valid: bool,
        // Where in the cache texture this is.
        aabr: Aabr<u16>,
    },
    /// The graphic has a texture of its own.
    Texture {
        // Index of the (unique, non-atlas) texture this is cached in.
        index: usize,
        // Whether this texture is valid.
        valid: bool,
    },
}
88
89impl CachedDetails {
90    /// Get information about this cache entry: texture index,
91    /// whether the entry is valid, and its bounding box in the referenced
92    /// texture.
93    fn info(
94        &self,
95        atlases: &[(SimpleAtlasAllocator, usize)],
96        textures: &Slab<(Arc<Texture>, UiTextureBindGroup, UiUploadBatchId)>,
97    ) -> (usize, bool, Aabr<u16>) {
98        match *self {
99            CachedDetails::Atlas {
100                atlas_idx,
101                valid,
102                aabr,
103            } => (atlases[atlas_idx].1, valid, aabr),
104            CachedDetails::Texture { index, valid } => {
105                (index, valid, Aabr {
106                    min: Vec2::zero(),
107                    // NOTE (as cast): We don't accept images larger than u16::MAX (rejected in
108                    // `cache_res`) (and probably would not be able to create a texture this
109                    // large).
110                    //
111                    // Note texture should always match the cached dimensions.
112                    max: textures[index].0.get_dimensions().xy().map(|e| e as u16),
113                })
114            },
115        }
116    }
117
118    /// Invalidate this cache entry.
119    fn invalidate(&mut self) {
120        match self {
121            Self::Atlas { valid, .. } => {
122                *valid = false;
123            },
124            Self::Texture { valid, .. } => {
125                *valid = false;
126            },
127        }
128    }
129
130    fn set_valid(&mut self) {
131        match self {
132            Self::Atlas { valid, .. } => {
133                *valid = true;
134            },
135            Self::Texture { valid, .. } => {
136                *valid = true;
137            },
138        }
139    }
140}
141
/// Requirements that a particular graphic has with respect to the atlas
/// allocation or independent texture it will be stored in.
///
/// If this matches between an old graphic and a new one which is replacing it,
/// we can reuse any of the corresponding locations where it is cached in
/// textures on the GPU. That is we can invalidate such textures and upload the
/// new graphic there, rather than needing to allocate a new texture (or new
/// location in an atlas).
#[derive(PartialEq)]
enum TextureRequirements {
    /// These are uploaded to the GPU in the original resolution of the image
    /// supplied by the `Graphic` and any scaling is done during sampling in
    /// the UI fragment shader.
    Fixed {
        size: Vec2<u16>,
        /// Graphics with a border color specified are placed into their own
        /// individual textures so that the border color can be set
        /// there. (Note: this is partially a theoretical description as
        /// border color options are limited in the current graphics API).
        border_color: Option<Rgba<f32>>,
    },
    /// These are rasterized to the exact resolution that they will be displayed
    /// at and then uploaded to the GPU. This corresponds to
    /// `Graphic::Voxel`. There may be multiple copies on the GPU if
    /// different resolutions are requested.
    ///
    /// It is expected that the requested sizes will generally not differ when
    /// switching out a graphic. Thus, cached copies of dependent graphics
    /// should always be invalidated (rather than removed) since those cached
    /// locations will be reusable if the requested size is the same.
    Dependent,
}
174
/// These solely determine how a place in an atlas will be found or how a
/// texture will be created to place the image for a graphic.
struct TextureParameters {
    /// Size in pixels of the image that will be uploaded.
    size: Vec2<u16>,
    /// If set, the graphic is not placed in an atlas and gets a texture of
    /// its own.
    border_color: Option<Rgba<f32>>,
}
181
/// Key used to refer to an instance of a graphic that has been uploaded to the
/// GPU.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct CacheKey {
    /// The graphic this cached instance was produced from.
    graphic_id: Id,
    /// This is `Some` (holding the requested size) for
    /// `TextureRequirements::Dependent` and `None` for
    /// `TextureRequirements::Fixed`.
    size: Option<Vec2<u16>>,
}
190
191impl TextureRequirements {
192    fn from_graphic(graphic: &Graphic) -> Option<Self> {
193        match graphic {
194            Graphic::Image(image, border_color) => {
195                // Image sizes over u16::MAX are not supported (and we would probably not be
196                // able to create a texture large enough to hold them on the GPU anyway)!
197                let image_dims = match (u16::try_from(image.width()), u16::try_from(image.height()))
198                {
199                    (Ok(x), Ok(y)) if x != 0 && y != 0 => Vec2::new(x, y),
200                    _ => {
201                        error!(
202                            "Image dimensions greater than u16::MAX are not supported! Supplied \
203                             image size: ({}, {}).",
204                            image.width(),
205                            image.height(),
206                        );
207                        // TODO: reasonable to return None on this error case? We could potentially
208                        // validate images sizes on add_graphic/replace_graphic?
209                        return None;
210                    },
211                };
212
213                Some(Self::Fixed {
214                    size: image_dims,
215                    border_color: *border_color,
216                })
217            },
218            Graphic::Voxel(_, _, _) => Some(Self::Dependent),
219            Graphic::Blank => None,
220        }
221    }
222
223    #[expect(clippy::wrong_self_convention)] // type is spiritually Copy
224    fn to_key_and_tex_parameters(
225        self,
226        graphic_id: Id,
227        requested_size: Vec2<u16>,
228    ) -> (CacheKey, TextureParameters) {
229        // NOTE: Any external parameters which influence the value of the returned
230        // `TextureParameters` must be included in the `CacheKey`. Otherwise,
231        // invalidation and subsequent re-use of cache locations based on the
232        // value of `self` would be wrong.
233        let (size, border_color, key_size) = match self {
234            Self::Fixed { size, border_color } => (size, border_color, None),
235            Self::Dependent => (requested_size, None, Some(requested_size)),
236        };
237        (
238            CacheKey {
239                graphic_id,
240                size: key_size,
241            },
242            TextureParameters { size, border_color },
243        )
244    }
245}
246
// Caches graphics, only deallocates when changing screen resolution (completely
// cleared)
pub struct GraphicCache {
    /// All registered graphics, keyed by the id handed out for them.
    // TODO replace with slotmap
    graphic_map: HashMap<Id, Graphic>,
    /// Next id to use when a new graphic is added
    next_id: u32,

    /// Atlases with the index of their texture in the textures slab.
    atlases: Vec<(SimpleAtlasAllocator, usize)>,
    /// All textures owned by the cache (atlas textures as well as textures
    /// dedicated to a single graphic).
    ///
    /// Third tuple element refers to the batch of pending premultiply +
    /// upload operations for this frame. The purpose of this is to collect
    /// all the operations together so that a single renderpass is performed
    /// for each target texture.
    textures: Slab<(Arc<Texture>, UiTextureBindGroup, UiUploadBatchId)>,
    /// The location and details of graphics cached on the GPU.
    ///
    /// Graphic::Voxel images include the dimensions they were rasterized at in
    /// the key. Other images are scaled as part of sampling them on the
    /// GPU.
    cache_map: HashMap<CacheKey, CachedDetails>,

    /// Jobs that prepare images for upload (used for voxel graphics), keyed
    /// by the cache entry they are for.
    keyed_jobs: KeyedJobs<CacheKey, RgbaImage>,
}
271
272impl GraphicCache {
273    pub fn new(renderer: &mut Renderer) -> Self {
274        let (atlas, (tex, bind)) = create_atlas_texture(renderer);
275
276        let mut textures = Slab::new();
277        let tex_id = textures.insert((tex, bind, UiUploadBatchId::default()));
278
279        Self {
280            graphic_map: HashMap::default(),
281            next_id: 0,
282            atlases: vec![(atlas, tex_id)],
283            textures,
284            cache_map: HashMap::default(),
285            keyed_jobs: KeyedJobs::new("IMAGE_PROCESSING"),
286        }
287    }
288
289    pub fn add_graphic(&mut self, graphic: Graphic) -> Id {
290        let id = self.next_id;
291        self.next_id = id.wrapping_add(1);
292
293        let id = Id(id);
294        self.graphic_map.insert(id, graphic);
295
296        id
297    }
298
299    pub fn replace_graphic(&mut self, id: Id, graphic: Graphic) {
300        let (old, new) = match self.graphic_map.entry(id) {
301            Entry::Occupied(o) => {
302                let slot_mut = o.into_mut();
303                let old = core::mem::replace(slot_mut, graphic);
304                (old, slot_mut)
305            },
306            Entry::Vacant(v) => {
307                // This was not an update, so no need to cleanup caches.
308                v.insert(graphic);
309                return;
310            },
311        };
312
313        let old_requirements = TextureRequirements::from_graphic(&old);
314        let new_requirements = TextureRequirements::from_graphic(new);
315        let should_invalidate = old_requirements == new_requirements && old_requirements.is_some();
316
317        // Invalidate if possible or remove from caches.
318        // Maybe make this more efficient if replace graphic is used more often
319        // (especially since we should know the exact key for non-voxel
320        // graphics).
321        //
322        // NOTE: at the time of writing, replace_graphic is only used for voxel minimap
323        // updates and item image reloading.
324        if should_invalidate {
325            self.cache_map.iter_mut().for_each(|(key, details)| {
326                if key.graphic_id == id {
327                    details.invalidate();
328                }
329            });
330        } else {
331            let _ = self.cache_map.extract_if(|key, details| {
332                if key.graphic_id == id {
333                    match details {
334                        // NOTE: if replace_graphic is used continously for small images (i.e.
335                        // images placed into an atlas) of different sizes, that can use up our
336                        // atlas space since spots in the atlas can't be reused. (this scenario is
337                        // now possible with scaling being done during sampling rather than placing
338                        // resized version into the atlas). This is expected to not occur in all
339                        // pratical cases we plan to support here (i.e. the size of the replacement
340                        // image will always be the same).
341                        CachedDetails::Atlas { .. } => {},
342                        CachedDetails::Texture { index, .. } => {
343                            self.textures.remove(*index);
344                        },
345                    };
346                    true
347                } else {
348                    false
349                }
350            });
351        }
352    }
353
    /// Look up the graphic registered under `id`, if any.
    pub fn get_graphic(&self, id: Id) -> Option<&Graphic> { self.graphic_map.get(&id) }
355
356    /// Used to acquire textures for rendering
357    pub fn get_tex(&self, id: TexId) -> (&Texture, &UiTextureBindGroup) {
358        let (tex, bind, _upload_batch) = self.textures.get(id.0).expect("Invalid TexId used");
359        (tex, bind)
360    }
361
362    pub fn get_graphic_dims(&self, (id, rot): (Id, Rotation)) -> Option<(u32, u32)> {
363        use image::GenericImageView;
364        self.get_graphic(id)
365            .and_then(|graphic| match graphic {
366                Graphic::Image(image, _) => Some(image.dimensions()),
367                Graphic::Voxel(segment, _, _) => {
368                    use common::vol::SizedVol;
369                    let size = segment.size();
370                    // TODO: HACK because they can be rotated arbitrarily, remove
371                    // (and they can be rasterized at arbitrary resolution)
372                    // (might need to return None here?)
373                    Some((size.x, size.z))
374                },
375                Graphic::Blank => None,
376            })
377            .and_then(|(w, h)| match rot {
378                Rotation::None | Rotation::Cw180 => Some((w, h)),
379                Rotation::Cw90 | Rotation::Cw270 => Some((h, w)),
380                // TODO: need dims for these?
381                Rotation::SourceNorth | Rotation::TargetNorth | Rotation::TargetPlayer => None,
382            })
383    }
384
385    pub fn clear_cache(&mut self, renderer: &mut Renderer) {
386        self.cache_map.clear();
387
388        let (atlas, (tex, bind)) = create_atlas_texture(renderer);
389        let mut textures = Slab::new();
390        let tex_id = textures.insert((tex, bind, UiUploadBatchId::default()));
391        self.atlases = vec![(atlas, tex_id)];
392        self.textures = textures;
393    }
394
    /// Ensure the graphic `graphic_id` is cached on the GPU and return where
    /// to sample it from.
    ///
    /// Source rectangle should be from 0 to 1, and represents a bounding box
    /// for the source image of the graphic.
    ///
    /// On success returns the rectangle to sample (in texels of the returned
    /// texture, with `source` and `rotation` applied), the number of displayed
    /// pixels per sampled texel along each axis, and the id of the texture
    /// holding the graphic.
    ///
    /// Returns `None` if no graphic is registered under `graphic_id`, if the
    /// graphic has no texture requirements (see
    /// `TextureRequirements::from_graphic`), or if `prepare_graphic` did not
    /// produce an image.
    ///
    /// # Panics
    ///
    /// Panics if one of the lengths in requested_dims is zero.
    pub fn cache_res(
        &mut self,
        renderer: &mut Renderer,
        pool: Option<&SlowJobPool>,
        graphic_id: Id,
        // TODO: if we aren't resizing here we can potentially upload the image earlier... (as long
        // as this doesn't lead to uploading too much unused stuff). (currently not sure whether it
        // would be an overall gain to pursue this.)
        requested_dims: Vec2<u16>,
        source: Aabr<f64>,
        rotation: Rotation,
    ) -> Option<((Aabr<f64>, Vec2<f32>), TexId)> {
        assert!(requested_dims.map(|e| e != 0).reduce_and());
        let requested_dims_upright = match rotation {
            // The image is stored on the GPU with no rotation, so we need to swap the dimensions
            // here to get the resolution that the image will be displayed at but re-oriented into
            // the "upright" space that the image is stored in and sampled from (this can be bit
            // confusing initially / hard to explain).
            Rotation::Cw90 | Rotation::Cw270 => requested_dims.yx(),
            Rotation::None | Rotation::Cw180 => requested_dims,
            Rotation::SourceNorth => requested_dims,
            Rotation::TargetNorth => requested_dims,
            Rotation::TargetPlayer => requested_dims,
        };

        // Rotate aabr according to requested rotation.
        let rotated_aabr = |Aabr { min, max }| match rotation {
            Rotation::None
            | Rotation::SourceNorth
            | Rotation::TargetNorth
            | Rotation::TargetPlayer => Aabr { min, max },
            Rotation::Cw90 => Aabr {
                min: Vec2::new(min.x, max.y),
                max: Vec2::new(max.x, min.y),
            },
            Rotation::Cw180 => Aabr { min: max, max: min },
            Rotation::Cw270 => Aabr {
                min: Vec2::new(max.x, min.y),
                max: Vec2::new(min.x, max.y),
            },
        };
        // Scale aabr according to provided source rectangle.
        let scaled_aabr = |aabr: Aabr<_>| {
            let size: Vec2<f64> = aabr.size().into();
            Aabr {
                min: size.mul_add(source.min, aabr.min),
                max: size.mul_add(source.max, aabr.min),
            }
        };
        // Apply all transformations.
        // TODO: Verify rotation is being applied correctly.
        let transformed_aabr_and_scale = |aabr| {
            let scaled = scaled_aabr(aabr);
            // Calculate how many displayed pixels there are for each pixel in the source
            // image. We need this to calculate where to sample in the shader to
            // retain crisp pixel borders when scaling the image.
            let scale = requested_dims_upright.map2(
                Vec2::from(scaled.size()),
                |screen_pixels, sample_pixels: f64| screen_pixels as f32 / sample_pixels as f32,
            );
            let transformed = rotated_aabr(scaled);
            (transformed, scale)
        };

        let Self {
            textures,
            atlases,
            cache_map,
            graphic_map,
            ..
        } = self;

        let graphic = match graphic_map.get(&graphic_id) {
            Some(g) => g,
            None => {
                warn!(
                    ?graphic_id,
                    "A graphic was requested via an id which is not in use"
                );
                return None;
            },
        };

        let requirements = TextureRequirements::from_graphic(graphic)?;
        let (key, texture_parameters) =
            requirements.to_key_and_tex_parameters(graphic_id, requested_dims_upright);

        // Fast path: the graphic already has a location on the GPU. Only re-upload
        // if that location was invalidated; otherwise fall through to allocating a
        // new location below.
        let details = match cache_map.entry(key) {
            Entry::Occupied(mut details) => {
                let details = details.get_mut();
                let (idx, valid, aabr) = details.info(atlases, textures);

                // Check if the cached version has been invalidated by replacing the underlying
                // graphic
                if !valid {
                    // Create image
                    let (image, gpu_premul) = prepare_graphic(
                        graphic,
                        key,
                        requested_dims_upright,
                        &mut self.keyed_jobs,
                        pool,
                    )?;
                    // Ensure we don't have any bugs causing the size used to determine if the
                    // cached version is reusable to not match the size of the image produced by
                    // prepare_graphic.
                    assert_eq!(
                        image.dimensions(),
                        texture_parameters.size.map(u32::from).into_tuple()
                    );
                    // Transfer to the gpu
                    let &mut (ref texture, _, ref mut upload_batch) = &mut textures[idx];
                    upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
                    details.set_valid();
                }

                return Some((
                    transformed_aabr_and_scale(aabr.map(|e| e as f64)),
                    TexId(idx),
                ));
            },
            Entry::Vacant(details) => details,
        };

        // Construct image in an optional threadpool.
        let (image, gpu_premul) = prepare_graphic(
            graphic,
            key,
            requested_dims_upright,
            &mut self.keyed_jobs,
            pool,
        )?;
        // Assert dimensions of image from `prepare_graphic` are as expected!
        assert_eq!(
            image.dimensions(),
            texture_parameters.size.map(u32::from).into_tuple()
        );
        // Image dimensions in the format used by the allocator crate.
        let image_dims_size2d = size2(
            i32::from(texture_parameters.size.x),
            i32::from(texture_parameters.size.y),
        );

        // Now we allocate space on the gpu (either in an atlas or an independent
        // texture) and upload the image to that location.

        let atlas_size = atlas_size(renderer);
        // Graphics that request a border color or which are over a particular size
        // compared to the atlas size are sent to their own textures.
        let can_place_in_atlas = texture_parameters.border_color.is_none()
            && atlas_size
                .map2(texture_parameters.size, |a, d| {
                    a as f32 * ATLAS_CUTOFF_FRAC >= d as f32
                })
                .reduce_and();
        let location = if can_place_in_atlas {
            // Fit into an atlas
            let mut loc = None;
            // Use the first existing atlas that still has room for the image.
            for (atlas_idx, &mut (ref mut atlas, texture_idx)) in atlases.iter_mut().enumerate() {
                if let Some(rectangle) = atlas.allocate(image_dims_size2d) {
                    let aabr = aabr_from_alloc_rect(rectangle);
                    loc = Some(CachedDetails::Atlas {
                        atlas_idx,
                        valid: true,
                        aabr,
                    });
                    let &mut (ref texture, _, ref mut upload_batch) = &mut textures[texture_idx];
                    upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
                    break;
                }
            }

            match loc {
                Some(loc) => loc,
                // Create a new atlas
                None => {
                    let (mut atlas, (tex, bind)) = create_atlas_texture(renderer);
                    // NOTE: this allocation is expected to succeed since only images that are
                    // small relative to the atlas size (see `can_place_in_atlas`) get here.
                    let aabr = atlas
                        .allocate(image_dims_size2d)
                        .map(aabr_from_alloc_rect)
                        .unwrap();
                    // NOTE: All mutations happen only after the texture creation succeeds!
                    let tex_idx = textures.insert((tex, bind, UiUploadBatchId::default()));
                    let atlas_idx = atlases.len();
                    atlases.push((atlas, tex_idx));
                    let &mut (ref texture, _, ref mut upload_batch) = &mut textures[tex_idx];
                    upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
                    CachedDetails::Atlas {
                        atlas_idx,
                        valid: true,
                        aabr,
                    }
                },
            }
        } else {
            // Create a texture just for this
            let (tex, bind, upload_batch) =
                create_image(renderer, &image, texture_parameters, gpu_premul);
            // NOTE: All mutations happen only after the texture creation and upload
            // initiation succeeds! (completing the upload does not have any
            // failure cases afaik)
            let index = textures.insert((tex, bind, upload_batch));
            CachedDetails::Texture { index, valid: true }
        };

        // Extract information from cache entry.
        let (idx, _, aabr) = location.info(atlases, textures);

        // Insert into cached map
        details.insert(location);

        Some((
            transformed_aabr_and_scale(aabr.map(|e| e as f64)),
            TexId(idx),
        ))
    }
617}
618
619/// Prepare the graphic into the form that will be uploaded to the GPU.
620///
621/// For voxel graphics, draws the graphic at the specified dimensions.
622///
623/// Alpha premultiplication is necessary so that  images so they can be linearly
624/// filtered on the GPU. Premultiplication can either occur here or on the GPU
625/// depending on the size of the image and other factors. If premultiplication
626/// on the GPU is needed the returned bool will be `true`.
627fn prepare_graphic<'graphic>(
628    graphic: &'graphic Graphic,
629    cache_key: CacheKey,
630    dims: Vec2<u16>,
631    keyed_jobs: &mut KeyedJobs<CacheKey, RgbaImage>,
632    pool: Option<&SlowJobPool>,
633) -> Option<(Cow<'graphic, RgbaImage>, bool)> {
634    prof_span!("prepare_graphic");
635    match graphic {
636        Graphic::Blank => None,
637        Graphic::Image(image, _border_color) => {
638            // Image will be rescaled when sampling from it on the GPU so we don't
639            // need to resize it here.
640            //
641            // TODO: We could potentially push premultiplication even earlier (e.g. to the
642            // time of loading images or packaging veloren for distribution).
643            let mut rgba_cow = image.as_rgba8().map_or_else(
644                || {
645                    // TODO: we may want to require loading in as the rgba8 format so we don't have
646                    // to perform conversion here. On the other hand, we can take advantage of
647                    // certain formats to know that alpha premultiplication doesn't need to be
648                    // performed (but we would probably just want to store that with the loaded
649                    // rgba8 format).
650                    Cow::Owned(image.to_rgba8())
651                },
652                Cow::Borrowed,
653            );
654            // NOTE: We do premultiplication on the main thread since if it would be
655            // expensive enough to do in the background we would just do it on
656            // the GPU. Could still use `rayon` to parallelize this work, if
657            // needed.
658            let premultiply_strategy = PremultiplyStrategy::determine(&rgba_cow);
659            let needs_gpu_premultiply = match premultiply_strategy {
660                PremultiplyStrategy::UseGpu => true,
661                PremultiplyStrategy::NotNeeded => false,
662                PremultiplyStrategy::UseCpu => {
663                    // NOTE: to_mut will clone the image if it was Cow::Borrowed
664                    premultiply_alpha(rgba_cow.to_mut());
665                    false
666                },
667            };
668
669            Some((rgba_cow, needs_gpu_premultiply))
670        },
671        Graphic::Voxel(segment, trans, sample_strat) => keyed_jobs
672            .spawn(pool, cache_key, || {
673                let segment = Arc::clone(segment);
674                let (trans, sample_strat) = (*trans, *sample_strat);
675                move |_| {
676                    // TODO: for now we always use CPU premultiplication for these, may want to
677                    // re-evaluate this after zoomy worldgen branch is merged (and it is more clear
678                    // when these jobs go to the background thread pool or not).
679
680                    // Render voxel model at requested resolution
681                    let mut image = renderer::draw_vox(&segment, dims, trans, sample_strat);
682                    premultiply_alpha(&mut image);
683                    image
684                }
685            })
686            .map(|(_, v)| (Cow::Owned(v), false)),
687    }
688}
689
690fn atlas_size(renderer: &Renderer) -> Vec2<u32> {
691    let max_texture_size = renderer.max_texture_size();
692
693    renderer
694        .resolution()
695        .map(|e| (e * GRAPHIC_CACHE_RELATIVE_SIZE).clamp(512, max_texture_size))
696}
697
698/// This creates a texture suitable for sampling from during the UI pass and
699/// rendering too during alpha premultiplication upload passes.
700fn create_image_texture(
701    renderer: &mut Renderer,
702    size: Vec2<u32>,
703    address_mode: Option<wgpu::AddressMode>,
704) -> (Arc<Texture>, UiTextureBindGroup) {
705    // TODO: Right now we have to manually clear images to workaround AMD DX bug,
706    // for this we use Queue::write_texture which needs this usage. I think this
707    // may be fixed in newer wgpu versions that auto-clear the texture.
708    let workaround_usage = wgpu::TextureUsages::COPY_DST;
709    let tex_info = wgpu::TextureDescriptor {
710        label: None,
711        size: wgpu::Extent3d {
712            width: size.x,
713            height: size.y,
714            depth_or_array_layers: 1,
715        },
716        mip_level_count: 1,
717        sample_count: 1,
718        dimension: wgpu::TextureDimension::D2,
719        format: wgpu::TextureFormat::Rgba8UnormSrgb,
720        usage: wgpu::TextureUsages::RENDER_ATTACHMENT // GPU premultiply
721            | wgpu::TextureUsages::COPY_DST // CPU premultiply
722            | wgpu::TextureUsages::TEXTURE_BINDING // using image in ui rendering
723            | workaround_usage,
724        view_formats: &[],
725    };
726    let view_info = wgpu::TextureViewDescriptor {
727        format: Some(tex_info.format),
728        dimension: Some(wgpu::TextureViewDimension::D2),
729        ..Default::default()
730    };
731    let address_mode = address_mode.unwrap_or(wgpu::AddressMode::ClampToEdge);
732    let sampler_info = wgpu::SamplerDescriptor {
733        address_mode_u: address_mode,
734        address_mode_v: address_mode,
735        mag_filter: wgpu::FilterMode::Linear,
736        min_filter: wgpu::FilterMode::Linear,
737        ..Default::default()
738    };
739    let tex = renderer.create_texture_raw(&tex_info, &view_info, &sampler_info);
740    let bind = renderer.ui_bind_texture(&tex);
741    (Arc::new(tex), bind)
742}
743
744fn create_atlas_texture(
745    renderer: &mut Renderer,
746) -> (SimpleAtlasAllocator, (Arc<Texture>, UiTextureBindGroup)) {
747    let size = atlas_size(renderer);
748    // Note: here we assume the max texture size is under i32::MAX.
749    let atlas = SimpleAtlasAllocator::new(size2(size.x as i32, size.y as i32));
750    let (tex, bind) = create_image_texture(renderer, size, None);
751    (atlas, (tex, bind))
752}
753
754fn aabr_from_alloc_rect(rect: guillotiere::Rectangle) -> Aabr<u16> {
755    let (min, max) = (rect.min, rect.max);
756    // Note: here we assume the max texture size (and thus the maximum size of the
757    // atlas) is under `u16::MAX`.
758    Aabr {
759        min: Vec2::new(min.x as u16, min.y as u16),
760        max: Vec2::new(max.x as u16, max.y as u16),
761    }
762}
763
764fn upload_image(
765    renderer: &mut Renderer,
766    target_texture: &Arc<Texture>,
767    upload_batch: &mut UiUploadBatchId,
768    image: &RgbaImage,
769    aabr: Aabr<u16>,
770    premultiply_on_gpu: bool,
771) {
772    // Check that this image and the target aabr are the same size (otherwise there
773    // is a bug in this module).
774    debug_assert_eq!(aabr.map(u32::from).size().into_tuple(), image.dimensions());
775    if premultiply_on_gpu {
776        *upload_batch =
777            renderer.ui_premultiply_upload(target_texture, *upload_batch, image, aabr.min);
778    } else {
779        let aabr = aabr.map(u32::from);
780        let offset = aabr.min.into_array();
781        let size = aabr.size().into_array();
782        // upload directly
783        renderer.update_texture(
784            target_texture,
785            offset,
786            size,
787            // NOTE: Rgba texture, so each pixel is 4 bytes, ergo this cannot fail.
788            // We make the cast parameters explicit for clarity.
789            bytemuck::cast_slice::<u8, [u8; 4]>(
790                &(&**image)[..size[0] as usize * size[1] as usize * 4],
791            ),
792        )
793    }
794}
795
796// This is used for border_color.is_some() images (ie the map image).
797fn create_image(
798    renderer: &mut Renderer,
799    image: &RgbaImage,
800    texture_parameters: TextureParameters,
801    premultiply_on_gpu: bool,
802) -> (Arc<Texture>, UiTextureBindGroup, UiUploadBatchId) {
803    let (tex, bind) = create_image_texture(
804        renderer,
805        texture_parameters.size.map(u32::from),
806        texture_parameters
807            .border_color
808            // TODO: either use the desktop only border color or just emulate this
809            //.map(|c| c.into_array().into()),
810            .map(|_| wgpu::AddressMode::ClampToBorder),
811    );
812    let mut upload_batch = UiUploadBatchId::default();
813    let aabr = Aabr {
814        min: Vec2::zero(),
815        max: texture_parameters.size,
816    };
817    upload_image(
818        renderer,
819        &tex,
820        &mut upload_batch,
821        image,
822        aabr,
823        premultiply_on_gpu,
824    );
825    (tex, bind, upload_batch)
826}
827
828// CPU-side alpha premultiplication implementation.
829
/// Fixed-point lookup tables used by the CPU alpha premultiplication code.
pub struct PremultiplyLookupTable {
    /// Per-alpha multiplication factor, scaled by `u16::MAX + 1`.
    alpha: [u16; 256],
    /// Per-color factor, scaled by `1 << 13`.
    ///
    /// This is for both colors that are always below the linear transform
    /// threshold (of the transform between linear/non-linear srgb) and colors
    /// that start above the threshold when transforming into linear srgb and
    /// then fall below it after being multiplied by alpha (before being
    /// transformed out of linear srgb).
    color: [u16; 256],
}

impl Default for PremultiplyLookupTable {
    /// Builds both tables by evaluating the srgb transfer functions for every
    /// possible `u8` value.
    fn default() -> Self {
        /// Converts a non-linear srgb byte into a linear value in `0.0..=1.0`.
        fn accurate_to_linear(c: u8) -> f32 {
            let encoded = c as f32 / 255.0;
            // https://en.wikipedia.org/wiki/SRGB#Transformation
            if encoded > 0.04045 {
                // 0.055 ~= 14
                ((encoded + 0.055) / 1.055).powf(2.4)
            } else {
                encoded / 12.92
            }
        }

        let mut alpha = [0u16; 256];
        for (value, slot) in alpha.iter_mut().enumerate() {
            // NOTE: u16::MAX + 1 here relies on the max alpha being short-circuited (and
            // not using this table). We multiply by this factor since it is a
            // power of 2, which means later demultiplying it will optimize to a
            // bitshift.
            let factor = (value as f32 / 255.0).powf(1.0 / 2.4) * (u16::MAX as f32 + 1.0);
            // Adding 0.5 rounds to the nearest integer when the cast truncates.
            *slot = (factor + 0.5) as u16;
        }

        let mut color = [0u16; 256];
        for (value, slot) in color.iter_mut().enumerate() {
            let factor = if value <= 10 {
                //  <= 10 means the transform is linear!
                value as f32 / 255.0
            } else {
                // Here the transform into linear srgb isn't linear but the transform out of
                // it is.
                //
                // This is transform into and out of linear srgb with the theoretical alpha
                // multiplication factored out.
                accurate_to_linear(value as u8) * 12.92
            };
            // Take advantage of the precision offered by u16 and round to the nearest
            // integer when the cast truncates.
            *slot = (factor * (1 << 13) as f32 + 0.5) as u16;
        }

        Self { alpha, color }
    }
}
880
881fn premultiply_alpha(image: &mut RgbaImage) {
882    lazy_static::lazy_static! {
883        static ref LOOKUP: PremultiplyLookupTable = Default::default();
884    }
885    let lookup = &*LOOKUP;
886    // TODO: Apparently it is possible for ImageBuffer raw vec to have more pixels
887    // than the dimensions of the actual image (I don't think we actually have
888    // this occuring but we should probably fix other spots that use the raw
889    // buffer). See:
890    // https://github.com/image-rs/image/blob/a1ce569afd476e881acafdf9e7a5bce294d0db9a/src/buffer.rs#L664
891    let dims = image.dimensions();
892    let image_buffer_len = dims.0 as usize * dims.1 as usize * 4;
893    let (arrays, end) = (&mut **image)[..image_buffer_len].as_chunks_mut::<{ 4 * 4 }>();
894    // Rgba8 has 4 bytes per pixel there should be no remainder when dividing by 4.
895    let (end, _) = end.as_chunks_mut::<4>();
896    end.iter_mut().for_each(|pixel| {
897        let alpha = pixel[3];
898        if alpha == 0 {
899            *pixel = [0; 4];
900            return;
901        } else if alpha == 255 {
902            return;
903        };
904
905        for color in &mut pixel[..3] {
906            let predicted = ((lookup.alpha[alpha as usize] as u32) * (*color as u32 + 14) + 32433)
907                / (u16::MAX as u32 + 1);
908            let multiplied_color = (if predicted < 9 + 14 {
909                (lookup.color[*color as usize] as u32 * alpha as u32 + 4096) >> 13
910            } else {
911                predicted - 14
912            }) as u8;
913            *color = multiplied_color;
914        }
915    });
916    arrays.iter_mut().for_each(|pixelx4| {
917        // Short-circuit for alpha == 0 or 255
918        // This adds ~7 us (worst case) for a 256x256 image.
919        // Best case is decreased to 20 us total time.
920        if pixelx4[3] == pixelx4[7] && pixelx4[3] == pixelx4[11] && pixelx4[3] == pixelx4[15] {
921            if pixelx4[3] == 0 {
922                *pixelx4 = [0; 16];
923                return;
924            } else if pixelx4[3] == u8::MAX {
925                return;
926            }
927        }
928
929        // Lookup transformed alpha values for each pixel first.
930        // Putting this here seems to make things slightly faster.
931        let factors = [
932            lookup.alpha[pixelx4[3] as usize],
933            lookup.alpha[pixelx4[7] as usize],
934            lookup.alpha[pixelx4[11] as usize],
935            lookup.alpha[pixelx4[15] as usize],
936        ];
937        for pixel_index in 0..4 {
938            let alpha_factor = factors[pixel_index];
939            let alpha = pixelx4[pixel_index * 4 + 3];
940            // Putting this code outside the loop makes things take ~25% less time.
941            let color_factors = [
942                lookup.color[pixelx4[pixel_index * 4 + 0] as usize] as u32 * alpha as u32 + 4096,
943                lookup.color[pixelx4[pixel_index * 4 + 1] as usize] as u32 * alpha as u32 + 4096,
944                lookup.color[pixelx4[pixel_index * 4 + 2] as usize] as u32 * alpha as u32 + 4096,
945            ];
946            for i in 0..3 {
947                let color = &mut pixelx4[pixel_index * 4 + i];
948                // Loosely based on transform to linear and back (above threshold) (this is
949                // where use of 14 comes from).
950                // `32433` selected via trial and error to reduce the number of mismatches.
951                // `/ (u16::MAX as u32 + 1)` transforms back to `u8` precision (we add 1 so it
952                // will be a division by a power of 2 which optimizes well).
953                let predicted =
954                    ((alpha_factor as u32) * (*color as u32 + 14) + 32328) / (u16::MAX as u32 + 1);
955                let multiplied_color = (if predicted < 9 + 14 {
956                    // Here we handle two cases:
957                    // 1. When the transform starts and ends as linear.
958                    // 2. When the color is over the linear threshold for the transform into linear
959                    //    space but below this threshold when transforming back out (due to being
960                    //    multiplied with a small alpha).
961                    // (in both cases the result is linearly related to alpha and we can encode how
962                    // it is related to the color in a lookup table)
963                    // NOTE: 212 is the largest color value used here (when alpha isn't 0)
964                    color_factors[i] >> 13
965                } else {
966                    predicted - 14
967                }) as u8;
968                *color = multiplied_color;
969            }
970        }
971    });
972}
973
974/// Strategy for how alpha premultiplication will be applied to an image.
975enum PremultiplyStrategy {
976    UseCpu,
977    UseGpu,
978    // Image is fully opaque.
979    NotNeeded,
980}
981
982impl PremultiplyStrategy {
983    #[rustfmt::skip] // please don't format comment with 'ns/pixel' to a separate line from the value
984    fn determine(image: &RgbaImage) -> Self {
985        // TODO: Would be useful to re-time this after a wgpu update.
986        //
987        // Thresholds below are based on the timing measurements of the CPU based premultiplication
988        // vs ovehead of interacting with the GPU API to perform premultiplication on the GPU.
989        // These timings are quite circumstantial and could vary between machines, wgpu updates,
990        // and changes to the structure of the GPU based path.
991        //
992        // GPU path costs (For calculations I used `57.6 us` as a roughly reasonable estimate of
993        // total time here but that can vary lower and higher. Everything is a bit imprecise here
994        // so I won't list individual timings. The key takeaway is that this can be made more
995        // efficient by avoidiing the create/drop of a texture, texture view, and bind group for
996        // each image. Also, if we didn't need a separate render pass for each target image that
997        // would be helpful as well. Using compute passes and passing data in as a raw buffer may
998        // help with both of these but initial attempts with that ran into issues (e.g. when we get
999        // the ability to have non-srgb views of srgb textures that will be useful)):
1000        // * create/drop texture
1001        // * create/drop texture view
1002        // * create/drop bind group
1003        // * run render pass (NOTE: if many images are processed at once with the same target
1004        //   texture this portion of the cost can be split between them)
1005        //
1006        // CPU path costs:
1007        // * clone image (0.17 ns/pixel (benchmark) - 0.73 ns/pixel (in voxygen))
1008        // * run premultiplication (0.305 ns/pixel (when shortcircuits are always hit) -
1009        //   3.81 ns/pixel (with random alpha))
1010        //
1011        // Shared costs include:
1012        // * write_texture
1013        // * (optional) check for fraction of shortcircuit blocks in image (0.223 ns/pixel)
1014        //
1015        // `ALWAYS_CPU_THRESHOLD` is roughly:
1016        // ("cost of GPU path" + "shortcircuit count cost") / "worst case cost of CPU path per pixel"
1017        //
1018        // `ALWAYS_GPU_THRESHOLD` is NOT: "cost of GPU path" / "best case cost of CPU path per pixel"
1019        // since the cost of checking for whether the CPU path is better at this quantity of pixels
1020        // becomes more than the on the amount of overhead we are willing to add to the worst case
1021        // scenario where we run the short-circuit count check and end up using the GPU path. The
1022        // currently selected value of 200x200 adds at most about ~20% of the cost of the GPU path.
1023        // (TODO: maybe we could have the check bail out early if the results aren't looking
1024        // favorable for the CPU path and/or sample a random subset of the pixels).
1025        //
1026        // `CHECKED_THRESHOLD` is roughly: "cost of GPU path / "best case cost of CPU path per pixel"
1027        const ALWAYS_CPU_THRESHOLD: usize = 120 * 120;
1028        const ALWAYS_GPU_THRESHOLD: usize = 200 * 200;
1029        const CHECKED_THRESHOLD: usize = 240 * 240;
1030
1031        let dims = image.dimensions();
1032        let pixel_count = dims.0 as usize * dims.1 as usize;
1033        if pixel_count <= ALWAYS_CPU_THRESHOLD {
1034            Self::UseCpu
1035        } else if pixel_count > ALWAYS_GPU_THRESHOLD {
1036            Self::UseGpu
1037        } else if let Some(fraction) = fraction_shortcircuit_blocks(image) {
1038            // This seems correct...?
1039            // TODO: I think we technically can exit the fraction checking early if we know the
1040            // total fraction value will be over: (threshold - ALWAYS_CPU_THRESHOLD) /
1041            // (CHECKED_THRESHOLD - ALWAYS_CPU_THRESHOLD).
1042            let threshold = fraction * CHECKED_THRESHOLD as f32
1043                + (1.0 - fraction) * ALWAYS_CPU_THRESHOLD as f32;
1044            if pixel_count as f32 <= threshold {
1045                Self::UseCpu
1046            } else {
1047                Self::UseGpu
1048            }
1049        } else {
1050            Self::NotNeeded
1051        }
1052    }
1053}
1054
1055/// Useful to estimates cost of premultiplying alpha in the provided image via
1056/// the CPU method.
1057///
1058/// Computes the fraction of 4 pixel chunks that are fully translucent or
1059/// opaque. Returns `None` if no premultiplication is needed (i.e. all alpha
1060/// values are 255).
1061fn fraction_shortcircuit_blocks(image: &RgbaImage) -> Option<f32> {
1062    let dims = image.dimensions();
1063    let pixel_count = dims.0 as usize * dims.1 as usize;
1064    let (arrays, end) = (&**image)[..pixel_count * 4].as_chunks::<{ 4 * 4 }>();
1065
1066    // Rgba8 has 4 bytes per pixel there should be no remainder when dividing by 4.
1067    let (end, _) = end.as_chunks::<4>();
1068    let end_is_opaque = end.iter().all(|pixel| pixel[3] == 255);
1069
1070    // 14.6 us for 256x256 image
1071    let num_chunks = arrays.len();
1072    let mut num_translucent = 0;
1073    let mut num_opaque = 0;
1074    arrays.iter().for_each(|pixelx4| {
1075        let v = u128::from_ne_bytes(*pixelx4);
1076        let alpha_mask = 0x000000FF_000000FF_000000FF_000000FF;
1077        let masked = v & alpha_mask;
1078        if masked == 0 {
1079            num_translucent += 1;
1080        } else if masked == alpha_mask {
1081            num_opaque += 1;
1082        }
1083    });
1084
1085    if num_chunks == num_opaque && num_translucent == 0 && end_is_opaque {
1086        None
1087    } else {
1088        Some((num_translucent as f32 + num_opaque as f32) / num_chunks as f32)
1089    }
1090}