mod pixel_art;
pub mod renderer;
pub use renderer::{SampleStrat, Transform};
use crate::{
render::{Renderer, Texture, UiTextureBindGroup, UiUploadBatchId},
ui::KeyedJobs,
};
use common::{figure::Segment, slowjob::SlowJobPool};
use common_base::prof_span;
use guillotiere::{size2, SimpleAtlasAllocator};
use hashbrown::{hash_map::Entry, HashMap};
use image::{DynamicImage, RgbaImage};
use slab::Slab;
use std::{borrow::Cow, hash::Hash, sync::Arc};
use tracing::{error, warn};
use vek::*;
#[derive(Clone)]
pub enum Graphic {
/// NOTE: The second argument is an optional border color. If this is set,
/// we force the image into its own texture and use the border color
/// whenever we sample beyond the image extent. This can be useful, for
/// example, for the map and minimap, which both rotate and may be
/// non-square (meaning if we want to display the whole map and render to a
/// square, we may render out of bounds unless we perform proper
/// clipping).
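/// An illustrative construction (a sketch; `map_image` stands in for an
/// `Arc<DynamicImage>` loaded elsewhere): a fully transparent border color
/// makes out-of-bounds samples invisible.
/// ```ignore
/// let map = Graphic::Image(map_image, Some(Rgba::new(0.0, 0.0, 0.0, 0.0)));
/// ```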
// TODO: probably convert this type to `RgbaImage`.
Image(Arc<DynamicImage>, Option<Rgba<f32>>),
// Note: none of the users keep this Arc currently
Voxel(Arc<Segment>, Transform, SampleStrat),
// TODO: Re-evaluate whether we need this (especially outside conrod context)
Blank,
}
#[derive(Clone, Copy, Debug)]
pub enum Rotation {
None,
Cw90,
Cw180,
Cw270,
/// Orientation of source rectangle that always faces true north.
/// Simple hack to get around Conrod not having space for proper
/// rotation data (though it should be possible to add in other ways).
SourceNorth,
/// Orientation of target rectangle that always faces true north.
/// Simple hack to get around Conrod not having space for proper
/// rotation data (though it should be possible to add in other ways).
TargetNorth,
}
/// Fraction of the graphic cache (atlas) size; images larger than this
/// fraction of the atlas on either axis are stored in individual textures.
const ATLAS_CUTOFF_FRAC: f32 = 0.2;
/// Multiplied by current window size
const GRAPHIC_CACHE_RELATIVE_SIZE: u32 = 1;
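// For example, with the constants above and a 1920x1080 window, the atlas
// texture is 1920x1080, so any image wider than 1920 * 0.2 = 384 px or taller
// than 1080 * 0.2 = 216 px gets its own texture (see the `can_place_in_atlas`
// check in `cache_res`).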
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
pub struct Id(u32);
// TODO these can become invalid when clearing the cache
#[derive(PartialEq, Eq, Hash, Copy, Clone)]
pub struct TexId(usize);
enum CachedDetails {
Atlas {
// Index of the atlas this is cached in.
atlas_idx: usize,
// Whether this texture is valid.
valid: bool,
// Where in the cache texture this is.
aabr: Aabr<u16>,
},
Texture {
// Index of the (unique, non-atlas) texture this is cached in.
index: usize,
// Whether this texture is valid.
valid: bool,
},
}
impl CachedDetails {
/// Get information about this cache entry: texture index,
/// whether the entry is valid, and its bounding box in the referenced
/// texture.
fn info(
&self,
atlases: &[(SimpleAtlasAllocator, usize)],
textures: &Slab<(Arc<Texture>, UiTextureBindGroup, UiUploadBatchId)>,
) -> (usize, bool, Aabr<u16>) {
match *self {
CachedDetails::Atlas {
atlas_idx,
valid,
aabr,
} => (atlases[atlas_idx].1, valid, aabr),
CachedDetails::Texture { index, valid } => {
(index, valid, Aabr {
min: Vec2::zero(),
// NOTE (as cast): We don't accept images larger than u16::MAX (rejected in
// `cache_res`) (and probably would not be able to create a texture this
// large).
//
// Note texture should always match the cached dimensions.
max: textures[index].0.get_dimensions().xy().map(|e| e as u16),
})
},
}
}
/// Invalidate this cache entry.
fn invalidate(&mut self) {
match self {
Self::Atlas { ref mut valid, .. } => {
*valid = false;
},
Self::Texture { ref mut valid, .. } => {
*valid = false;
},
}
}
fn set_valid(&mut self) {
match self {
Self::Atlas { ref mut valid, .. } => {
*valid = true;
},
Self::Texture { ref mut valid, .. } => {
*valid = true;
},
}
}
}
/// Requirements that a particular graphic has with respect to the atlas
/// allocation or independent texture it will be stored in.
///
/// If this matches between an old graphic and a new one which is replacing it,
/// we can reuse any of the corresponding locations where it is cached in
/// textures on the GPU. That is, we can invalidate such locations and upload the
/// new graphic there, rather than needing to allocate a new texture (or new
/// location in an atlas).
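///
/// For example, replacing a 64x64 `Graphic::Image` with another 64x64 image
/// can reuse the same cached atlas location, while swapping it for a
/// `Graphic::Voxel` cannot.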
#[derive(PartialEq)]
enum TextureRequirements {
/// These are uploaded to the GPU in the original resolution of the image
/// supplied by the `Graphic` and any scaling is done during sampling in
/// the UI fragment shader.
Fixed {
size: Vec2<u16>,
/// Graphics with a border color specified are placed into their own
/// individual textures so that the border color can be set
/// there. (Note: this is partially a theoretical description as
/// border color options are limited in the current graphics API).
border_color: Option<Rgba<f32>>,
},
/// These are rasterized to the exact resolution that they will be displayed
/// at and then uploaded to the GPU. This corresponds to
/// `Graphic::Voxel`. There may be multiple copies on the GPU if
/// different resolutions are requested.
///
/// It is expected that the requested sizes will generally not differ when
/// switching out a graphic. Thus, cached locations for dependent graphics
/// should always be invalidated (rather than removed), since they will be
/// reusable if the requested size is the same.
Dependent,
}
/// These parameters alone determine how a place in an atlas will be found or
/// how a texture will be created to hold the image for a graphic.
struct TextureParameters {
size: Vec2<u16>,
border_color: Option<Rgba<f32>>,
}
/// Key used to refer to an instance of a graphic that has been uploaded to the
/// GPU.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct CacheKey {
graphic_id: Id,
/// This is `Some` for `TextureRequirements::Dependent`.
size: Option<Vec2<u16>>,
}
impl TextureRequirements {
fn from_graphic(graphic: &Graphic) -> Option<Self> {
match graphic {
Graphic::Image(image, border_color) => {
// Image sizes over u16::MAX are not supported (and we would probably not be
// able to create a texture large enough to hold them on the GPU anyway)!
let image_dims = match (u16::try_from(image.width()), u16::try_from(image.height()))
{
(Ok(x), Ok(y)) if x != 0 && y != 0 => Vec2::new(x, y),
_ => {
error!(
"Image dimensions greater than u16::MAX are not supported! Supplied \
image size: ({}, {}).",
image.width(),
image.height(),
);
// TODO: reasonable to return None on this error case? We could potentially
// validate image sizes in add_graphic/replace_graphic?
return None;
},
};
Some(Self::Fixed {
size: image_dims,
border_color: *border_color,
})
},
Graphic::Voxel(_, _, _) => Some(Self::Dependent),
Graphic::Blank => None,
}
}
#[allow(clippy::wrong_self_convention)] // type is spiritually Copy
fn to_key_and_tex_parameters(
self,
graphic_id: Id,
requested_size: Vec2<u16>,
) -> (CacheKey, TextureParameters) {
// NOTE: Any external parameters which influence the value of the returned
// `TextureParameters` must be included in the `CacheKey`. Otherwise,
// invalidation and subsequent re-use of cache locations based on the
// value of `self` would be wrong.
let (size, border_color, key_size) = match self {
Self::Fixed { size, border_color } => (size, border_color, None),
Self::Dependent => (requested_size, None, Some(requested_size)),
};
(
CacheKey {
graphic_id,
size: key_size,
},
TextureParameters { size, border_color },
)
}
}
// Caches graphics on the GPU; only deallocates when the screen resolution
// changes (at which point it is completely cleared).
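//
// A typical lifecycle (a sketch; `renderer`, `image`, etc. are illustrative):
//
//     let mut cache = GraphicCache::new(renderer);
//     let id = cache.add_graphic(Graphic::Image(image, None));
//     // each frame, `cache_res` uploads the graphic if needed and returns
//     // where to sample it from (see its docs below)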
pub struct GraphicCache {
// TODO replace with slotmap
graphic_map: HashMap<Id, Graphic>,
/// Next id to use when a new graphic is added
next_id: u32,
/// Atlases with the index of their texture in the textures slab.
atlases: Vec<(SimpleAtlasAllocator, usize)>,
/// The third tuple element identifies the batch of pending premultiply +
/// upload operations for this texture this frame. The purpose of this is to
/// collect all the operations together so that a single renderpass is
/// performed for each target texture.
textures: Slab<(Arc<Texture>, UiTextureBindGroup, UiUploadBatchId)>,
/// The location and details of graphics cached on the GPU.
///
/// Graphic::Voxel images include the dimensions they were rasterized at in
/// the key. Other images are scaled as part of sampling them on the
/// GPU.
cache_map: HashMap<CacheKey, CachedDetails>,
keyed_jobs: KeyedJobs<CacheKey, RgbaImage>,
}
impl GraphicCache {
pub fn new(renderer: &mut Renderer) -> Self {
let (atlas, (tex, bind)) = create_atlas_texture(renderer);
let mut textures = Slab::new();
let tex_id = textures.insert((tex, bind, UiUploadBatchId::default()));
Self {
graphic_map: HashMap::default(),
next_id: 0,
atlases: vec![(atlas, tex_id)],
textures,
cache_map: HashMap::default(),
keyed_jobs: KeyedJobs::new("IMAGE_PROCESSING"),
}
}
pub fn add_graphic(&mut self, graphic: Graphic) -> Id {
let id = self.next_id;
self.next_id = id.wrapping_add(1);
let id = Id(id);
self.graphic_map.insert(id, graphic);
id
}
pub fn replace_graphic(&mut self, id: Id, graphic: Graphic) {
let (old, new) = match self.graphic_map.entry(id) {
Entry::Occupied(o) => {
let slot_mut = o.into_mut();
let old = core::mem::replace(slot_mut, graphic);
(old, slot_mut)
},
Entry::Vacant(v) => {
// This was not an update, so no need to cleanup caches.
v.insert(graphic);
return;
},
};
let old_requirements = TextureRequirements::from_graphic(&old);
let new_requirements = TextureRequirements::from_graphic(new);
let should_invalidate = old_requirements == new_requirements && old_requirements.is_some();
// Invalidate if possible or remove from caches.
// Maybe make this more efficient if replace_graphic is used more often
// (especially since we should know the exact key for non-voxel
// graphics).
//
// NOTE: at the time of writing, replace_graphic is only used for voxel minimap
// updates and item image reloading.
if should_invalidate {
self.cache_map.iter_mut().for_each(|(key, details)| {
if key.graphic_id == id {
details.invalidate();
}
});
} else {
let _ = self.cache_map.extract_if(|key, details| {
if key.graphic_id == id {
match details {
// NOTE: if replace_graphic is used continuously for small images (i.e.
// images placed into an atlas) of different sizes, this can use up our
// atlas space, since spots in the atlas can't be reused. (This scenario is
// now possible with scaling being done during sampling rather than placing
// a resized version into the atlas.) This is not expected to occur in the
// practical cases we plan to support here (i.e. the size of the replacement
// image will always be the same).
CachedDetails::Atlas { .. } => {},
CachedDetails::Texture { index, .. } => {
self.textures.remove(*index);
},
};
true
} else {
false
}
});
}
}
pub fn get_graphic(&self, id: Id) -> Option<&Graphic> { self.graphic_map.get(&id) }
/// Used to acquire textures for rendering
pub fn get_tex(&self, id: TexId) -> (&Texture, &UiTextureBindGroup) {
let (tex, bind, _upload_batch) = self.textures.get(id.0).expect("Invalid TexId used");
(tex, bind)
}
pub fn get_graphic_dims(&self, (id, rot): (Id, Rotation)) -> Option<(u32, u32)> {
use image::GenericImageView;
self.get_graphic(id)
.and_then(|graphic| match graphic {
Graphic::Image(image, _) => Some(image.dimensions()),
Graphic::Voxel(segment, _, _) => {
use common::vol::SizedVol;
let size = segment.size();
// TODO: HACK because they can be rotated arbitrarily, remove
// (and they can be rasterized at arbitrary resolution)
// (might need to return None here?)
Some((size.x, size.z))
},
Graphic::Blank => None,
})
.and_then(|(w, h)| match rot {
Rotation::None | Rotation::Cw180 => Some((w, h)),
Rotation::Cw90 | Rotation::Cw270 => Some((h, w)),
// TODO: need dims for these?
Rotation::SourceNorth | Rotation::TargetNorth => None,
})
}
pub fn clear_cache(&mut self, renderer: &mut Renderer) {
self.cache_map.clear();
let (atlas, (tex, bind)) = create_atlas_texture(renderer);
let mut textures = Slab::new();
let tex_id = textures.insert((tex, bind, UiUploadBatchId::default()));
self.atlases = vec![(atlas, tex_id)];
self.textures = textures;
}
/// Source rectangle should be from 0 to 1, and represents a bounding box
/// for the source image of the graphic.
///
/// # Panics
///
/// Panics if one of the lengths in requested_dims is zero.
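///
/// # Example
///
/// An illustrative call (a sketch; `graphic_cache`, `renderer`, and `pool`
/// are assumed to exist):
/// ```ignore
/// // Display the whole source image at 64x64 px, unrotated.
/// let source = Aabr { min: Vec2::new(0.0, 0.0), max: Vec2::new(1.0, 1.0) };
/// if let Some(((uv_aabr, scale), tex_id)) = graphic_cache.cache_res(
///     renderer, pool, id, Vec2::new(64, 64), source, Rotation::None,
/// ) {
///     let (tex, bind) = graphic_cache.get_tex(tex_id);
///     // ... sample `uv_aabr` (in texel coordinates of `tex`) when drawing ...
/// }
/// ```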
pub fn cache_res(
&mut self,
renderer: &mut Renderer,
pool: Option<&SlowJobPool>,
graphic_id: Id,
// TODO: if we aren't resizing here we can potentially upload the image earlier... (as long
// as this doesn't lead to uploading too much unused stuff). (currently not sure whether it
// would be an overall gain to pursue this.)
requested_dims: Vec2<u16>,
source: Aabr<f64>,
rotation: Rotation,
) -> Option<((Aabr<f64>, Vec2<f32>), TexId)> {
assert!(requested_dims.map(|e| e != 0).reduce_and());
let requested_dims_upright = match rotation {
// The image is stored on the GPU with no rotation, so we need to swap the dimensions
// here to get the resolution that the image will be displayed at but re-oriented into
// the "upright" space that the image is stored in and sampled from (this can be bit
// confusing initially / hard to explain).
Rotation::Cw90 | Rotation::Cw270 => requested_dims.yx(),
Rotation::None | Rotation::Cw180 => requested_dims,
Rotation::SourceNorth => requested_dims,
Rotation::TargetNorth => requested_dims,
};
// Rotate aabr according to requested rotation.
let rotated_aabr = |Aabr { min, max }| match rotation {
Rotation::None | Rotation::SourceNorth | Rotation::TargetNorth => Aabr { min, max },
Rotation::Cw90 => Aabr {
min: Vec2::new(min.x, max.y),
max: Vec2::new(max.x, min.y),
},
Rotation::Cw180 => Aabr { min: max, max: min },
Rotation::Cw270 => Aabr {
min: Vec2::new(max.x, min.y),
max: Vec2::new(min.x, max.y),
},
};
// Scale aabr according to provided source rectangle.
let scaled_aabr = |aabr: Aabr<_>| {
let size: Vec2<f64> = aabr.size().into();
Aabr {
min: size.mul_add(source.min, aabr.min),
max: size.mul_add(source.max, aabr.min),
}
};
// Apply all transformations.
// TODO: Verify rotation is being applied correctly.
let transformed_aabr_and_scale = |aabr| {
let scaled = scaled_aabr(aabr);
// Calculate how many displayed pixels there are for each pixel in the source
// image. We need this to calculate where to sample in the shader to
// retain crisp pixel borders when scaling the image.
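// E.g. a 32x32 px region of the source image displayed at 64x64 px on
// screen gives a scale of 2.0 on each axis.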
let scale = requested_dims_upright.map2(
Vec2::from(scaled.size()),
|screen_pixels, sample_pixels: f64| screen_pixels as f32 / sample_pixels as f32,
);
let transformed = rotated_aabr(scaled);
(transformed, scale)
};
let Self {
textures,
atlases,
cache_map,
graphic_map,
..
} = self;
let graphic = match graphic_map.get(&graphic_id) {
Some(g) => g,
None => {
warn!(
?graphic_id,
"A graphic was requested via an id which is not in use"
);
return None;
},
};
let requirements = TextureRequirements::from_graphic(graphic)?;
let (key, texture_parameters) =
requirements.to_key_and_tex_parameters(graphic_id, requested_dims_upright);
let details = match cache_map.entry(key) {
Entry::Occupied(mut details) => {
let details = details.get_mut();
let (idx, valid, aabr) = details.info(atlases, textures);
// Check if the cached version has been invalidated by replacing the underlying
// graphic
if !valid {
// Create image
let (image, gpu_premul) = prepare_graphic(
graphic,
key,
requested_dims_upright,
&mut self.keyed_jobs,
pool,
)?;
// Ensure that the size used to determine if the cached version is reusable
// matches the size of the image produced by prepare_graphic (a mismatch
// here would be a bug).
assert_eq!(
image.dimensions(),
texture_parameters.size.map(u32::from).into_tuple()
);
// Transfer to the gpu
let (ref texture, _, ref mut upload_batch) = &mut textures[idx];
upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
details.set_valid();
}
return Some((
transformed_aabr_and_scale(aabr.map(|e| e as f64)),
TexId(idx),
));
},
Entry::Vacant(details) => details,
};
// Construct image in an optional threadpool.
let (image, gpu_premul) = prepare_graphic(
graphic,
key,
requested_dims_upright,
&mut self.keyed_jobs,
pool,
)?;
// Assert dimensions of image from `prepare_graphic` are as expected!
assert_eq!(
image.dimensions(),
texture_parameters.size.map(u32::from).into_tuple()
);
// Image dimensions in the format used by the allocator crate.
let image_dims_size2d = size2(
i32::from(texture_parameters.size.x),
i32::from(texture_parameters.size.y),
);
// Now we allocate space on the gpu (either in an atlas or an independent
// texture) and upload the image to that location.
let atlas_size = atlas_size(renderer);
// Graphics that request a border color or which are over a particular size
// compared to the atlas size are sent to their own textures.
let can_place_in_atlas = texture_parameters.border_color.is_none()
&& atlas_size
.map2(texture_parameters.size, |a, d| {
a as f32 * ATLAS_CUTOFF_FRAC >= d as f32
})
.reduce_and();
let location = if can_place_in_atlas {
// Fit into an atlas
let mut loc = None;
for (atlas_idx, &mut (ref mut atlas, texture_idx)) in atlases.iter_mut().enumerate() {
if let Some(rectangle) = atlas.allocate(image_dims_size2d) {
let aabr = aabr_from_alloc_rect(rectangle);
loc = Some(CachedDetails::Atlas {
atlas_idx,
valid: true,
aabr,
});
let (ref texture, _, ref mut upload_batch) = &mut textures[texture_idx];
upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
break;
}
}
match loc {
Some(loc) => loc,
// Create a new atlas
None => {
let (mut atlas, (tex, bind)) = create_atlas_texture(renderer);
let aabr = atlas
.allocate(image_dims_size2d)
.map(aabr_from_alloc_rect)
.unwrap();
// NOTE: All mutations happen only after the texture creation succeeds!
let tex_idx = textures.insert((tex, bind, UiUploadBatchId::default()));
let atlas_idx = atlases.len();
atlases.push((atlas, tex_idx));
let (ref texture, _, ref mut upload_batch) = &mut textures[tex_idx];
upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
CachedDetails::Atlas {
atlas_idx,
valid: true,
aabr,
}
},
}
} else {
// Create a texture just for this
let (tex, bind, upload_batch) =
create_image(renderer, &image, texture_parameters, gpu_premul);
// NOTE: All mutations happen only after the texture creation and upload
// initiation succeeds! (completing the upload does not have any
// failure cases afaik)
let index = textures.insert((tex, bind, upload_batch));
CachedDetails::Texture { index, valid: true }
};
// Extract information from cache entry.
let (idx, _, aabr) = location.info(atlases, textures);
// Insert into cached map
details.insert(location);
Some((
transformed_aabr_and_scale(aabr.map(|e| e as f64)),
TexId(idx),
))
}
}
/// Prepare the graphic into the form that will be uploaded to the GPU.
///
/// For voxel graphics, draws the graphic at the specified dimensions.
///
/// Alpha premultiplication is necessary so that images can be linearly
/// filtered on the GPU. Premultiplication can occur either here or on the GPU,
/// depending on the size of the image and other factors. If premultiplication
/// on the GPU is needed, the returned bool will be `true`.
fn prepare_graphic<'graphic>(
graphic: &'graphic Graphic,
cache_key: CacheKey,
dims: Vec2<u16>,
keyed_jobs: &mut KeyedJobs<CacheKey, RgbaImage>,
pool: Option<&SlowJobPool>,
) -> Option<(Cow<'graphic, RgbaImage>, bool)> {
prof_span!("prepare_graphic");
match graphic {
Graphic::Blank => None,
Graphic::Image(image, _border_color) => {
// Image will be rescaled when sampling from it on the GPU so we don't
// need to resize it here.
//
// TODO: We could potentially push premultiplication even earlier (e.g. to the
// time of loading images or packaging veloren for distribution).
let mut rgba_cow = image.as_rgba8().map_or_else(
|| {
// TODO: we may want to require loading in as the rgba8 format so we don't have
// to perform conversion here. On the other hand, we can take advantage of
// certain formats to know that alpha premultiplication doesn't need to be
// performed (but we would probably just want to store that with the loaded
// rgba8 format).
Cow::Owned(image.to_rgba8())
},
Cow::Borrowed,
);
// NOTE: We do premultiplication on the main thread since if it would be
// expensive enough to do in the background we would just do it on
// the GPU. Could still use `rayon` to parallelize this work, if
// needed.
let premultiply_strategy = PremultiplyStrategy::determine(&rgba_cow);
let needs_gpu_premultiply = match premultiply_strategy {
PremultiplyStrategy::UseGpu => true,
PremultiplyStrategy::NotNeeded => false,
PremultiplyStrategy::UseCpu => {
// NOTE: to_mut will clone the image if it was Cow::Borrowed
premultiply_alpha(rgba_cow.to_mut());
false
},
};
Some((rgba_cow, needs_gpu_premultiply))
},
Graphic::Voxel(segment, trans, sample_strat) => keyed_jobs
.spawn(pool, cache_key, || {
let segment = Arc::clone(segment);
let (trans, sample_strat) = (*trans, *sample_strat);
move |_| {
// TODO: for now we always use CPU premultiplication for these, may want to
// re-evaluate this after zoomy worldgen branch is merged (and it is more clear
// when these jobs go to the background thread pool or not).
// Render voxel model at requested resolution
let mut image = renderer::draw_vox(&segment, dims, trans, sample_strat);
premultiply_alpha(&mut image);
image
}
})
.map(|(_, v)| (Cow::Owned(v), false)),
}
}
fn atlas_size(renderer: &Renderer) -> Vec2<u32> {
let max_texture_size = renderer.max_texture_size();
renderer
.resolution()
.map(|e| (e * GRAPHIC_CACHE_RELATIVE_SIZE).clamp(512, max_texture_size))
}
/// This creates a texture suitable for sampling from during the UI pass and
/// rendering to during alpha premultiplication upload passes.
fn create_image_texture(
renderer: &mut Renderer,
size: Vec2<u32>,
address_mode: Option<wgpu::AddressMode>,
) -> (Arc<Texture>, UiTextureBindGroup) {
// TODO: Right now we have to manually clear images to workaround AMD DX bug,
// for this we use Queue::write_texture which needs this usage. I think this
// may be fixed in newer wgpu versions that auto-clear the texture.
let workaround_usage = wgpu::TextureUsages::COPY_DST;
let tex_info = wgpu::TextureDescriptor {
label: None,
size: wgpu::Extent3d {
width: size.x,
height: size.y,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8UnormSrgb,
usage: wgpu::TextureUsages::RENDER_ATTACHMENT // GPU premultiply
| wgpu::TextureUsages::COPY_DST // CPU premultiply
| wgpu::TextureUsages::TEXTURE_BINDING // using image in ui rendering
| workaround_usage,
view_formats: &[],
};
let view_info = wgpu::TextureViewDescriptor {
format: Some(tex_info.format),
dimension: Some(wgpu::TextureViewDimension::D2),
..Default::default()
};
let address_mode = address_mode.unwrap_or(wgpu::AddressMode::ClampToEdge);
let sampler_info = wgpu::SamplerDescriptor {
address_mode_u: address_mode,
address_mode_v: address_mode,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
..Default::default()
};
let tex = renderer.create_texture_raw(&tex_info, &view_info, &sampler_info);
let bind = renderer.ui_bind_texture(&tex);
(Arc::new(tex), bind)
}
fn create_atlas_texture(
renderer: &mut Renderer,
) -> (SimpleAtlasAllocator, (Arc<Texture>, UiTextureBindGroup)) {
let size = atlas_size(renderer);
// Note: here we assume the max texture size is under i32::MAX.
let atlas = SimpleAtlasAllocator::new(size2(size.x as i32, size.y as i32));
let (tex, bind) = create_image_texture(renderer, size, None);
(atlas, (tex, bind))
}
fn aabr_from_alloc_rect(rect: guillotiere::Rectangle) -> Aabr<u16> {
let (min, max) = (rect.min, rect.max);
// Note: here we assume the max texture size (and thus the maximum size of the
// atlas) is under `u16::MAX`.
Aabr {
min: Vec2::new(min.x as u16, min.y as u16),
max: Vec2::new(max.x as u16, max.y as u16),
}
}
fn upload_image(
renderer: &mut Renderer,
target_texture: &Arc<Texture>,
upload_batch: &mut UiUploadBatchId,
image: &RgbaImage,
aabr: Aabr<u16>,
premultiply_on_gpu: bool,
) {
// Check that this image and the target aabr are the same size (otherwise there
// is a bug in this module).
debug_assert_eq!(aabr.map(u32::from).size().into_tuple(), image.dimensions());
if premultiply_on_gpu {
*upload_batch =
renderer.ui_premultiply_upload(target_texture, *upload_batch, image, aabr.min);
} else {
let aabr = aabr.map(u32::from);
let offset = aabr.min.into_array();
let size = aabr.size().into_array();
// upload directly
renderer.update_texture(
target_texture,
offset,
size,
// NOTE: Rgba texture, so each pixel is 4 bytes, ergo this cannot fail.
// We make the cast parameters explicit for clarity.
bytemuck::cast_slice::<u8, [u8; 4]>(
&(&**image)[..size[0] as usize * size[1] as usize * 4],
),
)
}
}
// This is used for border_color.is_some() images (i.e. the map image).
fn create_image(
renderer: &mut Renderer,
image: &RgbaImage,
texture_parameters: TextureParameters,
premultiply_on_gpu: bool,
) -> (Arc<Texture>, UiTextureBindGroup, UiUploadBatchId) {
let (tex, bind) = create_image_texture(
renderer,
texture_parameters.size.map(u32::from),
texture_parameters
.border_color
// TODO: either use the desktop only border color or just emulate this
//.map(|c| c.into_array().into()),
.map(|_| wgpu::AddressMode::ClampToBorder),
);
let mut upload_batch = UiUploadBatchId::default();
let aabr = Aabr {
min: Vec2::zero(),
max: texture_parameters.size,
};
upload_image(
renderer,
&tex,
&mut upload_batch,
image,
aabr,
premultiply_on_gpu,
);
(tex, bind, upload_batch)
}
// CPU-side alpha premultiplication implementation.
pub struct PremultiplyLookupTable {
alpha: [u16; 256],
// This is for both colors that are always below the linear transform threshold (of the
// transform between linear/non-linear srgb) and colors that start above the threshold when
// transforming into linear srgb and then fall below it after being multiplied by alpha (before
// being transformed out of linear srgb).
color: [u16; 256],
}
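// The integer math below is derived from the gamma-2.4 region of the sRGB
// transfer functions: encode(decode(c) * a) ~= (c + 0.055) * a^(1/2.4) - 0.055
// for channels above the linear threshold, or roughly (C + 14) * a^(1/2.4) - 14
// in 0-255 units. This is where the integer constant 14 used in
// `premultiply_alpha` comes from.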
impl Default for PremultiplyLookupTable {
fn default() -> Self {
#[rustfmt::skip]
fn accurate_to_linear(c: u8) -> f32 {
let c = c as f32 / 255.0;
// https://en.wikipedia.org/wiki/SRGB#Transformation
if c <= 0.04045 {
c / 12.92
} else {
// 0.055 * 255 ~= 14 (see the note above `impl Default`)
((c + 0.055) / 1.055).powf(2.4)
}
}
use core::array;
let alpha = array::from_fn(|alpha| {
// NOTE: u16::MAX + 1 here relies on the max alpha being short-circuited (and
// not using this table). We multiply by this factor since it is a
// power of 2, which means later demultiplying it will optimize to a
// bitshift.
(((alpha as f32 / 255.0).powf(1.0 / 2.4) * (u16::MAX as f32 + 1.0)) + 0.5) as u16
});
let color = array::from_fn(|color| {
(if color <= 10 {
// <= 10 means the transform is linear!
color as f32 / 255.0
} else {
// Here the transform into linear srgb isn't linear but the transform out of it is.
//
// This is transform into and out of linear srgb with the theoretical alpha
// multiplication factored out.
accurate_to_linear(color as u8) * 12.92
}
// take advantage of the precision offered by u16
* (1 << 13) as f32
// round to the nearest integer when the cast truncates
+ 0.5) as u16
});
Self { alpha, color }
}
}
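// For reference, the exact per-channel computation that `premultiply_alpha`
// approximates with the lookup tables (a sketch, not part of the module):
//
//     fn premultiply_channel_exact(color: u8, alpha: u8) -> u8 {
//         fn decode(c: u8) -> f32 {
//             let c = c as f32 / 255.0;
//             if c <= 0.04045 { c / 12.92 } else { ((c + 0.055) / 1.055).powf(2.4) }
//         }
//         fn encode(l: f32) -> f32 {
//             if l <= 0.0031308 { l * 12.92 } else { 1.055 * l.powf(1.0 / 2.4) - 0.055 }
//         }
//         (encode(decode(color) * alpha as f32 / 255.0) * 255.0 + 0.5) as u8
//     }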
fn premultiply_alpha(image: &mut RgbaImage) {
lazy_static::lazy_static! {
static ref LOOKUP: PremultiplyLookupTable = Default::default();
}
let lookup = &*LOOKUP;
// TODO: Apparently it is possible for an ImageBuffer's raw vec to have more
// pixels than the dimensions of the actual image (I don't think we actually
// have this occurring, but we should probably fix other spots that use the
// raw buffer). See:
// https://github.com/image-rs/image/blob/a1ce569afd476e881acafdf9e7a5bce294d0db9a/src/buffer.rs#L664
let dims = image.dimensions();
let image_buffer_len = dims.0 as usize * dims.1 as usize * 4;
let (arrays, end) = (&mut **image)[..image_buffer_len].as_chunks_mut::<{ 4 * 4 }>();
// Rgba8 has 4 bytes per pixel, so there should be no remainder when dividing by 4.
let (end, _) = end.as_chunks_mut::<4>();
end.iter_mut().for_each(|pixel| {
let alpha = pixel[3];
if alpha == 0 {
*pixel = [0; 4];
return;
} else if alpha == 255 {
return;
};
for color in &mut pixel[..3] {
let predicted = ((lookup.alpha[alpha as usize] as u32) * (*color as u32 + 14) + 32433)
/ (u16::MAX as u32 + 1);
let multiplied_color = (if predicted < 9 + 14 {
(lookup.color[*color as usize] as u32 * alpha as u32 + 4096) >> 13
} else {
predicted - 14
}) as u8;
*color = multiplied_color;
}
});
arrays.iter_mut().for_each(|pixelx4| {
// Short-circuit for alpha == 0 or 255
// This adds ~7 us (worst case) for a 256x256 image.
// Best case is decreased to 20 us total time.
if pixelx4[3] == pixelx4[7] && pixelx4[3] == pixelx4[11] && pixelx4[3] == pixelx4[15] {
if pixelx4[3] == 0 {
*pixelx4 = [0; 16];
return;
} else if pixelx4[3] == u8::MAX {
return;
}
}
// Lookup transformed alpha values for each pixel first.
// Putting this here seems to make things slightly faster.
let factors = [
lookup.alpha[pixelx4[3] as usize],
lookup.alpha[pixelx4[7] as usize],
lookup.alpha[pixelx4[11] as usize],
lookup.alpha[pixelx4[15] as usize],
];
for pixel_index in 0..4 {
let alpha_factor = factors[pixel_index];
let alpha = pixelx4[pixel_index * 4 + 3];
// Putting this code outside the loop makes things take ~25% less time.
let color_factors = [
lookup.color[pixelx4[pixel_index * 4 + 0] as usize] as u32 * alpha as u32 + 4096,
lookup.color[pixelx4[pixel_index * 4 + 1] as usize] as u32 * alpha as u32 + 4096,
lookup.color[pixelx4[pixel_index * 4 + 2] as usize] as u32 * alpha as u32 + 4096,
];
for i in 0..3 {
let color = &mut pixelx4[pixel_index * 4 + i];
// Loosely based on transform to linear and back (above threshold) (this is
// where use of 14 comes from).
// `32433` selected via trial and error to reduce the number of mismatches.
// `/ (u16::MAX as u32 + 1)` transforms back to `u8` precision (we add 1 so it
// will be a division by a power of 2 which optimizes well).
let predicted =
((alpha_factor as u32) * (*color as u32 + 14) + 32328) / (u16::MAX as u32 + 1);
let multiplied_color = (if predicted < 9 + 14 {
// Here we handle two cases:
// 1. When the transform starts and ends as linear.
// 2. When the color is over the linear threshold for the transform into linear
// space but below this threshold when transforming back out (due to being
// multiplied with a small alpha).
// (in both cases the result is linearly related to alpha and we can encode how
// it is related to the color in a lookup table)
// NOTE: 212 is the largest color value used here (when alpha isn't 0)
color_factors[i] >> 13
} else {
predicted - 14
}) as u8;
*color = multiplied_color;
}
}
});
}
/// Strategy for how alpha premultiplication will be applied to an image.
enum PremultiplyStrategy {
UseCpu,
UseGpu,
// Image is fully opaque.
NotNeeded,
}
impl PremultiplyStrategy {
#[rustfmt::skip] // please don't format comment with 'ns/pixel' to a separate line from the value
fn determine(image: &RgbaImage) -> Self {
// TODO: Would be useful to re-time this after a wgpu update.
//
// Thresholds below are based on the timing measurements of the CPU based premultiplication
// vs overhead of interacting with the GPU API to perform premultiplication on the GPU.
// These timings are quite circumstantial and could vary between machines, wgpu updates,
// and changes to the structure of the GPU based path.
//
// GPU path costs (For calculations I used `57.6 us` as a roughly reasonable estimate of
// total time here but that can vary lower and higher. Everything is a bit imprecise here
// so I won't list individual timings. The key takeaway is that this can be made more
// efficient by avoiding the create/drop of a texture, texture view, and bind group for
// each image. Also, if we didn't need a separate render pass for each target image that
// would be helpful as well. Using compute passes and passing data in as a raw buffer may
// help with both of these but initial attempts with that ran into issues (e.g. when we get
// the ability to have non-srgb views of srgb textures that will be useful)):
// * create/drop texture
// * create/drop texture view
// * create/drop bind group
// * run render pass (NOTE: if many images are processed at once with the same target
// texture this portion of the cost can be split between them)
//
// CPU path costs:
// * clone image (0.17 ns/pixel (benchmark) - 0.73 ns/pixel (in voxygen))
// * run premultiplication (0.305 ns/pixel (when shortcircuits are always hit) -
// 3.81 ns/pixel (with random alpha))
//
// Shared costs include:
// * write_texture
// * (optional) check for fraction of shortcircuit blocks in image (0.223 ns/pixel)
//
// `ALWAYS_CPU_THRESHOLD` is roughly:
// ("cost of GPU path" + "shortcircuit count cost") / "worst case cost of CPU path per pixel"
//
// `ALWAYS_GPU_THRESHOLD` is NOT: "cost of GPU path" / "best case cost of CPU path per pixel"
// since the cost of checking whether the CPU path is better at this quantity of pixels
// becomes more than the amount of overhead we are willing to add to the worst case
// scenario where we run the short-circuit count check and end up using the GPU path. The
// currently selected value of 200x200 adds at most about ~20% of the cost of the GPU path.
// (TODO: maybe we could have the check bail out early if the results aren't looking
// favorable for the CPU path and/or sample a random subset of the pixels).
//
// `CHECKED_THRESHOLD` is roughly: "cost of GPU path" / "best case cost of CPU path per pixel"
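// (e.g. with the numbers above: ~57.6 us / (0.73 + 0.305) ns per pixel
// ~= 55,700 pixels, which is close to 240 * 240 = 57,600)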
const ALWAYS_CPU_THRESHOLD: usize = 120 * 120;
const ALWAYS_GPU_THRESHOLD: usize = 200 * 200;
const CHECKED_THRESHOLD: usize = 240 * 240;
let dims = image.dimensions();
let pixel_count = dims.0 as usize * dims.1 as usize;
if pixel_count <= ALWAYS_CPU_THRESHOLD {
Self::UseCpu
} else if pixel_count > ALWAYS_GPU_THRESHOLD {
Self::UseGpu
} else if let Some(fraction) = fraction_shortcircuit_blocks(image) {
// This seems correct...?
// TODO: I think we technically can exit the fraction checking early if we know the
// total fraction value will be over: (threshold - ALWAYS_CPU_THRESHOLD) /
// (CHECKED_THRESHOLD - ALWAYS_CPU_THRESHOLD).
let threshold = fraction * CHECKED_THRESHOLD as f32
+ (1.0 - fraction) * ALWAYS_CPU_THRESHOLD as f32;
if pixel_count as f32 <= threshold {
Self::UseCpu
} else {
Self::UseGpu
}
} else {
Self::NotNeeded
}
}
}
/// Useful for estimating the cost of premultiplying alpha in the provided
/// image via the CPU method.
///
/// Computes the fraction of 4 pixel chunks that are fully translucent or
/// opaque. Returns `None` if no premultiplication is needed (i.e. all alpha
/// values are 255).
fn fraction_shortcircuit_blocks(image: &RgbaImage) -> Option<f32> {
let dims = image.dimensions();
let pixel_count = dims.0 as usize * dims.1 as usize;
let (arrays, end) = (&**image)[..pixel_count * 4].as_chunks::<{ 4 * 4 }>();
// Rgba8 has 4 bytes per pixel, so there should be no remainder when dividing by 4.
let (end, _) = end.as_chunks::<4>();
let end_is_opaque = end.iter().all(|pixel| pixel[3] == 255);
// 14.6 us for 256x256 image
let num_chunks = arrays.len();
let mut num_translucent = 0;
let mut num_opaque = 0;
arrays.iter().for_each(|pixelx4| {
let v = u128::from_ne_bytes(*pixelx4);
// Select the alpha byte (index 3) of each pixel. Building the mask with
// `from_ne_bytes` keeps it consistent with `v` on either endianness (a
// `0x000000FF`-style literal would select the red bytes on little-endian
// targets).
let alpha_mask =
u128::from_ne_bytes([0, 0, 0, 0xFF, 0, 0, 0, 0xFF, 0, 0, 0, 0xFF, 0, 0, 0, 0xFF]);
let masked = v & alpha_mask;
if masked == 0 {
num_translucent += 1;
} else if masked == alpha_mask {
num_opaque += 1;
}
});
if num_chunks == num_opaque && num_translucent == 0 && end_is_opaque {
None
} else {
Some((num_translucent as f32 + num_opaque as f32) / num_chunks as f32)
}
}