veloren_voxygen/ui/graphic/mod.rs
1mod pixel_art;
2pub mod renderer;
3
4pub use renderer::{SampleStrat, Transform};
5
6use crate::{
7 render::{Renderer, Texture, UiTextureBindGroup, UiUploadBatchId},
8 ui::KeyedJobs,
9};
10use common::{figure::Segment, slowjob::SlowJobPool};
11use common_base::prof_span;
12use guillotiere::{SimpleAtlasAllocator, size2};
13use hashbrown::{HashMap, hash_map::Entry};
14use image::{DynamicImage, RgbaImage};
15use slab::Slab;
16use std::{borrow::Cow, hash::Hash, sync::Arc};
17use tracing::{error, warn};
18use vek::*;
19
/// A source graphic that can be registered with the `GraphicCache` and later
/// prepared for upload to the GPU.
#[derive(Clone)]
pub enum Graphic {
    /// An image, uploaded at its own resolution (it is not resized on the CPU;
    /// scaling happens when it is sampled).
    ///
    /// NOTE: The second argument is an optional border color. If this is set,
    /// we force the image into its own texture and use the border color
    /// whenever we sample beyond the image extent. This can be useful, for
    /// example, for the map and minimap, which both rotate and may be
    /// non-square (meaning if we want to display the whole map and render to a
    /// square, we may render out of bounds unless we perform proper
    /// clipping).
    // TODO: probably convert this type to `RgbaImage`.
    Image(Arc<DynamicImage>, Option<Rgba<f32>>),
    /// A voxel model that is rasterized (with the given transform and
    /// sampling strategy) at the resolution it is requested at.
    // Note: none of the users keep this Arc currently
    Voxel(Arc<Segment>, Transform, SampleStrat),
    /// A graphic with no content: it has no texture requirements and nothing
    /// is ever prepared or uploaded for it.
    // TODO: Re-evaluate whether we need this (especially outside conrod context)
    Blank,
}
36
/// Rotation that is applied to a graphic when it is displayed.
#[derive(Clone, Copy, Debug)]
pub enum Rotation {
    /// No rotation.
    None,
    /// Rotated clockwise by 90 degrees.
    Cw90,
    /// Rotated clockwise by 180 degrees.
    Cw180,
    /// Rotated clockwise by 270 degrees.
    Cw270,
    /// Orientation of source rectangle that always faces true north.
    /// Simple hack to get around Conrod not having space for proper
    /// rotation data (though it should be possible to add in other ways).
    SourceNorth,
    /// Orientation of target rectangle that always faces true north.
    /// Simple hack to get around Conrod not having space for proper
    /// rotation data (though it should be possible to add in other ways).
    TargetNorth,
    /// Orientation of object that always faces the character's orientation
    /// adjusted by the minimap's rotation (only if the minimap is being
    /// rotated based on camera position). Simple hack to get around Conrod
    /// not having space for proper rotation data (though it should be
    /// possible to add in other ways).
    TargetPlayer,
}
58
/// Images larger than this are stored in individual textures instead of an
/// atlas.
///
/// Expressed as a fraction of the atlas size: an image only goes into an atlas
/// when each of its dimensions is at most this fraction of the corresponding
/// atlas dimension.
const ATLAS_CUTOFF_FRAC: f32 = 0.2;
/// Multiplied by the current window size (per axis) to get the atlas size,
/// before that size is clamped to the supported texture size range.
const GRAPHIC_CACHE_RELATIVE_SIZE: u32 = 1;
64
/// Identifier of a graphic registered in a `GraphicCache`.
///
/// Handed out by `GraphicCache::add_graphic` from a wrapping `u32` counter.
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
pub struct Id(u32);
67
/// Identifier of a GPU texture held by a `GraphicCache` (an index into its
/// texture slab).
// TODO these can become invalid when clearing the cache
#[derive(PartialEq, Eq, Hash, Copy, Clone)]
pub struct TexId(usize);
71
/// Where (and in what state) a graphic is cached on the GPU.
enum CachedDetails {
    /// The graphic occupies a region of a shared atlas texture.
    Atlas {
        /// Index of the atlas this is cached in.
        atlas_idx: usize,
        /// Whether this texture is valid. Cleared when the underlying graphic
        /// is replaced and set again once the new image has been uploaded.
        valid: bool,
        /// Where in the cache texture this is.
        aabr: Aabr<u16>,
    },
    /// The graphic has a texture of its own.
    Texture {
        /// Index of the (unique, non-atlas) texture this is cached in.
        index: usize,
        /// Whether this texture is valid. Cleared when the underlying graphic
        /// is replaced and set again once the new image has been uploaded.
        valid: bool,
    },
}
88
89impl CachedDetails {
90 /// Get information about this cache entry: texture index,
91 /// whether the entry is valid, and its bounding box in the referenced
92 /// texture.
93 fn info(
94 &self,
95 atlases: &[(SimpleAtlasAllocator, usize)],
96 textures: &Slab<(Arc<Texture>, UiTextureBindGroup, UiUploadBatchId)>,
97 ) -> (usize, bool, Aabr<u16>) {
98 match *self {
99 CachedDetails::Atlas {
100 atlas_idx,
101 valid,
102 aabr,
103 } => (atlases[atlas_idx].1, valid, aabr),
104 CachedDetails::Texture { index, valid } => {
105 (index, valid, Aabr {
106 min: Vec2::zero(),
107 // NOTE (as cast): We don't accept images larger than u16::MAX (rejected in
108 // `cache_res`) (and probably would not be able to create a texture this
109 // large).
110 //
111 // Note texture should always match the cached dimensions.
112 max: textures[index].0.get_dimensions().xy().map(|e| e as u16),
113 })
114 },
115 }
116 }
117
118 /// Invalidate this cache entry.
119 fn invalidate(&mut self) {
120 match self {
121 Self::Atlas { valid, .. } => {
122 *valid = false;
123 },
124 Self::Texture { valid, .. } => {
125 *valid = false;
126 },
127 }
128 }
129
130 fn set_valid(&mut self) {
131 match self {
132 Self::Atlas { valid, .. } => {
133 *valid = true;
134 },
135 Self::Texture { valid, .. } => {
136 *valid = true;
137 },
138 }
139 }
140}
141
/// Requirements that a particular graphic has with respect to the atlas
/// allocation or independent texture it will be stored in.
///
/// If this matches between an old graphic and a new one which is replacing it,
/// we can reuse any of the corresponding locations where it is cached in
/// textures on the GPU. That is, we can invalidate such textures and upload the
/// new graphic there, rather than needing to allocate a new texture (or new
/// location in an atlas).
#[derive(PartialEq)]
enum TextureRequirements {
    /// These are uploaded to the GPU in the original resolution of the image
    /// supplied by the `Graphic` and any scaling is done during sampling in
    /// the UI fragment shader.
    Fixed {
        /// Dimensions of the source image.
        size: Vec2<u16>,
        /// Graphics with a border color specified are placed into their own
        /// individual textures so that the border color can be set
        /// there. (Note: this is partially a theoretical description as
        /// border color options are limited in the current graphics API).
        border_color: Option<Rgba<f32>>,
    },
    /// These are rasterized to the exact resolution that they will be displayed
    /// at and then uploaded to the GPU. This corresponds to
    /// `Graphic::Voxel`. There may be multiple copies on the GPU if
    /// different resolutions are requested.
    ///
    /// It is expected that the requested sizes will generally not differ when
    /// switching out a graphic. Thus, the cached locations of a dependent
    /// graphic should always be invalidated (rather than removed), since those
    /// locations will be reusable if the requested size is the same.
    Dependent,
}
174
/// These solely determine how a place in an atlas will be found or how a
/// texture will be created to place the image for a graphic.
struct TextureParameters {
    /// Dimensions of the image that will be uploaded.
    size: Vec2<u16>,
    /// Optional border color; when set, the image is given its own texture
    /// rather than a spot in an atlas.
    border_color: Option<Rgba<f32>>,
}
181
/// Key used to refer to an instance of a graphic that has been uploaded to the
/// GPU.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct CacheKey {
    /// The graphic this cached instance was produced from.
    graphic_id: Id,
    /// This is `Some` (holding the requested size) for
    /// `TextureRequirements::Dependent` and `None` for
    /// `TextureRequirements::Fixed`.
    size: Option<Vec2<u16>>,
}
190
191impl TextureRequirements {
192 fn from_graphic(graphic: &Graphic) -> Option<Self> {
193 match graphic {
194 Graphic::Image(image, border_color) => {
195 // Image sizes over u16::MAX are not supported (and we would probably not be
196 // able to create a texture large enough to hold them on the GPU anyway)!
197 let image_dims = match (u16::try_from(image.width()), u16::try_from(image.height()))
198 {
199 (Ok(x), Ok(y)) if x != 0 && y != 0 => Vec2::new(x, y),
200 _ => {
201 error!(
202 "Image dimensions greater than u16::MAX are not supported! Supplied \
203 image size: ({}, {}).",
204 image.width(),
205 image.height(),
206 );
207 // TODO: reasonable to return None on this error case? We could potentially
208 // validate images sizes on add_graphic/replace_graphic?
209 return None;
210 },
211 };
212
213 Some(Self::Fixed {
214 size: image_dims,
215 border_color: *border_color,
216 })
217 },
218 Graphic::Voxel(_, _, _) => Some(Self::Dependent),
219 Graphic::Blank => None,
220 }
221 }
222
223 #[expect(clippy::wrong_self_convention)] // type is spiritually Copy
224 fn to_key_and_tex_parameters(
225 self,
226 graphic_id: Id,
227 requested_size: Vec2<u16>,
228 ) -> (CacheKey, TextureParameters) {
229 // NOTE: Any external parameters which influence the value of the returned
230 // `TextureParameters` must be included in the `CacheKey`. Otherwise,
231 // invalidation and subsequent re-use of cache locations based on the
232 // value of `self` would be wrong.
233 let (size, border_color, key_size) = match self {
234 Self::Fixed { size, border_color } => (size, border_color, None),
235 Self::Dependent => (requested_size, None, Some(requested_size)),
236 };
237 (
238 CacheKey {
239 graphic_id,
240 size: key_size,
241 },
242 TextureParameters { size, border_color },
243 )
244 }
245}
246
// Caches graphics, only deallocates when changing screen resolution (completely
// cleared)
// NOTE(review): `replace_graphic` is also written to drop individual textures
// of a replaced graphic; confirm whether the statement above still holds.
/// Registry of graphics together with the GPU textures and atlases that
/// cached instances of them are stored in.
pub struct GraphicCache {
    // TODO replace with slotmap
    /// All registered graphics, keyed by the id handed out for them.
    graphic_map: HashMap<Id, Graphic>,
    /// Next id to use when a new graphic is added
    next_id: u32,

    /// Atlases with the index of their texture in the textures slab.
    atlases: Vec<(SimpleAtlasAllocator, usize)>,
    /// Third tuple element is a list of pending premultiply + upload operations
    /// for this frame. The purpose of this is to collect all the operations
    /// together so that a single renderpass is performed for each target
    /// texture.
    textures: Slab<(Arc<Texture>, UiTextureBindGroup, UiUploadBatchId)>,
    /// The location and details of graphics cached on the GPU.
    ///
    /// Graphic::Voxel images include the dimensions they were rasterized at in
    /// the key. Other images are scaled as part of sampling them on the
    /// GPU.
    cache_map: HashMap<CacheKey, CachedDetails>,

    /// Jobs that produce the rasterized images of voxel graphics, keyed by the
    /// cache key they are produced for.
    keyed_jobs: KeyedJobs<CacheKey, RgbaImage>,
}
271
272impl GraphicCache {
273 pub fn new(renderer: &mut Renderer) -> Self {
274 let (atlas, (tex, bind)) = create_atlas_texture(renderer);
275
276 let mut textures = Slab::new();
277 let tex_id = textures.insert((tex, bind, UiUploadBatchId::default()));
278
279 Self {
280 graphic_map: HashMap::default(),
281 next_id: 0,
282 atlases: vec![(atlas, tex_id)],
283 textures,
284 cache_map: HashMap::default(),
285 keyed_jobs: KeyedJobs::new("IMAGE_PROCESSING"),
286 }
287 }
288
289 pub fn add_graphic(&mut self, graphic: Graphic) -> Id {
290 let id = self.next_id;
291 self.next_id = id.wrapping_add(1);
292
293 let id = Id(id);
294 self.graphic_map.insert(id, graphic);
295
296 id
297 }
298
299 pub fn replace_graphic(&mut self, id: Id, graphic: Graphic) {
300 let (old, new) = match self.graphic_map.entry(id) {
301 Entry::Occupied(o) => {
302 let slot_mut = o.into_mut();
303 let old = core::mem::replace(slot_mut, graphic);
304 (old, slot_mut)
305 },
306 Entry::Vacant(v) => {
307 // This was not an update, so no need to cleanup caches.
308 v.insert(graphic);
309 return;
310 },
311 };
312
313 let old_requirements = TextureRequirements::from_graphic(&old);
314 let new_requirements = TextureRequirements::from_graphic(new);
315 let should_invalidate = old_requirements == new_requirements && old_requirements.is_some();
316
317 // Invalidate if possible or remove from caches.
318 // Maybe make this more efficient if replace graphic is used more often
319 // (especially since we should know the exact key for non-voxel
320 // graphics).
321 //
322 // NOTE: at the time of writing, replace_graphic is only used for voxel minimap
323 // updates and item image reloading.
324 if should_invalidate {
325 self.cache_map.iter_mut().for_each(|(key, details)| {
326 if key.graphic_id == id {
327 details.invalidate();
328 }
329 });
330 } else {
331 let _ = self.cache_map.extract_if(|key, details| {
332 if key.graphic_id == id {
333 match details {
334 // NOTE: if replace_graphic is used continously for small images (i.e.
335 // images placed into an atlas) of different sizes, that can use up our
336 // atlas space since spots in the atlas can't be reused. (this scenario is
337 // now possible with scaling being done during sampling rather than placing
338 // resized version into the atlas). This is expected to not occur in all
339 // pratical cases we plan to support here (i.e. the size of the replacement
340 // image will always be the same).
341 CachedDetails::Atlas { .. } => {},
342 CachedDetails::Texture { index, .. } => {
343 self.textures.remove(*index);
344 },
345 };
346 true
347 } else {
348 false
349 }
350 });
351 }
352 }
353
354 pub fn get_graphic(&self, id: Id) -> Option<&Graphic> { self.graphic_map.get(&id) }
355
356 /// Used to acquire textures for rendering
357 pub fn get_tex(&self, id: TexId) -> (&Texture, &UiTextureBindGroup) {
358 let (tex, bind, _upload_batch) = self.textures.get(id.0).expect("Invalid TexId used");
359 (tex, bind)
360 }
361
362 pub fn get_graphic_dims(&self, (id, rot): (Id, Rotation)) -> Option<(u32, u32)> {
363 use image::GenericImageView;
364 self.get_graphic(id)
365 .and_then(|graphic| match graphic {
366 Graphic::Image(image, _) => Some(image.dimensions()),
367 Graphic::Voxel(segment, _, _) => {
368 use common::vol::SizedVol;
369 let size = segment.size();
370 // TODO: HACK because they can be rotated arbitrarily, remove
371 // (and they can be rasterized at arbitrary resolution)
372 // (might need to return None here?)
373 Some((size.x, size.z))
374 },
375 Graphic::Blank => None,
376 })
377 .and_then(|(w, h)| match rot {
378 Rotation::None | Rotation::Cw180 => Some((w, h)),
379 Rotation::Cw90 | Rotation::Cw270 => Some((h, w)),
380 // TODO: need dims for these?
381 Rotation::SourceNorth | Rotation::TargetNorth | Rotation::TargetPlayer => None,
382 })
383 }
384
385 pub fn clear_cache(&mut self, renderer: &mut Renderer) {
386 self.cache_map.clear();
387
388 let (atlas, (tex, bind)) = create_atlas_texture(renderer);
389 let mut textures = Slab::new();
390 let tex_id = textures.insert((tex, bind, UiUploadBatchId::default()));
391 self.atlases = vec![(atlas, tex_id)];
392 self.textures = textures;
393 }
394
    /// Make sure an instance of the graphic `graphic_id` is cached on the GPU
    /// and report where to sample it from.
    ///
    /// Source rectangle should be from 0 to 1, and represents a bounding box
    /// for the source image of the graphic.
    ///
    /// On success, returns the rectangle to sample (in the coordinates of the
    /// texture the graphic is stored in, already adjusted for `source` and
    /// `rotation`), the number of displayed pixels per sampled pixel along
    /// each axis, and the id of the texture to sample from.
    ///
    /// Returns `None` if the id is not in use, if the graphic has no texture
    /// requirements (blank graphics and images with unsupported dimensions),
    /// or if the image for the graphic is not available (yet).
    ///
    /// # Panics
    ///
    /// Panics if one of the lengths in requested_dims is zero.
    pub fn cache_res(
        &mut self,
        renderer: &mut Renderer,
        pool: Option<&SlowJobPool>,
        graphic_id: Id,
        // TODO: if we aren't resizing here we can potentially upload the image earlier... (as long
        // as this doesn't lead to uploading too much unused stuff). (currently not sure whether it
        // would be an overall gain to pursue this.)
        requested_dims: Vec2<u16>,
        source: Aabr<f64>,
        rotation: Rotation,
    ) -> Option<((Aabr<f64>, Vec2<f32>), TexId)> {
        assert!(requested_dims.map(|e| e != 0).reduce_and());
        let requested_dims_upright = match rotation {
            // The image is stored on the GPU with no rotation, so we need to swap the dimensions
            // here to get the resolution that the image will be displayed at but re-oriented into
            // the "upright" space that the image is stored in and sampled from (this can be bit
            // confusing initially / hard to explain).
            Rotation::Cw90 | Rotation::Cw270 => requested_dims.yx(),
            Rotation::None | Rotation::Cw180 => requested_dims,
            Rotation::SourceNorth => requested_dims,
            Rotation::TargetNorth => requested_dims,
            Rotation::TargetPlayer => requested_dims,
        };

        // Rotate aabr according to requested rotation.
        let rotated_aabr = |Aabr { min, max }| match rotation {
            Rotation::None
            | Rotation::SourceNorth
            | Rotation::TargetNorth
            | Rotation::TargetPlayer => Aabr { min, max },
            Rotation::Cw90 => Aabr {
                min: Vec2::new(min.x, max.y),
                max: Vec2::new(max.x, min.y),
            },
            Rotation::Cw180 => Aabr { min: max, max: min },
            Rotation::Cw270 => Aabr {
                min: Vec2::new(max.x, min.y),
                max: Vec2::new(min.x, max.y),
            },
        };
        // Scale aabr according to provided source rectangle.
        let scaled_aabr = |aabr: Aabr<_>| {
            let size: Vec2<f64> = aabr.size().into();
            Aabr {
                min: size.mul_add(source.min, aabr.min),
                max: size.mul_add(source.max, aabr.min),
            }
        };
        // Apply all transformations.
        // TODO: Verify rotation is being applied correctly.
        let transformed_aabr_and_scale = |aabr| {
            let scaled = scaled_aabr(aabr);
            // Calculate how many displayed pixels there are for each pixel in the source
            // image. We need this to calculate where to sample in the shader to
            // retain crisp pixel borders when scaling the image.
            let scale = requested_dims_upright.map2(
                Vec2::from(scaled.size()),
                |screen_pixels, sample_pixels: f64| screen_pixels as f32 / sample_pixels as f32,
            );
            let transformed = rotated_aabr(scaled);
            (transformed, scale)
        };

        // Borrow the fields individually so that `self.keyed_jobs` can still be
        // borrowed mutably further down.
        let Self {
            textures,
            atlases,
            cache_map,
            graphic_map,
            ..
        } = self;

        let graphic = match graphic_map.get(&graphic_id) {
            Some(g) => g,
            None => {
                warn!(
                    ?graphic_id,
                    "A graphic was requested via an id which is not in use"
                );
                return None;
            },
        };

        let requirements = TextureRequirements::from_graphic(graphic)?;
        let (key, texture_parameters) =
            requirements.to_key_and_tex_parameters(graphic_id, requested_dims_upright);

        let details = match cache_map.entry(key) {
            // Already cached: reuse the location, re-uploading first if the entry
            // was invalidated.
            Entry::Occupied(mut details) => {
                let details = details.get_mut();
                let (idx, valid, aabr) = details.info(atlases, textures);

                // Check if the cached version has been invalidated by replacing the underlying
                // graphic
                if !valid {
                    // Create image
                    let (image, gpu_premul) = prepare_graphic(
                        graphic,
                        key,
                        requested_dims_upright,
                        &mut self.keyed_jobs,
                        pool,
                    )?;
                    // Ensure we don't have any bugs causing the size used to determine if the
                    // cached version is reusable to not match the size of the image produced by
                    // prepare_graphic.
                    assert_eq!(
                        image.dimensions(),
                        texture_parameters.size.map(u32::from).into_tuple()
                    );
                    // Transfer to the gpu
                    let &mut (ref texture, _, ref mut upload_batch) = &mut textures[idx];
                    upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
                    details.set_valid();
                }

                return Some((
                    transformed_aabr_and_scale(aabr.map(|e| e as f64)),
                    TexId(idx),
                ));
            },
            // Not cached yet: keep the vacant entry so it can be filled in below.
            Entry::Vacant(details) => details,
        };

        // Construct image in an optional threadpool.
        let (image, gpu_premul) = prepare_graphic(
            graphic,
            key,
            requested_dims_upright,
            &mut self.keyed_jobs,
            pool,
        )?;
        // Assert dimensions of image from `prepare_graphic` are as expected!
        assert_eq!(
            image.dimensions(),
            texture_parameters.size.map(u32::from).into_tuple()
        );
        // Image dimensions in the format used by the allocator crate.
        let image_dims_size2d = size2(
            i32::from(texture_parameters.size.x),
            i32::from(texture_parameters.size.y),
        );

        // Now we allocate space on the gpu (either in an atlas or an independent
        // texture) and upload the image to that location.

        let atlas_size = atlas_size(renderer);
        // Graphics that request a border color or which are over a particular size
        // compared to the atlas size are sent to their own textures.
        let can_place_in_atlas = texture_parameters.border_color.is_none()
            && atlas_size
                .map2(texture_parameters.size, |a, d| {
                    a as f32 * ATLAS_CUTOFF_FRAC >= d as f32
                })
                .reduce_and();
        let location = if can_place_in_atlas {
            // Fit into an atlas
            let mut loc = None;
            // Use the first existing atlas that still has room for the image.
            for (atlas_idx, &mut (ref mut atlas, texture_idx)) in atlases.iter_mut().enumerate() {
                if let Some(rectangle) = atlas.allocate(image_dims_size2d) {
                    let aabr = aabr_from_alloc_rect(rectangle);
                    loc = Some(CachedDetails::Atlas {
                        atlas_idx,
                        valid: true,
                        aabr,
                    });
                    let &mut (ref texture, _, ref mut upload_batch) = &mut textures[texture_idx];
                    upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
                    break;
                }
            }

            match loc {
                Some(loc) => loc,
                // Create a new atlas
                None => {
                    let (mut atlas, (tex, bind)) = create_atlas_texture(renderer);
                    // The allocation is expected to succeed: only images that are small
                    // relative to the atlas size get here (see `can_place_in_atlas`).
                    let aabr = atlas
                        .allocate(image_dims_size2d)
                        .map(aabr_from_alloc_rect)
                        .unwrap();
                    // NOTE: All mutations happen only after the texture creation succeeds!
                    let tex_idx = textures.insert((tex, bind, UiUploadBatchId::default()));
                    let atlas_idx = atlases.len();
                    atlases.push((atlas, tex_idx));
                    let &mut (ref texture, _, ref mut upload_batch) = &mut textures[tex_idx];
                    upload_image(renderer, texture, upload_batch, &image, aabr, gpu_premul);
                    CachedDetails::Atlas {
                        atlas_idx,
                        valid: true,
                        aabr,
                    }
                },
            }
        } else {
            // Create a texture just for this
            let (tex, bind, upload_batch) =
                create_image(renderer, &image, texture_parameters, gpu_premul);
            // NOTE: All mutations happen only after the texture creation and upload
            // initiation succeeds! (completing the upload does not have any
            // failure cases afaik)
            let index = textures.insert((tex, bind, upload_batch));
            CachedDetails::Texture { index, valid: true }
        };

        // Extract information from cache entry.
        let (idx, _, aabr) = location.info(atlases, textures);

        // Insert into cached map
        details.insert(location);

        Some((
            transformed_aabr_and_scale(aabr.map(|e| e as f64)),
            TexId(idx),
        ))
    }
617}
618
619/// Prepare the graphic into the form that will be uploaded to the GPU.
620///
621/// For voxel graphics, draws the graphic at the specified dimensions.
622///
623/// Alpha premultiplication is necessary so that images so they can be linearly
624/// filtered on the GPU. Premultiplication can either occur here or on the GPU
625/// depending on the size of the image and other factors. If premultiplication
626/// on the GPU is needed the returned bool will be `true`.
627fn prepare_graphic<'graphic>(
628 graphic: &'graphic Graphic,
629 cache_key: CacheKey,
630 dims: Vec2<u16>,
631 keyed_jobs: &mut KeyedJobs<CacheKey, RgbaImage>,
632 pool: Option<&SlowJobPool>,
633) -> Option<(Cow<'graphic, RgbaImage>, bool)> {
634 prof_span!("prepare_graphic");
635 match graphic {
636 Graphic::Blank => None,
637 Graphic::Image(image, _border_color) => {
638 // Image will be rescaled when sampling from it on the GPU so we don't
639 // need to resize it here.
640 //
641 // TODO: We could potentially push premultiplication even earlier (e.g. to the
642 // time of loading images or packaging veloren for distribution).
643 let mut rgba_cow = image.as_rgba8().map_or_else(
644 || {
645 // TODO: we may want to require loading in as the rgba8 format so we don't have
646 // to perform conversion here. On the other hand, we can take advantage of
647 // certain formats to know that alpha premultiplication doesn't need to be
648 // performed (but we would probably just want to store that with the loaded
649 // rgba8 format).
650 Cow::Owned(image.to_rgba8())
651 },
652 Cow::Borrowed,
653 );
654 // NOTE: We do premultiplication on the main thread since if it would be
655 // expensive enough to do in the background we would just do it on
656 // the GPU. Could still use `rayon` to parallelize this work, if
657 // needed.
658 let premultiply_strategy = PremultiplyStrategy::determine(&rgba_cow);
659 let needs_gpu_premultiply = match premultiply_strategy {
660 PremultiplyStrategy::UseGpu => true,
661 PremultiplyStrategy::NotNeeded => false,
662 PremultiplyStrategy::UseCpu => {
663 // NOTE: to_mut will clone the image if it was Cow::Borrowed
664 premultiply_alpha(rgba_cow.to_mut());
665 false
666 },
667 };
668
669 Some((rgba_cow, needs_gpu_premultiply))
670 },
671 Graphic::Voxel(segment, trans, sample_strat) => keyed_jobs
672 .spawn(pool, cache_key, || {
673 let segment = Arc::clone(segment);
674 let (trans, sample_strat) = (*trans, *sample_strat);
675 move |_| {
676 // TODO: for now we always use CPU premultiplication for these, may want to
677 // re-evaluate this after zoomy worldgen branch is merged (and it is more clear
678 // when these jobs go to the background thread pool or not).
679
680 // Render voxel model at requested resolution
681 let mut image = renderer::draw_vox(&segment, dims, trans, sample_strat);
682 premultiply_alpha(&mut image);
683 image
684 }
685 })
686 .map(|(_, v)| (Cow::Owned(v), false)),
687 }
688}
689
690fn atlas_size(renderer: &Renderer) -> Vec2<u32> {
691 let max_texture_size = renderer.max_texture_size();
692
693 renderer
694 .resolution()
695 .map(|e| (e * GRAPHIC_CACHE_RELATIVE_SIZE).clamp(512, max_texture_size))
696}
697
698/// This creates a texture suitable for sampling from during the UI pass and
699/// rendering too during alpha premultiplication upload passes.
700fn create_image_texture(
701 renderer: &mut Renderer,
702 size: Vec2<u32>,
703 address_mode: Option<wgpu::AddressMode>,
704) -> (Arc<Texture>, UiTextureBindGroup) {
705 // TODO: Right now we have to manually clear images to workaround AMD DX bug,
706 // for this we use Queue::write_texture which needs this usage. I think this
707 // may be fixed in newer wgpu versions that auto-clear the texture.
708 let workaround_usage = wgpu::TextureUsages::COPY_DST;
709 let tex_info = wgpu::TextureDescriptor {
710 label: None,
711 size: wgpu::Extent3d {
712 width: size.x,
713 height: size.y,
714 depth_or_array_layers: 1,
715 },
716 mip_level_count: 1,
717 sample_count: 1,
718 dimension: wgpu::TextureDimension::D2,
719 format: wgpu::TextureFormat::Rgba8UnormSrgb,
720 usage: wgpu::TextureUsages::RENDER_ATTACHMENT // GPU premultiply
721 | wgpu::TextureUsages::COPY_DST // CPU premultiply
722 | wgpu::TextureUsages::TEXTURE_BINDING // using image in ui rendering
723 | workaround_usage,
724 view_formats: &[],
725 };
726 let view_info = wgpu::TextureViewDescriptor {
727 format: Some(tex_info.format),
728 dimension: Some(wgpu::TextureViewDimension::D2),
729 ..Default::default()
730 };
731 let address_mode = address_mode.unwrap_or(wgpu::AddressMode::ClampToEdge);
732 let sampler_info = wgpu::SamplerDescriptor {
733 address_mode_u: address_mode,
734 address_mode_v: address_mode,
735 mag_filter: wgpu::FilterMode::Linear,
736 min_filter: wgpu::FilterMode::Linear,
737 ..Default::default()
738 };
739 let tex = renderer.create_texture_raw(&tex_info, &view_info, &sampler_info);
740 let bind = renderer.ui_bind_texture(&tex);
741 (Arc::new(tex), bind)
742}
743
744fn create_atlas_texture(
745 renderer: &mut Renderer,
746) -> (SimpleAtlasAllocator, (Arc<Texture>, UiTextureBindGroup)) {
747 let size = atlas_size(renderer);
748 // Note: here we assume the max texture size is under i32::MAX.
749 let atlas = SimpleAtlasAllocator::new(size2(size.x as i32, size.y as i32));
750 let (tex, bind) = create_image_texture(renderer, size, None);
751 (atlas, (tex, bind))
752}
753
754fn aabr_from_alloc_rect(rect: guillotiere::Rectangle) -> Aabr<u16> {
755 let (min, max) = (rect.min, rect.max);
756 // Note: here we assume the max texture size (and thus the maximum size of the
757 // atlas) is under `u16::MAX`.
758 Aabr {
759 min: Vec2::new(min.x as u16, min.y as u16),
760 max: Vec2::new(max.x as u16, max.y as u16),
761 }
762}
763
764fn upload_image(
765 renderer: &mut Renderer,
766 target_texture: &Arc<Texture>,
767 upload_batch: &mut UiUploadBatchId,
768 image: &RgbaImage,
769 aabr: Aabr<u16>,
770 premultiply_on_gpu: bool,
771) {
772 // Check that this image and the target aabr are the same size (otherwise there
773 // is a bug in this module).
774 debug_assert_eq!(aabr.map(u32::from).size().into_tuple(), image.dimensions());
775 if premultiply_on_gpu {
776 *upload_batch =
777 renderer.ui_premultiply_upload(target_texture, *upload_batch, image, aabr.min);
778 } else {
779 let aabr = aabr.map(u32::from);
780 let offset = aabr.min.into_array();
781 let size = aabr.size().into_array();
782 // upload directly
783 renderer.update_texture(
784 target_texture,
785 offset,
786 size,
787 // NOTE: Rgba texture, so each pixel is 4 bytes, ergo this cannot fail.
788 // We make the cast parameters explicit for clarity.
789 bytemuck::cast_slice::<u8, [u8; 4]>(
790 &(&**image)[..size[0] as usize * size[1] as usize * 4],
791 ),
792 )
793 }
794}
795
796// This is used for border_color.is_some() images (ie the map image).
797fn create_image(
798 renderer: &mut Renderer,
799 image: &RgbaImage,
800 texture_parameters: TextureParameters,
801 premultiply_on_gpu: bool,
802) -> (Arc<Texture>, UiTextureBindGroup, UiUploadBatchId) {
803 let (tex, bind) = create_image_texture(
804 renderer,
805 texture_parameters.size.map(u32::from),
806 texture_parameters
807 .border_color
808 // TODO: either use the desktop only border color or just emulate this
809 //.map(|c| c.into_array().into()),
810 .map(|_| wgpu::AddressMode::ClampToBorder),
811 );
812 let mut upload_batch = UiUploadBatchId::default();
813 let aabr = Aabr {
814 min: Vec2::zero(),
815 max: texture_parameters.size,
816 };
817 upload_image(
818 renderer,
819 &tex,
820 &mut upload_batch,
821 image,
822 aabr,
823 premultiply_on_gpu,
824 );
825 (tex, bind, upload_batch)
826}
827
828// CPU-side alpha premultiplication implementation.
829
/// Precomputed factors used by the CPU-side alpha premultiplication. Both
/// tables are indexed by a `u8` channel value.
pub struct PremultiplyLookupTable {
    /// Fixed-point factor for each alpha value.
    alpha: [u16; 256],
    // This is for both colors that are always below the linear transform threshold (of the
    // transform between linear/non-linear srgb) and colors that start above the threshold when
    // transforming into linear srgb and then fall below it after being multiplied by alpha (before
    // being transformed out of linear srgb).
    /// Fixed-point factor for each color value.
    color: [u16; 256],
}
838
impl Default for PremultiplyLookupTable {
    /// Compute both lookup tables.
    fn default() -> Self {
        /// Transform a non-linear sRGB channel value (0..=255) into linear
        /// space (0.0..=1.0).
        #[rustfmt::skip]
        fn accurate_to_linear(c: u8) -> f32 {
            let c = c as f32 / 255.0;
            // https://en.wikipedia.org/wiki/SRGB#Transformation
            if c <= 0.04045 {
                c / 12.92
            } else {
                // 0.055 ~= 14 (in u8 units: 0.055 * 255 is roughly 14)
                ((c + 0.055) / 1.055).powf(2.4)
            }
        }

        use core::array;
        // Entry for each alpha: (alpha / 255)^(1 / 2.4) scaled by 2^16 and rounded.
        let alpha = array::from_fn(|alpha| {
            // NOTE: u16::MAX + 1 here relies on the max alpha being short-circuited (and
            // not using this table). We multiply by this factor since it is a
            // power of 2, which means later demultiplying it will optimize to a
            // bitshift.
            (((alpha as f32 / 255.0).powf(1.0 / 2.4) * (u16::MAX as f32 + 1.0)) + 0.5) as u16
        });
        // Entry for each color: the color after a round trip through linear srgb
        // (using the linear branch on the way out), scaled by 2^13 and rounded.
        let color = array::from_fn(|color| {
            (if color <= 10 {
                // <= 10 means the transform is linear!
                // (10 / 255 is below the 0.04045 threshold, 11 / 255 is above it)
                color as f32 / 255.0
            } else {
                // Here the transform into linear srgb isn't linear but the transform out of it is.
                //
                // This is transform into and out of linear srgb with the theoretical alpha
                // multiplication factored out.
                accurate_to_linear(color as u8) * 12.92
            }
            // take advantage of the precision offered by u16
            * (1 << 13) as f32
            // round to the nearest integer when the cast truncates
            + 0.5) as u16
        });
        Self { alpha, color }
    }
}
880
881fn premultiply_alpha(image: &mut RgbaImage) {
882 lazy_static::lazy_static! {
883 static ref LOOKUP: PremultiplyLookupTable = Default::default();
884 }
885 let lookup = &*LOOKUP;
886 // TODO: Apparently it is possible for ImageBuffer raw vec to have more pixels
887 // than the dimensions of the actual image (I don't think we actually have
888 // this occuring but we should probably fix other spots that use the raw
889 // buffer). See:
890 // https://github.com/image-rs/image/blob/a1ce569afd476e881acafdf9e7a5bce294d0db9a/src/buffer.rs#L664
891 let dims = image.dimensions();
892 let image_buffer_len = dims.0 as usize * dims.1 as usize * 4;
893 let (arrays, end) = (&mut **image)[..image_buffer_len].as_chunks_mut::<{ 4 * 4 }>();
894 // Rgba8 has 4 bytes per pixel there should be no remainder when dividing by 4.
895 let (end, _) = end.as_chunks_mut::<4>();
896 end.iter_mut().for_each(|pixel| {
897 let alpha = pixel[3];
898 if alpha == 0 {
899 *pixel = [0; 4];
900 return;
901 } else if alpha == 255 {
902 return;
903 };
904
905 for color in &mut pixel[..3] {
906 let predicted = ((lookup.alpha[alpha as usize] as u32) * (*color as u32 + 14) + 32433)
907 / (u16::MAX as u32 + 1);
908 let multiplied_color = (if predicted < 9 + 14 {
909 (lookup.color[*color as usize] as u32 * alpha as u32 + 4096) >> 13
910 } else {
911 predicted - 14
912 }) as u8;
913 *color = multiplied_color;
914 }
915 });
916 arrays.iter_mut().for_each(|pixelx4| {
917 // Short-circuit for alpha == 0 or 255
918 // This adds ~7 us (worst case) for a 256x256 image.
919 // Best case is decreased to 20 us total time.
920 if pixelx4[3] == pixelx4[7] && pixelx4[3] == pixelx4[11] && pixelx4[3] == pixelx4[15] {
921 if pixelx4[3] == 0 {
922 *pixelx4 = [0; 16];
923 return;
924 } else if pixelx4[3] == u8::MAX {
925 return;
926 }
927 }
928
929 // Lookup transformed alpha values for each pixel first.
930 // Putting this here seems to make things slightly faster.
931 let factors = [
932 lookup.alpha[pixelx4[3] as usize],
933 lookup.alpha[pixelx4[7] as usize],
934 lookup.alpha[pixelx4[11] as usize],
935 lookup.alpha[pixelx4[15] as usize],
936 ];
937 for pixel_index in 0..4 {
938 let alpha_factor = factors[pixel_index];
939 let alpha = pixelx4[pixel_index * 4 + 3];
940 // Putting this code outside the loop makes things take ~25% less time.
941 let color_factors = [
942 lookup.color[pixelx4[pixel_index * 4 + 0] as usize] as u32 * alpha as u32 + 4096,
943 lookup.color[pixelx4[pixel_index * 4 + 1] as usize] as u32 * alpha as u32 + 4096,
944 lookup.color[pixelx4[pixel_index * 4 + 2] as usize] as u32 * alpha as u32 + 4096,
945 ];
946 for i in 0..3 {
947 let color = &mut pixelx4[pixel_index * 4 + i];
948 // Loosely based on transform to linear and back (above threshold) (this is
949 // where use of 14 comes from).
950 // `32433` selected via trial and error to reduce the number of mismatches.
951 // `/ (u16::MAX as u32 + 1)` transforms back to `u8` precision (we add 1 so it
952 // will be a division by a power of 2 which optimizes well).
953 let predicted =
954 ((alpha_factor as u32) * (*color as u32 + 14) + 32328) / (u16::MAX as u32 + 1);
955 let multiplied_color = (if predicted < 9 + 14 {
956 // Here we handle two cases:
957 // 1. When the transform starts and ends as linear.
958 // 2. When the color is over the linear threshold for the transform into linear
959 // space but below this threshold when transforming back out (due to being
960 // multiplied with a small alpha).
961 // (in both cases the result is linearly related to alpha and we can encode how
962 // it is related to the color in a lookup table)
963 // NOTE: 212 is the largest color value used here (when alpha isn't 0)
964 color_factors[i] >> 13
965 } else {
966 predicted - 14
967 }) as u8;
968 *color = multiplied_color;
969 }
970 }
971 });
972}
973
/// Strategy for how alpha premultiplication will be applied to an image.
///
/// Selected per image by `PremultiplyStrategy::determine`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum PremultiplyStrategy {
    /// Premultiply on the CPU.
    UseCpu,
    /// Premultiply on the GPU.
    UseGpu,
    /// Image is fully opaque, so there is nothing to premultiply.
    NotNeeded,
}
981
982impl PremultiplyStrategy {
983 #[rustfmt::skip] // please don't format comment with 'ns/pixel' to a separate line from the value
984 fn determine(image: &RgbaImage) -> Self {
985 // TODO: Would be useful to re-time this after a wgpu update.
986 //
987 // Thresholds below are based on the timing measurements of the CPU based premultiplication
988 // vs ovehead of interacting with the GPU API to perform premultiplication on the GPU.
989 // These timings are quite circumstantial and could vary between machines, wgpu updates,
990 // and changes to the structure of the GPU based path.
991 //
992 // GPU path costs (For calculations I used `57.6 us` as a roughly reasonable estimate of
993 // total time here but that can vary lower and higher. Everything is a bit imprecise here
994 // so I won't list individual timings. The key takeaway is that this can be made more
995 // efficient by avoidiing the create/drop of a texture, texture view, and bind group for
996 // each image. Also, if we didn't need a separate render pass for each target image that
997 // would be helpful as well. Using compute passes and passing data in as a raw buffer may
998 // help with both of these but initial attempts with that ran into issues (e.g. when we get
999 // the ability to have non-srgb views of srgb textures that will be useful)):
1000 // * create/drop texture
1001 // * create/drop texture view
1002 // * create/drop bind group
1003 // * run render pass (NOTE: if many images are processed at once with the same target
1004 // texture this portion of the cost can be split between them)
1005 //
1006 // CPU path costs:
1007 // * clone image (0.17 ns/pixel (benchmark) - 0.73 ns/pixel (in voxygen))
1008 // * run premultiplication (0.305 ns/pixel (when shortcircuits are always hit) -
1009 // 3.81 ns/pixel (with random alpha))
1010 //
1011 // Shared costs include:
1012 // * write_texture
1013 // * (optional) check for fraction of shortcircuit blocks in image (0.223 ns/pixel)
1014 //
1015 // `ALWAYS_CPU_THRESHOLD` is roughly:
1016 // ("cost of GPU path" + "shortcircuit count cost") / "worst case cost of CPU path per pixel"
1017 //
1018 // `ALWAYS_GPU_THRESHOLD` is NOT: "cost of GPU path" / "best case cost of CPU path per pixel"
1019 // since the cost of checking for whether the CPU path is better at this quantity of pixels
1020 // becomes more than the on the amount of overhead we are willing to add to the worst case
1021 // scenario where we run the short-circuit count check and end up using the GPU path. The
1022 // currently selected value of 200x200 adds at most about ~20% of the cost of the GPU path.
1023 // (TODO: maybe we could have the check bail out early if the results aren't looking
1024 // favorable for the CPU path and/or sample a random subset of the pixels).
1025 //
1026 // `CHECKED_THRESHOLD` is roughly: "cost of GPU path / "best case cost of CPU path per pixel"
1027 const ALWAYS_CPU_THRESHOLD: usize = 120 * 120;
1028 const ALWAYS_GPU_THRESHOLD: usize = 200 * 200;
1029 const CHECKED_THRESHOLD: usize = 240 * 240;
1030
1031 let dims = image.dimensions();
1032 let pixel_count = dims.0 as usize * dims.1 as usize;
1033 if pixel_count <= ALWAYS_CPU_THRESHOLD {
1034 Self::UseCpu
1035 } else if pixel_count > ALWAYS_GPU_THRESHOLD {
1036 Self::UseGpu
1037 } else if let Some(fraction) = fraction_shortcircuit_blocks(image) {
1038 // This seems correct...?
1039 // TODO: I think we technically can exit the fraction checking early if we know the
1040 // total fraction value will be over: (threshold - ALWAYS_CPU_THRESHOLD) /
1041 // (CHECKED_THRESHOLD - ALWAYS_CPU_THRESHOLD).
1042 let threshold = fraction * CHECKED_THRESHOLD as f32
1043 + (1.0 - fraction) * ALWAYS_CPU_THRESHOLD as f32;
1044 if pixel_count as f32 <= threshold {
1045 Self::UseCpu
1046 } else {
1047 Self::UseGpu
1048 }
1049 } else {
1050 Self::NotNeeded
1051 }
1052 }
1053}
1054
1055/// Useful to estimates cost of premultiplying alpha in the provided image via
1056/// the CPU method.
1057///
1058/// Computes the fraction of 4 pixel chunks that are fully translucent or
1059/// opaque. Returns `None` if no premultiplication is needed (i.e. all alpha
1060/// values are 255).
1061fn fraction_shortcircuit_blocks(image: &RgbaImage) -> Option<f32> {
1062 let dims = image.dimensions();
1063 let pixel_count = dims.0 as usize * dims.1 as usize;
1064 let (arrays, end) = (&**image)[..pixel_count * 4].as_chunks::<{ 4 * 4 }>();
1065
1066 // Rgba8 has 4 bytes per pixel there should be no remainder when dividing by 4.
1067 let (end, _) = end.as_chunks::<4>();
1068 let end_is_opaque = end.iter().all(|pixel| pixel[3] == 255);
1069
1070 // 14.6 us for 256x256 image
1071 let num_chunks = arrays.len();
1072 let mut num_translucent = 0;
1073 let mut num_opaque = 0;
1074 arrays.iter().for_each(|pixelx4| {
1075 let v = u128::from_ne_bytes(*pixelx4);
1076 let alpha_mask = 0x000000FF_000000FF_000000FF_000000FF;
1077 let masked = v & alpha_mask;
1078 if masked == 0 {
1079 num_translucent += 1;
1080 } else if masked == alpha_mask {
1081 num_opaque += 1;
1082 }
1083 });
1084
1085 if num_chunks == num_opaque && num_translucent == 0 && end_is_opaque {
1086 None
1087 } else {
1088 Some((num_translucent as f32 + num_opaque as f32) / num_chunks as f32)
1089 }
1090}