/*
* This file is part of libplacebo.
*
* libplacebo is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* libplacebo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LIBPLACEBO_GPU_H_
#define LIBPLACEBO_GPU_H_
#include <stddef.h>
#include <stdbool.h>
#include <stdint.h>
#include <libplacebo/common.h>
#include <libplacebo/cache.h>
#include <libplacebo/log.h>
PL_API_BEGIN
// These are not memory managed, and should represent compile-time constants
typedef const char *pl_debug_tag;
#define PL_DEBUG_TAG (__FILE__ ":" PL_TOSTRING(__LINE__))
// This file contains the definition of an API which is designed to abstract
// away from platform-specific APIs like the various OpenGL variants, Direct3D
// and Vulkan in a common way. It is a much more limited API than those APIs,
// since it only targets the small common subset of features needed to
// implement libplacebo's rendering.
//
// NOTE: Most, but not all, parameter conditions (phrases such as "must" or
// "valid usage") are explicitly tested and result in error messages followed
// by graceful failure. Exceptions are noted where they exist.
// Type of a shader input descriptor.
enum pl_desc_type {
PL_DESC_INVALID = 0,
PL_DESC_SAMPLED_TEX, // C: pl_tex* GLSL: combined texture sampler
// (`pl_tex->params.sampleable` must be set)
PL_DESC_STORAGE_IMG, // C: pl_tex* GLSL: storage image
// (`pl_tex->params.storable` must be set)
PL_DESC_BUF_UNIFORM, // C: pl_buf* GLSL: uniform buffer
// (`pl_buf->params.uniform` must be set)
PL_DESC_BUF_STORAGE, // C: pl_buf* GLSL: storage buffer
// (`pl_buf->params.storable` must be set)
PL_DESC_BUF_TEXEL_UNIFORM,// C: pl_buf* GLSL: uniform samplerBuffer
// (`pl_buf->params.uniform` and `format` must be set)
PL_DESC_BUF_TEXEL_STORAGE,// C: pl_buf* GLSL: uniform imageBuffer
// (`pl_buf->params.storable` and `format` must be set)
PL_DESC_TYPE_COUNT
};
// Structure which wraps metadata describing GLSL capabilities.
struct pl_glsl_version {
int version; // GLSL version (e.g. 450), for #version
bool gles; // GLSL ES semantics (ESSL)
bool vulkan; // GL_KHR_vulkan_glsl semantics
// Compute shader support and limits. If `compute` is false, then all
// of the remaining fields in this section are {0}.
bool compute;
size_t max_shmem_size; // maximum compute shader shared memory size
uint32_t max_group_threads; // maximum number of local threads per work group
uint32_t max_group_size[3]; // maximum work group size per dimension
// If nonzero, signals availability of shader subgroups. This guarantees
// availability of all of the following extensions:
// - GL_KHR_shader_subgroup_basic
// - GL_KHR_shader_subgroup_vote
// - GL_KHR_shader_subgroup_arithmetic
// - GL_KHR_shader_subgroup_ballot
// - GL_KHR_shader_subgroup_shuffle
uint32_t subgroup_size;
// Miscellaneous shader limits
int16_t min_gather_offset; // minimum `textureGatherOffset` offset
int16_t max_gather_offset; // maximum `textureGatherOffset` offset
};
// Backwards compatibility alias
#define pl_glsl_desc pl_glsl_version
// Structure defining the physical limits and capabilities of this GPU
// instance. If a limit is given as 0, that means that feature is unsupported.
struct pl_gpu_limits {
// --- pl_gpu
bool thread_safe; // `pl_gpu` calls are thread-safe
bool callbacks; // supports asynchronous GPU callbacks
// --- pl_buf
size_t max_buf_size; // maximum size of any buffer
size_t max_ubo_size; // maximum size of a `uniform` buffer
size_t max_ssbo_size; // maximum size of a `storable` buffer
size_t max_vbo_size; // maximum size of a `drawable` buffer
size_t max_mapped_size; // maximum size of a `host_mapped` buffer
uint64_t max_buffer_texels; // maximum number of texels in a texel buffer
bool host_cached; // if true, PL_BUF_MEM_HOST buffers are cached
size_t max_mapped_vram; // maximum (known) size of a `host_mapped`
// PL_BUF_MEM_DEVICE buffer, or 0 if this
// combination is not supported
// Required alignment for PL_HANDLE_HOST_PTR imports. This is provided
// merely as a hint to the user. If the host pointer being imported is
// misaligned, libplacebo will internally round (over-map) the region.
size_t align_host_ptr;
// --- pl_tex
uint32_t max_tex_1d_dim; // maximum width for a 1D texture
uint32_t max_tex_2d_dim; // maximum width/height for a 2D texture (required)
uint32_t max_tex_3d_dim; // maximum width/height/depth for a 3D texture
bool blittable_1d_3d; // supports blittable 1D/3D textures
bool buf_transfer; // supports `pl_tex_transfer_params.buf`
// These don't represent hard limits but indicate performance hints for
// optimal alignment. For best performance, the corresponding field
// should be aligned to a multiple of these. They will always be a power
// of two.
size_t align_tex_xfer_pitch; // optimal `pl_tex_transfer_params.row_pitch`
size_t align_tex_xfer_offset; // optimal `pl_tex_transfer_params.buf_offset`
// --- pl_pass
size_t max_variable_comps; // maximum components passed in variables
size_t max_constants; // maximum `pl_pass_params.num_constants`
bool array_size_constants; // push constants can be used to size arrays
size_t max_pushc_size; // maximum `push_constants_size`
size_t align_vertex_stride; // alignment of `pl_pass_params.vertex_stride`
uint32_t max_dispatch[3]; // maximum dispatch size per dimension
// Note: At least one of `max_variable_comps` or `max_ubo_size` is
// guaranteed to be nonzero.
// As a performance hint, the GPU may signal the number of command queues
// it has for fragment and compute shaders, respectively. Users may use
// this information to decide the appropriate type of shader to dispatch.
uint32_t fragment_queues;
uint32_t compute_queues;
};
// Backwards compatibility aliases
#define max_xfer_size max_buf_size
#define align_tex_xfer_stride align_tex_xfer_pitch
// Some `pl_gpu` operations allow sharing GPU resources with external APIs -
// examples include interop with other graphics APIs such as CUDA, and also
// various hardware decoding APIs. This defines the mechanism underpinning the
// communication of such an interoperation.
typedef uint64_t pl_handle_caps;
enum pl_handle_type {
PL_HANDLE_FD = (1 << 0), // `int fd` for POSIX-style APIs
PL_HANDLE_WIN32 = (1 << 1), // `HANDLE` for win32 API
PL_HANDLE_WIN32_KMT = (1 << 2), // `HANDLE` for pre-Windows-8 win32 API
PL_HANDLE_DMA_BUF = (1 << 3), // 'int fd' for a dma_buf fd
PL_HANDLE_HOST_PTR = (1 << 4), // `void *` for a host-allocated pointer
PL_HANDLE_MTL_TEX = (1 << 5), // `MTLTexture*` for Apple platforms
PL_HANDLE_IOSURFACE = (1 << 6), // `IOSurfaceRef` for Apple platforms
};
struct pl_gpu_handle_caps {
pl_handle_caps tex; // supported handles for `pl_tex` + `pl_shared_mem`
pl_handle_caps buf; // supported handles for `pl_buf` + `pl_shared_mem`
pl_handle_caps sync; // supported handles for semaphores
};
// Wrapper for the handle used to communicate a shared resource externally.
// This handle is owned by the `pl_gpu` - if a user wishes to use it in a way
// that takes over ownership (e.g. importing into some APIs), they must clone
// the handle before doing so (e.g. using `dup` for fds). It is important to
// read the external API documentation _very_ carefully as different handle
// types may be managed in different ways. (e.g. CUDA takes ownership of an fd,
// but does not take ownership of a win32 handle).
union pl_handle {
int fd; // PL_HANDLE_FD / PL_HANDLE_DMA_BUF
void *handle; // PL_HANDLE_WIN32 / PL_HANDLE_WIN32_KMT / PL_HANDLE_MTL_TEX / PL_HANDLE_IOSURFACE
void *ptr; // PL_HANDLE_HOST_PTR
};
// Structure encapsulating memory that is shared between libplacebo and the
// user. This memory can be imported into external APIs using the handle.
//
// If the object a `pl_shared_mem` belongs to is destroyed (e.g. via
// `pl_buf_destroy`), the handle becomes undefined, as do the contents of the
// memory it points to, as well as any external API objects imported from it.
struct pl_shared_mem {
union pl_handle handle;
size_t size; // the total size of the memory referenced by this handle
size_t offset; // the offset of the object within the referenced memory
// Note: `size` is optional for some APIs and handle types, in particular
// when importing DMABUFs or D3D11 textures.
// For PL_HANDLE_DMA_BUF, this specifies the DRM format modifier that
// describes this resource. Note that when importing `pl_buf`, this must
// be DRM_FORMAT_MOD_LINEAR. For importing `pl_tex`, it can be any
// format modifier supported by the implementation.
uint64_t drm_format_mod;
// When importing a `pl_tex` of type PL_HANDLE_DMA_BUF, this can be used to
// set the image stride (AKA pitch) in memory. If left as 0, defaults to
// the image width/height.
size_t stride_w;
size_t stride_h;
// When importing a `pl_tex` of type PL_HANDLE_MTL_TEX, this determines
// which plane is imported (0 - 2).
unsigned plane;
};
// Structure grouping PCI bus address fields for GPU devices
struct pl_gpu_pci_address {
uint32_t domain;
uint32_t bus;
uint32_t device;
uint32_t function;
};
typedef const struct pl_fmt_t *pl_fmt;
// Abstract device context which wraps an underlying graphics context and can
// be used to dispatch rendering commands.
//
// Thread-safety: Depends on `pl_gpu_limits.thread_safe`
typedef const struct pl_gpu_t {
pl_log log;
struct pl_glsl_version glsl; // GLSL features supported by this GPU
struct pl_gpu_limits limits; // physical device limits and capabilities
// Fields relevant to external API interop. If the underlying device does
// not support interop with other APIs, these will all be {0}.
struct pl_gpu_handle_caps export_caps; // supported handles for exporting
struct pl_gpu_handle_caps import_caps; // supported handles for importing
uint8_t uuid[16]; // underlying device UUID
// Supported texture formats, in preference order. (If there are multiple
// similar formats, the "better" ones come first)
pl_fmt *formats;
int num_formats;
// PCI Bus address of the underlying device, to help with interop.
// This will only be filled in if interop is supported.
struct pl_gpu_pci_address pci;
} *pl_gpu;
// Attach a pl_cache object to this GPU instance. This cache will be
// used to cache all compiled shaders, as well as several other shader objects
// (e.g. cached 3DLUTs). Calling this with `cache = NULL` disables the cache.
//
// Note: Calling this after shaders have already been compiled will not
// retroactively add those shaders to the cache, so it's recommended to set
// this early, before creating any passes.
PL_API void pl_gpu_set_cache(pl_gpu gpu, pl_cache cache);
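// Example usage of `pl_gpu_set_cache` (a sketch only; the `pl_cache_create`,
// `pl_cache_params` and `pl_cache_destroy` names come from
// <libplacebo/cache.h>, and the size limit is an arbitrary choice):
//
//   pl_cache cache = pl_cache_create(pl_cache_params(
//       .log            = gpu->log,
//       .max_total_size = 10 << 20, // 10 MiB
//   ));
//   pl_gpu_set_cache(gpu, cache);
//   // ... create passes, render, etc. ...
//   pl_gpu_set_cache(gpu, NULL); // detach before destroying the cache
//   pl_cache_destroy(&cache);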
enum pl_fmt_type {
PL_FMT_UNKNOWN = 0, // also used for inconsistent multi-component formats
PL_FMT_UNORM, // unsigned, normalized integer format (sampled as float)
PL_FMT_SNORM, // signed, normalized integer format (sampled as float)
PL_FMT_UINT, // unsigned integer format (sampled as integer)
PL_FMT_SINT, // signed integer format (sampled as integer)
PL_FMT_FLOAT, // (signed) float formats, any bit size
PL_FMT_TYPE_COUNT,
};
enum pl_fmt_caps {
PL_FMT_CAP_SAMPLEABLE = 1 << 0, // may be sampled from (PL_DESC_SAMPLED_TEX)
PL_FMT_CAP_STORABLE = 1 << 1, // may be used as storage image (PL_DESC_STORAGE_IMG)
PL_FMT_CAP_LINEAR = 1 << 2, // may be linearly sampled from (PL_TEX_SAMPLE_LINEAR)
PL_FMT_CAP_RENDERABLE = 1 << 3, // may be rendered to (pl_pass_params.target_fmt)
PL_FMT_CAP_BLENDABLE = 1 << 4, // may be blended to (pl_pass_params.enable_blend)
PL_FMT_CAP_BLITTABLE = 1 << 5, // may be blitted from/to (pl_tex_blit)
PL_FMT_CAP_VERTEX = 1 << 6, // may be used as a vertex attribute
PL_FMT_CAP_TEXEL_UNIFORM = 1 << 7, // may be used as a texel uniform buffer
PL_FMT_CAP_TEXEL_STORAGE = 1 << 8, // may be used as a texel storage buffer
PL_FMT_CAP_HOST_READABLE = 1 << 9, // may be used with `host_readable` textures
PL_FMT_CAP_READWRITE = 1 << 10, // may be used with PL_DESC_ACCESS_READWRITE
// Notes:
// - PL_FMT_CAP_LINEAR also implies PL_FMT_CAP_SAMPLEABLE
// - PL_FMT_CAP_STORABLE also implies `pl_gpu.glsl.compute`
// - PL_FMT_CAP_BLENDABLE implies PL_FMT_CAP_RENDERABLE
// - PL_FMT_CAP_VERTEX implies that the format is non-opaque
// - PL_FMT_CAP_HOST_READABLE implies that the format is non-opaque
};
struct pl_fmt_plane {
// Underlying format of this particular sub-plane. This describes the
// components, texel size and host representation for the purpose of
// e.g. transfers, blits, and sampling.
pl_fmt format;
// X/Y subsampling shift factor for this plane.
uint8_t shift_x, shift_y;
};
// Structure describing a texel/vertex format.
struct pl_fmt_t {
const char *name; // symbolic name for this format (e.g. rgba32f)
uint64_t signature; // unique but stable signature (for pass reusability)
enum pl_fmt_type type; // the format's data type and interpretation
enum pl_fmt_caps caps; // the features supported by this format
int num_components; // number of components for this format
int component_depth[4]; // meaningful bits per component, texture precision
size_t internal_size; // internal texel size (for blit compatibility)
// For planar formats, this provides a description of each sub-plane.
//
// Note on planar formats: Planar formats are always opaque and typically
// support only a limited subset of capabilities (or none at all). Access
// should be done via sub-planes. (See `pl_tex.planes`)
struct pl_fmt_plane planes[4];
int num_planes; // or 0 for non-planar textures
// This controls the relationship between the data as seen by the host and
// the way it's interpreted by the texture. The host representation is
// always tightly packed (no padding bits in between each component).
//
// This representation assumes little endian ordering, i.e. components
// being ordered from LSB to MSB in memory. Note that for oddly packed
// formats like rgb10a2 or rgb565, this is inconsistent with the naming.
// (That is to say, rgb565 has sample order {2, 1, 0} under this convention
// - because rgb565 treats the R channel as the *most* significant bits)
//
// If `opaque` is true, then there's no meaningful correspondence between
// the two, and all of the remaining fields in this section are unset.
//
// If `emulated` is true, then this format doesn't actually exist on the
// GPU as an uploadable texture format - and any apparent support is being
// emulated (typically using compute shaders in the upload path).
bool opaque;
bool emulated;
size_t texel_size; // total size in bytes per texel
size_t texel_align; // texel alignment requirements (bytes)
int host_bits[4]; // number of meaningful bits in host memory
int sample_order[4]; // sampled index for each component, e.g.
// {2, 1, 0, 3} for BGRA textures
// For sampleable formats, this bool indicates whether or not the format
// is compatible with `textureGather()`
bool gatherable;
// If usable as a vertex or texel buffer format, this gives the GLSL type
// corresponding to the data. (e.g. vec4)
const char *glsl_type;
// If usable as a storage image or texel storage buffer
// (PL_FMT_CAP_STORABLE / PL_FMT_CAP_TEXEL_STORAGE), this gives the GLSL
// texel format corresponding to the format (e.g. rgba16ui), if any. This
// field may be NULL, in which case the format modifier may be left
// unspecified.
const char *glsl_format;
// If available, this gives the fourcc associated with the host
// representation. In particular, this is intended for use with
// PL_HANDLE_DMA_BUF, where this field will match the DRM format from
// <drm_fourcc.h>. May be 0, for formats without matching DRM fourcc.
uint32_t fourcc;
// If `fourcc` is set, this contains the list of supported drm format
// modifiers for this format.
const uint64_t *modifiers;
int num_modifiers;
};
// Returns whether or not a pl_fmt's components are ordered sequentially
// in memory in the order RGBA.
PL_API bool pl_fmt_is_ordered(pl_fmt fmt);
// Returns whether or not a pl_fmt is sampled as a float (e.g. UNORM)
PL_API bool pl_fmt_is_float(pl_fmt fmt);
// Returns whether or not a pl_fmt supports a given DRM modifier.
PL_API bool pl_fmt_has_modifier(pl_fmt fmt, uint64_t modifier);
// Helper function to find a format with a given number of components and
// minimum effective precision per component. If `host_bits` is set, then the
// format will always be non-opaque, unpadded, ordered and have exactly this
// bit depth for each component. Finally, all `caps` must be supported.
PL_API pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components,
int min_depth, int host_bits, enum pl_fmt_caps caps);
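// Example: finding a 4-component, 8-bit UNORM format that supports linear
// sampling (a sketch; returns NULL if no such format exists):
//
//   pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 4, 8, 8,
//                            PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR);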
// Finds a vertex format for a given configuration. The resulting vertex will
// have a component depth equal to the sizeof() of the equivalent host type.
// (e.g. PL_FMT_FLOAT will always have sizeof(float))
PL_API pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components);
// Find a format based on its name.
PL_API pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name);
// Find a format based on its fourcc.
PL_API pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc);
// A generic 'timer query' object. These can be used to measure an
// approximation of the GPU execution time of a given operation. Due to the
// highly asynchronous nature of GPUs, the actual results of any individual
// timer query may be delayed by quite a bit. As such, users should avoid
// trying to pair any particular GPU command with any particular timer query
// result, and only reuse `pl_timer` objects with identical operations. The
// results of timer queries are guaranteed to be in-order, but individual
// queries may be dropped, and some operations might not record timer results
// at all. (For example, if the underlying hardware does not support timer
// queries for a given operation type)
//
// Thread-safety: Unsafe
typedef struct pl_timer_t *pl_timer;
// Creates a new timer object. This may return NULL, for example if the
// implementation does not support timers, but since passing NULL to
// `pl_timer_destroy` and `pl_timer_query` is safe, users generally need not
// concern themselves with handling this.
PL_API pl_timer pl_timer_create(pl_gpu gpu);
PL_API void pl_timer_destroy(pl_gpu gpu, pl_timer *);
// Queries any results that have been measured since the last execution of
// `pl_timer_query`. There may be more than one result, in which case the user
// should simply call the function again to get the subsequent values. This
// function returns a value of 0 in the event that there are no more
// unprocessed results.
//
// The results are reported in nanoseconds, but the actual precision of the
// timestamp queries may be significantly lower.
//
// Note: Results do not queue up indefinitely. Generally, the implementation
// will only keep track of a small, fixed number of results internally. Make
// sure to include this function as part of your main rendering loop to process
// all of its results, or older results will be overwritten by newer ones.
PL_API uint64_t pl_timer_query(pl_gpu gpu, pl_timer);
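// Example of draining timer results inside a rendering loop (a sketch;
// `record_sample` is a hypothetical user-side accumulation helper):
//
//   uint64_t ns;
//   while ((ns = pl_timer_query(gpu, timer)))
//       record_sample(ns); // reported in nanoseconds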
enum pl_buf_mem_type {
PL_BUF_MEM_AUTO = 0, // use whatever seems most appropriate
PL_BUF_MEM_HOST, // try allocating from host memory (RAM)
PL_BUF_MEM_DEVICE, // try allocating from device memory (VRAM)
PL_BUF_MEM_TYPE_COUNT,
// Note: This distinction only matters for discrete GPUs
};
// Structure describing a buffer.
struct pl_buf_params {
size_t size; // size in bytes (must be <= `pl_gpu_limits.max_buf_size`)
bool host_writable; // contents may be updated via pl_buf_write()
bool host_readable; // contents may be read back via pl_buf_read()
bool host_mapped; // create a persistent, RW mapping (pl_buf.data)
// May be used as PL_DESC_BUF_UNIFORM or PL_DESC_BUF_TEXEL_UNIFORM.
// Requires `size <= pl_gpu_limits.max_ubo_size`
bool uniform;
// May be used as PL_DESC_BUF_STORAGE or PL_DESC_BUF_TEXEL_STORAGE.
// Requires `size <= pl_gpu_limits.max_ssbo_size`
bool storable;
// May be used as the source of vertex data for `pl_pass_run`.
bool drawable;
// Provide a hint for the memory type you want to use when allocating
// this buffer's memory.
//
// Note: Restrictions may apply depending on the usage flags. In
// particular, allocating buffers with `uniform` or `storable` enabled from
// non-device memory will almost surely fail.
enum pl_buf_mem_type memory_type;
// Setting this to a format with the `PL_FMT_CAP_TEXEL_*` capability allows
// this buffer to be used as a `PL_DESC_BUF_TEXEL_*`, when `uniform` and
// `storable` are respectively also enabled.
pl_fmt format;
// At most one of `export_handle` and `import_handle` can be set for a
// buffer.
// Setting this indicates that the memory backing this buffer should be
// shared with external APIs. If so, this must be exactly *one* of
// `pl_gpu.export_caps.buf`.
enum pl_handle_type export_handle;
// Setting this indicates that the memory backing this buffer will be
// imported from an external API. If so, this must be exactly *one* of
// `pl_gpu.import_caps.buf`.
enum pl_handle_type import_handle;
// If the shared memory is being imported, the import handle must be
// specified here. Otherwise, this is ignored.
struct pl_shared_mem shared_mem;
// If non-NULL, the buffer will be created with these contents. Otherwise,
// the initial data is undefined. Using this does *not* require setting
// host_writable.
const void *initial_data;
// Arbitrary user data. libplacebo does not use this at all.
void *user_data;
// Arbitrary identifying tag. Used only for debugging purposes.
pl_debug_tag debug_tag;
};
#define pl_buf_params(...) (&(struct pl_buf_params) { \
.debug_tag = PL_DEBUG_TAG, \
__VA_ARGS__ \
})
// A generic buffer, which can be used for multiple purposes (texture transfer,
// storage buffer, uniform buffer, etc.)
//
// Note on efficiency: A pl_buf does not necessarily represent a true "buffer"
// object on the underlying graphics API. It may also refer to a sub-slice of
// a larger buffer, depending on the implementation details of the GPU. The
// bottom line is that users do not need to worry about the efficiency of using
// many small pl_buf objects. Having many small pl_bufs, even lots of few-byte
// vertex buffers, is designed to be completely fine.
//
// Thread-safety: Unsafe
typedef const struct pl_buf_t {
struct pl_buf_params params;
uint8_t *data; // for persistently mapped buffers, points to the first byte
// If `params.export_handle` or `params.import_handle` is set, this structure
// references the shared memory backing this buffer, via the requested handle
// type.
//
// While this buffer is not in an "exported" state, the contents of the
// memory are undefined. (See: `pl_buf_export`)
struct pl_shared_mem shared_mem;
} *pl_buf;
// Create a buffer. The type of buffer depends on the parameters. The buffer
// parameters must adhere to the restrictions imposed by the pl_gpu_limits.
// Returns NULL on failure.
//
// For buffers with shared memory, the buffer is considered to be in an
// "exported" state by default, and may be used directly by the external API
// after being created (until the first libplacebo operation on the buffer).
PL_API pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *params);
PL_API void pl_buf_destroy(pl_gpu gpu, pl_buf *buf);
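// Example: creating a host-writable uniform buffer (a sketch; size and flags
// chosen purely for illustration, error handling elided):
//
//   pl_buf buf = pl_buf_create(gpu, pl_buf_params(
//       .size          = 64 << 10, // must be <= pl_gpu_limits.max_ubo_size
//       .host_writable = true,
//       .uniform       = true,
//   ));
//   if (!buf)
//       /* creation failed */;
//   // ... use the buffer ...
//   pl_buf_destroy(gpu, &buf);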
// This behaves like `pl_buf_create`, but if the buffer already exists and has
// incompatible parameters, it will get destroyed first. A buffer is considered
// "compatible" if it has the same buffer type and texel format, a size greater
// than or equal to the requested size, and it has a superset of the features
// the user requested. After this operation, the contents of the buffer are
// undefined.
//
// Note: Due to its unpredictability, it's not allowed to use this with
// `params->initial_data` being set. Similarly, it's not allowed on a buffer
// with `params->export_handle`, since this may invalidate the corresponding
// external API's handle. Conversely, it *is* allowed on a buffer with
// `params->host_mapped`, and the corresponding `buf->data` pointer *may*
// change as a result of doing so.
//
// Note: If the `user_data` alone changes, this does not trigger a buffer
// recreation. In theory, this can be used to detect when the buffer ended
// up being recreated.
PL_API bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *params);
// Update the contents of a buffer, starting at a given offset (must be a
// multiple of 4) and up to a given size, with the contents of *data.
//
// This function will block until the buffer is no longer in use. Use
// `pl_buf_poll` to perform non-blocking queries of buffer availability.
//
// Note: This function can incur synchronization overhead, so it shouldn't be
// used in tight loops. If you do need to loop (e.g. to perform a strided
// write), consider using host-mapped buffers, or fixing the memory in RAM,
// before calling this function.
PL_API void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset,
const void *data, size_t size);
// Read back the contents of a buffer, starting at a given offset, storing the
// data into *dest. Returns whether successful.
//
// This function will block until the buffer is no longer in use. Use
// `pl_buf_poll` to perform non-blocking queries of buffer availability.
PL_API bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset,
void *dest, size_t size);
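// Example round trip through a buffer (a sketch; requires `host_writable` and
// `host_readable` to be set, error handling elided):
//
//   float data[16] = { /* ... */ };
//   pl_buf_write(gpu, buf, 0, data, sizeof(data));
//   float readback[16];
//   bool ok = pl_buf_read(gpu, buf, 0, readback, sizeof(readback));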
// Copy `size` bytes from one buffer to another, reading from and writing to
// the respective offsets.
PL_API void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset,
pl_buf src, size_t src_offset, size_t size);
// Initiates a buffer export operation, allowing a buffer to be accessed by an
// external API. This is only valid for buffers with a shared memory handle
// (`params.export_handle` or `params.import_handle`). Calling this twice in a
// row is a harmless no-op. Returns whether successful.
//
// There is no corresponding "buffer import" operation, the next libplacebo
// operation that touches the buffer (e.g. pl_tex_upload, but also pl_buf_write
// and pl_buf_read) will implicitly import the buffer back to libplacebo. Users
// must ensure that all pending operations made by the external API are fully
// completed before using it in libplacebo again. (Otherwise, the behaviour
// is undefined)
//
// Please note that this function returning does not mean the memory is
// immediately available as such. In general, it will mark a buffer as "in use"
// in the same way any other buffer operation would, and it is the user's
// responsibility to wait until `pl_buf_poll` returns false before accessing
// the memory from the external API.
//
// In terms of the access performed by this operation, it is not considered a
// "read" or "write" and therefore does not technically conflict with reads or
// writes to the buffer performed by the host (via mapped memory - any use of
// `pl_buf_read` or `pl_buf_write` would defeat the purpose of the export).
// However, restrictions made by the external API may apply that prevent this.
//
// The recommended use pattern is something like this:
//
// while (loop) {
//     pl_buf buf = get_free_buffer(); // or block on pl_buf_poll
//     // write to the buffer using the external API
//     pl_tex_upload(gpu, /* ... buf ... */); // implicitly imports
//     pl_buf_export(gpu, buf);
// }
//
// i.e. perform an external API operation, then use and immediately export the
// buffer in libplacebo, and finally wait until `pl_buf_poll` is false before
// re-using it in the external API. (Or get a new buffer in the meantime)
PL_API bool pl_buf_export(pl_gpu gpu, pl_buf buf);
// Returns whether or not a buffer is currently "in use". This can either be
// because of a pending read operation, a pending write operation or a pending
// buffer export operation. Any access to the buffer by external APIs or via
// the host pointer (for host-mapped buffers) is forbidden while a buffer is
// "in use". The only exception to this rule is multiple reads, for example
// reading from a buffer with `pl_tex_upload` while simultaneously reading from
// it using mapped memory.
//
// The `timeout`, specified in nanoseconds, indicates how long to block for
// before returning. If set to 0, this function will never block, and only
// returns the current status of the buffer. The actual precision of the
// timeout may be significantly longer than one nanosecond, and has no upper
// bound. This function does not provide hard latency guarantees. This function
// may also return at any time, even if the buffer is still in use. If the user
// wishes to block until the buffer is definitely no longer in use, the
// recommended usage is:
//
// while (pl_buf_poll(gpu, buf, UINT64_MAX))
//     ; // do nothing
//
// Note: libplacebo operations on buffers are always internally synchronized,
// so this is only needed for host-mapped or externally exported buffers.
// However, it may be used to do non-blocking queries before calling blocking
// functions such as `pl_buf_read`.
//
// Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly
// synchronized, meaning it can safely be called on a `pl_buf` that is in use
// by another thread.
PL_API bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout);
enum pl_tex_sample_mode {
PL_TEX_SAMPLE_NEAREST, // nearest neighbour sampling
PL_TEX_SAMPLE_LINEAR, // linear filtering, requires PL_FMT_CAP_LINEAR
PL_TEX_SAMPLE_MODE_COUNT,
};
enum pl_tex_address_mode {
PL_TEX_ADDRESS_CLAMP, // clamp the nearest edge texel
PL_TEX_ADDRESS_REPEAT, // repeat (tile) the texture
PL_TEX_ADDRESS_MIRROR, // repeat (mirror) the texture
PL_TEX_ADDRESS_MODE_COUNT,
};
// Structure describing a texture.
struct pl_tex_params {
int w, h, d; // physical dimension; unused dimensions must be 0
pl_fmt format;
// The following bools describe what operations can be performed. The
// corresponding pl_fmt capability must be set for every enabled
// operation type.
//
// Note: For planar formats, it is also possible to set capabilities only
// supported by sub-planes. In this case, the corresponding functionality
// will be available for the sub-plane, but not the planar texture itself.
bool sampleable; // usable as a PL_DESC_SAMPLED_TEX
bool renderable; // usable as a render target (pl_pass_run)
// (must only be used with 2D textures)
bool storable; // usable as a storage image (PL_DESC_IMG_*)
bool blit_src; // usable as a blit source
bool blit_dst; // usable as a blit destination
bool host_writable; // may be updated with pl_tex_upload()
bool host_readable; // may be fetched with pl_tex_download()
// Note: For `blit_src`, `blit_dst`, the texture must either be
// 2-dimensional or `pl_gpu_limits.blittable_1d_3d` must be set.
// At most one of `export_handle` and `import_handle` can be set for a
// texture.
// Setting this indicates that the memory backing this texture should be
// shared with external APIs. If so, this must be exactly *one* of
// `pl_gpu.export_caps.tex`.
enum pl_handle_type export_handle;
// Setting this indicates that the memory backing this texture will be
// imported from an external API. If so, this must be exactly *one* of
// `pl_gpu.import_caps.tex`. Mutually exclusive with `initial_data`.
enum pl_handle_type import_handle;
// If the shared memory is being imported, the import handle must be
// specified here. Otherwise, this is ignored.
struct pl_shared_mem shared_mem;
// If non-NULL, the texture will be created with these contents (tightly
// packed). Using this does *not* require setting host_writable. Otherwise,
// the initial data is undefined. Mutually exclusive with `import_handle`.
const void *initial_data;
// Arbitrary user data. libplacebo does not use this at all.
void *user_data;
// Arbitrary identifying tag. Used only for debugging purposes.
pl_debug_tag debug_tag;
};
#define pl_tex_params(...) (&(struct pl_tex_params) { \
.debug_tag = PL_DEBUG_TAG, \
__VA_ARGS__ \
})
static inline int pl_tex_params_dimension(const struct pl_tex_params params)
{
return params.d ? 3 : params.h ? 2 : 1;
}
enum pl_sampler_type {
PL_SAMPLER_NORMAL, // gsampler2D, gsampler3D etc.
PL_SAMPLER_RECT, // gsampler2DRect
PL_SAMPLER_EXTERNAL, // gsamplerExternalOES
PL_SAMPLER_TYPE_COUNT,
};
// Conflates the following typical GPU API concepts:
// - texture itself
// - sampler state
// - staging buffers for texture upload
// - framebuffer objects
// - wrappers for swapchain framebuffers
// - synchronization needed for upload/rendering/etc.
//
// Essentially a pl_tex can be anything ranging from a normal texture, a wrapped
// external/real framebuffer, a framebuffer object + texture pair, a mapped
// texture (via pl_hwdec), or other sorts of things that can be sampled from
// and/or rendered to.
//
// Thread-safety: Unsafe
typedef const struct pl_tex_t *pl_tex;
struct pl_tex_t {
struct pl_tex_params params;
// If `params.format` is a planar format, this contains `pl_tex` handles
// encapsulating individual texture planes. Conversely, if this is a
// sub-plane of a planar texture, `parent` points to the planar texture.
//
// Note: Calling `pl_tex_destroy` on sub-planes is undefined behavior.
pl_tex planes[4];
pl_tex parent;
// If `params.export_handle` is set, this structure references the shared
// memory backing this buffer, via the requested handle type.
//
// While this texture is not in an "exported" state, the contents of the
// memory are undefined. (See: `pl_tex_export`)
//
// Note: Due to Vulkan driver limitations, `shared_mem.drm_format_mod` will
// currently always be set to DRM_FORMAT_MOD_INVALID. No guarantee can be
// made about the cross-driver compatibility of textures exported this way.
struct pl_shared_mem shared_mem;
// If `params.sampleable` is true, this indicates the correct sampler type
// to use when sampling from this texture.
enum pl_sampler_type sampler_type;
};
// Create a texture (with undefined contents). Returns NULL on failure. This is
// assumed to be an expensive/rare operation, and may need to perform memory
// allocation or framebuffer creation.
PL_API pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params);
PL_API void pl_tex_destroy(pl_gpu gpu, pl_tex *tex);
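// Example: creating a sampleable, host-uploadable 2D texture (a sketch;
// assumes `fmt` was found via e.g. `pl_find_fmt` with matching caps):
//
//   pl_tex tex = pl_tex_create(gpu, pl_tex_params(
//       .w             = 1920,
//       .h             = 1080,
//       .format        = fmt,
//       .sampleable    = true,
//       .host_writable = true,
//   ));
//   if (!tex)
//       /* creation failed */;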
// This works like `pl_tex_create`, but if the texture already exists and has
// incompatible texture parameters, it will get destroyed first. A texture is
// considered "compatible" if it has the same texture format and sample/address
// mode and it supports a superset of the features the user requested.
//
// Even if the texture is not recreated, calling this function will still
// invalidate the contents of the texture. (Note: Because of this,
// `initial_data` may not be used with `pl_tex_recreate`. Doing so is an error)
//
// Note: If the `user_data` alone changes, this does not trigger a texture
// recreation. In theory, this can be used to detect when the texture ended
// up being recreated.
PL_API bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params);
// Invalidates the contents of a texture. After this, the contents are fully
// undefined.
PL_API void pl_tex_invalidate(pl_gpu gpu, pl_tex tex);
union pl_clear_color {
float f[4];
int32_t i[4];
uint32_t u[4];
};
// Clear the dst texture with the given color (rgba). This is functionally
// identical to a blit operation, which means `dst->params.blit_dst` must be
// set.
PL_API void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color);
// Wrapper for `pl_tex_clear_ex` which only works for floating point textures.
PL_API void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4]);
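// Example: clearing a float texture to opaque black (requires
// `dst->params.blit_dst` to be set):
//
//   pl_tex_clear(gpu, dst, (const float[4]) { 0.0, 0.0, 0.0, 1.0 });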
struct pl_tex_blit_params {
// The texture to blit from. Must have `params.blit_src` enabled.
pl_tex src;
// The texture to blit to. Must have `params.blit_dst` enabled, and a
// format that is loosely compatible with `src`. This essentially means
// that they must have the same `internal_size`. Additionally, UINT
// textures can only be blitted to other UINT textures, and SINT textures
// can only be blitted to other SINT textures.
pl_tex dst;
// The region of the source texture to blit. Must be within the texture
// bounds of `src`. May be flipped. (Optional)
pl_rect3d src_rc;
// The region of the destination texture to blit into. Must be within the
// texture bounds of `dst`. May be flipped. Areas outside of `dst_rc` in
// `dst` are preserved. (Optional)
pl_rect3d dst_rc;
// If `src_rc` and `dst_rc` have different sizes, the texture will be
// scaled using the given texture sampling mode.
enum pl_tex_sample_mode sample_mode;
};
#define pl_tex_blit_params(...) (&(struct pl_tex_blit_params) { __VA_ARGS__ })
// Copy a sub-rectangle from one texture to another.
PL_API void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params);
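// Example: a scaled blit covering both textures in their entirety (a sketch;
// leaving `src_rc`/`dst_rc` unset selects the full texture bounds, per the
// "(Optional)" notes above):
//
//   pl_tex_blit(gpu, pl_tex_blit_params(
//       .src         = src, // requires `params.blit_src`
//       .dst         = dst, // requires `params.blit_dst`
//       .sample_mode = PL_TEX_SAMPLE_LINEAR,
//   ));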
// Structure describing a texture transfer operation.
struct pl_tex_transfer_params {
// Texture to transfer to/from. Depending on the type of the operation,
// this must have params.host_writable (uploads) or params.host_readable
// (downloads) set, respectively.
pl_tex tex;
// Note: Superfluous parameters are ignored, i.e. for a 1D texture, the y
// and z fields of `rc`, as well as the corresponding pitches, are ignored.
// In all other cases, the pitch must be large enough to contain the
// corresponding dimension of `rc`, and the `rc` must be normalized and
// fully contained within the image dimensions. Missing fields in the `rc`
// are inferred from the image size. If unset, the pitch is inferred
// from `rc` (that is, it's assumed that the data is tightly packed in the
// buffer). Otherwise, `row_pitch` *must* be a multiple of
// `tex->params.format->texel_align`, and `depth_pitch` must be a multiple
// of `row_pitch`.
pl_rect3d rc; // region of the texture to transfer
size_t row_pitch; // the number of bytes separating image rows
size_t depth_pitch; // the number of bytes separating image planes
// An optional timer to report the approximate duration of the texture
// transfer to. Note that this is only an approximation, since the actual
// texture transfer may happen entirely in the background (in particular,
// for implementations with asynchronous transfer capabilities). It's also
// not guaranteed that all GPUs support this.
pl_timer timer;
// An optional callback to fire after the operation completes. If this is
// specified, then the operation is performed asynchronously. Note that
// transfers to/from buffers are always asynchronous, even without this
// field, so it's more useful for `ptr` transfers. (Though it can still be
// helpful to avoid having to manually poll buffers all the time)
//
// When this is *not* specified, uploads from `ptr` are still asynchronous
// but require a host memcpy, while downloads from `ptr` are blocking. As
// such, it's recommended to always try using asynchronous texture
// transfers wherever possible.
//
// Note: Requires `pl_gpu_limits.callbacks`
//
// Note: Callbacks are implicitly synchronized, meaning that callbacks are
// guaranteed to never execute concurrently with other callbacks. However,
// they may execute from any thread that the `pl_gpu` is used on.
void (*callback)(void *priv);
void *priv; // arbitrary user data
// For the data source/target of a transfer operation, there are two valid
// options:
//
// 1. Transferring to/from a buffer: (requires `pl_gpu_limits.buf_transfer`)
pl_buf buf; // buffer to use
size_t buf_offset; // offset of data within buffer, should be a
// multiple of `tex->params.format->texel_size`
// 2. Transferring to/from host memory directly:
void *ptr; // address of data
bool no_import; // always use memcpy, bypassing host ptr import
// Note: The contents of the memory region / buffer must exactly match the
// texture format; i.e. there is no explicit conversion between formats.
};
#define pl_tex_transfer_params(...) (&(struct pl_tex_transfer_params) { __VA_ARGS__ })
// Upload data to a texture. Returns whether successful.
PL_API bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params);
// Download data from a texture. Returns whether successful.
PL_API bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params);
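// Example: uploading tightly packed host memory to a 2D texture (a sketch;
// `pixels` must exactly match the texture format, error handling elided):
//
//   bool ok = pl_tex_upload(gpu, pl_tex_transfer_params(
//       .tex = tex, // requires `params.host_writable`
//       .ptr = pixels,
//   ));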
// Returns whether or not a texture is currently "in use". This can either be
// because of a pending read operation, a pending write operation or a pending
// texture export operation. Note that this function's usefulness is extremely
// limited under ordinary circumstances. In practically all cases, textures do
// not need to be directly synchronized by the user, except when interfacing
// with external libraries. This function should NOT, however, be used as a
// crutch to avoid having to implement semaphore-based synchronization. Use
// the API-specific functions such as `pl_vulkan_hold/release` for that.
//
// A good example of a use case in which this function is required is when
// interoperating with external memory management that needs to know when an
// imported texture is safe to free / reclaim internally, in which case
// semaphores are insufficient because memory management is a host operation.
//
// The `timeout`, specified in nanoseconds, indicates how long to block for
// before returning. If set to 0, this function will never block, and only
// returns the current status of the texture. The actual precision of the
// timeout may be significantly longer than one nanosecond, and has no upper
// bound. This function does not provide hard latency guarantees. This function
// may also return at any time, even if the texture is still in use. If the
// user wishes to block until the texture is definitely no longer in use, the
// recommended usage is:
//
// while (pl_tex_poll(gpu, tex, UINT64_MAX))
//     ; // do nothing
//
// Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly
// synchronized, meaning it can safely be called on a `pl_tex` that is in use
// by another thread.
PL_API bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout);
// Data type of a shader input variable (e.g. uniform, or UBO member)
enum pl_var_type {
PL_VAR_INVALID = 0,
PL_VAR_SINT, // C: int GLSL: int/ivec
PL_VAR_UINT, // C: unsigned int GLSL: uint/uvec
PL_VAR_FLOAT, // C: float GLSL: float/vec/mat
PL_VAR_TYPE_COUNT
};
// Returns the host size (in bytes) of a pl_var_type.
PL_API size_t pl_var_type_size(enum pl_var_type type);
// Represents a shader input variable (concrete data, e.g. vector, matrix)
struct pl_var {
const char *name; // name as used in the shader
enum pl_var_type type;
// The total number of values is given by dim_v * dim_m. For example, a
// vec2 would have dim_v = 2 and dim_m = 1. A mat3x4 would have dim_v = 4
// and dim_m = 3.
int dim_v; // vector dimension
int dim_m; // matrix dimension (number of columns, see below)
int dim_a; // array dimension
};
// Helper functions for constructing the most common pl_vars, with names
// matching their corresponding GLSL built-in types.
PL_API struct pl_var pl_var_float(const char *name);
PL_API struct pl_var pl_var_vec2(const char *name);
PL_API struct pl_var pl_var_vec3(const char *name);
PL_API struct pl_var pl_var_vec4(const char *name);
PL_API struct pl_var pl_var_mat2(const char *name);
PL_API struct pl_var pl_var_mat2x3(const char *name);
PL_API struct pl_var pl_var_mat2x4(const char *name);
PL_API struct pl_var pl_var_mat3(const char *name);
PL_API struct pl_var pl_var_mat3x4(const char *name);
PL_API struct pl_var pl_var_mat4x2(const char *name);
PL_API struct pl_var pl_var_mat4x3(const char *name);
PL_API struct pl_var pl_var_mat4(const char *name);
PL_API struct pl_var pl_var_int(const char *name);
PL_API struct pl_var pl_var_ivec2(const char *name);
PL_API struct pl_var pl_var_ivec3(const char *name);
PL_API struct pl_var pl_var_ivec4(const char *name);
PL_API struct pl_var pl_var_uint(const char *name);
PL_API struct pl_var pl_var_uvec2(const char *name);
PL_API struct pl_var pl_var_uvec3(const char *name);
PL_API struct pl_var pl_var_uvec4(const char *name);
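// For instance, `pl_var_vec4("color")` is expected to be equivalent to the
// following (assuming `dim_a = 1` denotes a non-array variable):
//
//   (struct pl_var) {
//       .name  = "color",
//       .type  = PL_VAR_FLOAT,
//       .dim_v = 4,
//       .dim_m = 1,
//       .dim_a = 1,
//   }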
struct pl_named_var {
const char *glsl_name;
struct pl_var var;