SDL

Форк
0
/
SDL_gpu_metal.m 
3988 строк · 145.3 Кб
1
/*
  Simple DirectMedia Layer
  Copyright (C) 1997-2024 Sam Lantinga <slouken@libsdl.org>

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
21

22
#include "SDL_internal.h"
23

24
#if SDL_GPU_METAL
25

26
#include <Metal/Metal.h>
27
#include <QuartzCore/CoreAnimation.h>
28

29
#include "../SDL_sysgpu.h"
30

31
// Defines
32

33
// Number of buffer slots assumed per shader stage. Vertex buffer bindings are
// assigned downward from the last slot (see METAL_INTERNAL_GetVertexBufferIndex).
#define METAL_MAX_BUFFER_COUNT      31
// NOTE(review): presumably the window property name under which the
// per-window Metal data is stored — confirm against the window claim code.
#define WINDOW_PROPERTY_DATA        "SDL_GPUMetalWindowPropertyData"
// NOTE(review): extra shader-stage value used for compute; usage is outside this excerpt.
#define SDL_GPU_SHADERSTAGE_COMPUTE 2
36

37
// Records `resource` in commandBuffer->array (unless it is already there) and
// increments the resource's referenceCount.
//
// Usage contract: expand this as the whole body of a void function that has a
// `commandBuffer` variable in scope. The expansion declares locals and uses
// `return` to leave the enclosing function when the resource is already
// tracked or when the array cannot be grown.
//
// `type` is the element type of the array (e.g. `MetalBuffer *`).
// The array grows one element at a time. On allocation failure the existing
// array is left untouched, an error is logged, and the resource is not tracked.
#define TRACK_RESOURCE(resource, type, array, count, capacity)        \
    Uint32 i;                                                         \
                                                                      \
    for (i = 0; i < commandBuffer->count; i += 1) {                   \
        if (commandBuffer->array[i] == (resource)) {                  \
            return;                                                   \
        }                                                             \
    }                                                                 \
                                                                      \
    if (commandBuffer->count == commandBuffer->capacity) {            \
        type *grownArray = (type *)SDL_realloc(                       \
            commandBuffer->array,                                     \
            (commandBuffer->capacity + 1) * sizeof(type));            \
        if (grownArray == NULL) {                                     \
            SDL_LogError(                                             \
                SDL_LOG_CATEGORY_GPU,                                 \
                "Out of memory while tracking a GPU resource");       \
            return;                                                   \
        }                                                             \
        commandBuffer->array = grownArray;                            \
        commandBuffer->capacity += 1;                                 \
    }                                                                 \
    commandBuffer->array[commandBuffer->count] = (resource);          \
    commandBuffer->count += 1;                                        \
    SDL_AtomicIncRef(&(resource)->referenceCount);
55

56
// Blit Shaders
57

58
#include "Metal_Blit.h"
59

60
// Forward Declarations
61

62
// Declared ahead of their definitions because METAL_DestroyDevice calls them.
static void METAL_Wait(SDL_GPURenderer *driverData);
static void METAL_ReleaseWindow(
    SDL_GPURenderer *driverData,
    SDL_Window *window);
static void METAL_INTERNAL_DestroyBlitResources(SDL_GPURenderer *driverData);
67

68
// Conversions
69

70
// Maps SDL_GPUTextureFormat values (used as the index) to Metal pixel formats.
// Entries that are only available on macOS are MTLPixelFormatInvalid on other
// platforms. The compile-time assert below keeps the table the same length as
// the SDL enum.
static const MTLPixelFormat SDLToMetal_SurfaceFormat[] = {
    MTLPixelFormatRGBA8Unorm,   // R8G8B8A8_UNORM
    MTLPixelFormatBGRA8Unorm,   // B8G8R8A8_UNORM
    MTLPixelFormatB5G6R5Unorm,  // B5G6R5_UNORM
    MTLPixelFormatBGR5A1Unorm,  // B5G5R5A1_UNORM
    MTLPixelFormatABGR4Unorm,   // B4G4R4A4_UNORM
    MTLPixelFormatRGB10A2Unorm, // R10G10B10A2_UNORM
    MTLPixelFormatRG16Unorm,    // R16G16_UNORM
    MTLPixelFormatRGBA16Unorm,  // R16G16B16A16_UNORM
    MTLPixelFormatR8Unorm,      // R8_UNORM
    MTLPixelFormatA8Unorm,      // A8_UNORM
#ifdef SDL_PLATFORM_MACOS
    MTLPixelFormatBC1_RGBA,      // BC1_UNORM
    MTLPixelFormatBC2_RGBA,      // BC2_UNORM
    MTLPixelFormatBC3_RGBA,      // BC3_UNORM
    MTLPixelFormatBC7_RGBAUnorm, // BC7_UNORM
#else
    MTLPixelFormatInvalid, // BC1_UNORM
    MTLPixelFormatInvalid, // BC2_UNORM
    MTLPixelFormatInvalid, // BC3_UNORM
    MTLPixelFormatInvalid, // BC7_UNORM
#endif
    MTLPixelFormatRG8Snorm,        // R8G8_SNORM
    MTLPixelFormatRGBA8Snorm,      // R8G8B8A8_SNORM
    MTLPixelFormatR16Float,        // R16_FLOAT
    MTLPixelFormatRG16Float,       // R16G16_FLOAT
    MTLPixelFormatRGBA16Float,     // R16G16B16A16_FLOAT
    MTLPixelFormatR32Float,        // R32_FLOAT
    MTLPixelFormatRG32Float,       // R32G32_FLOAT
    MTLPixelFormatRGBA32Float,     // R32G32B32A32_FLOAT
    MTLPixelFormatR8Uint,          // R8_UINT
    MTLPixelFormatRG8Uint,         // R8G8_UINT
    MTLPixelFormatRGBA8Uint,       // R8G8B8A8_UINT
    MTLPixelFormatR16Uint,         // R16_UINT
    MTLPixelFormatRG16Uint,        // R16G16_UINT
    MTLPixelFormatRGBA16Uint,      // R16G16B16A16_UINT
    MTLPixelFormatRGBA8Unorm_sRGB, // R8G8B8A8_UNORM_SRGB
    MTLPixelFormatBGRA8Unorm_sRGB, // B8G8R8A8_UNORM_SRGB
#ifdef SDL_PLATFORM_MACOS
    MTLPixelFormatBC3_RGBA_sRGB,      // BC3_UNORM_SRGB
    MTLPixelFormatBC7_RGBAUnorm_sRGB, // BC7_UNORM_SRGB
#else
    MTLPixelFormatInvalid, // BC3_UNORM_SRGB
    MTLPixelFormatInvalid, // BC7_UNORM_SRGB
#endif
    MTLPixelFormatDepth16Unorm, // D16_UNORM
#ifdef SDL_PLATFORM_MACOS
    MTLPixelFormatDepth24Unorm_Stencil8, // D24_UNORM
#else
    MTLPixelFormatInvalid, // D24_UNORM
#endif
    MTLPixelFormatDepth32Float, // D32_FLOAT
#ifdef SDL_PLATFORM_MACOS
    MTLPixelFormatDepth24Unorm_Stencil8, // D24_UNORM_S8_UINT
#else
    MTLPixelFormatInvalid, // D24_UNORM_S8_UINT
#endif
    MTLPixelFormatDepth32Float_Stencil8, // D32_FLOAT_S8_UINT
};
SDL_COMPILE_TIME_ASSERT(SDLToMetal_SurfaceFormat, SDL_arraysize(SDLToMetal_SurfaceFormat) == SDL_GPU_TEXTUREFORMAT_MAX);
130

131
// Maps SDL vertex element formats (used as the index, in the order named in
// the trailing comments) to Metal vertex formats. Read-only.
static const MTLVertexFormat SDLToMetal_VertexFormat[] = {
    MTLVertexFormatInt,               // INT
    MTLVertexFormatInt2,              // INT2
    MTLVertexFormatInt3,              // INT3
    MTLVertexFormatInt4,              // INT4
    MTLVertexFormatUInt,              // UINT
    MTLVertexFormatUInt2,             // UINT2
    MTLVertexFormatUInt3,             // UINT3
    MTLVertexFormatUInt4,             // UINT4
    MTLVertexFormatFloat,             // FLOAT
    MTLVertexFormatFloat2,            // FLOAT2
    MTLVertexFormatFloat3,            // FLOAT3
    MTLVertexFormatFloat4,            // FLOAT4
    MTLVertexFormatChar2,             // BYTE2
    MTLVertexFormatChar4,             // BYTE4
    MTLVertexFormatUChar2,            // UBYTE2
    MTLVertexFormatUChar4,            // UBYTE4
    MTLVertexFormatChar2Normalized,   // BYTE2_NORM
    MTLVertexFormatChar4Normalized,   // BYTE4_NORM
    MTLVertexFormatUChar2Normalized,  // UBYTE2_NORM
    MTLVertexFormatUChar4Normalized,  // UBYTE4_NORM
    MTLVertexFormatShort2,            // SHORT2
    MTLVertexFormatShort4,            // SHORT4
    MTLVertexFormatUShort2,           // USHORT2
    MTLVertexFormatUShort4,           // USHORT4
    MTLVertexFormatShort2Normalized,  // SHORT2_NORM
    MTLVertexFormatShort4Normalized,  // SHORT4_NORM
    MTLVertexFormatUShort2Normalized, // USHORT2_NORM
    MTLVertexFormatUShort4Normalized, // USHORT4_NORM
    MTLVertexFormatHalf2,             // HALF2
    MTLVertexFormatHalf4              // HALF4
};
163

164
// Maps SDL index element sizes (used as the index) to Metal index types. Read-only.
static const MTLIndexType SDLToMetal_IndexType[] = {
    MTLIndexTypeUInt16, // 16BIT
    MTLIndexTypeUInt32, // 32BIT
};
168

169
// Maps SDL primitive types (used as the index) to Metal primitive types. Read-only.
static const MTLPrimitiveType SDLToMetal_PrimitiveType[] = {
    MTLPrimitiveTypePoint,        // POINTLIST
    MTLPrimitiveTypeLine,         // LINELIST
    MTLPrimitiveTypeLineStrip,    // LINESTRIP
    MTLPrimitiveTypeTriangle,     // TRIANGLELIST
    MTLPrimitiveTypeTriangleStrip // TRIANGLESTRIP
};
176

177
// Maps SDL fill modes (used as the index) to Metal triangle fill modes. Read-only.
static const MTLTriangleFillMode SDLToMetal_PolygonMode[] = {
    MTLTriangleFillModeFill,  // FILL
    MTLTriangleFillModeLines, // LINE
};
181

182
// Maps SDL cull modes (used as the index) to Metal cull modes. Read-only.
static const MTLCullMode SDLToMetal_CullMode[] = {
    MTLCullModeNone,  // NONE
    MTLCullModeFront, // FRONT
    MTLCullModeBack,  // BACK
};
187

188
// Maps SDL front-face winding (used as the index) to Metal winding. Read-only.
static const MTLWinding SDLToMetal_FrontFace[] = {
    MTLWindingCounterClockwise, // COUNTER_CLOCKWISE
    MTLWindingClockwise,        // CLOCKWISE
};
192

193
// Maps SDL blend factors (used as the index) to Metal blend factors. Read-only.
static const MTLBlendFactor SDLToMetal_BlendFactor[] = {
    MTLBlendFactorZero,                     // ZERO
    MTLBlendFactorOne,                      // ONE
    MTLBlendFactorSourceColor,              // SRC_COLOR
    MTLBlendFactorOneMinusSourceColor,      // ONE_MINUS_SRC_COLOR
    MTLBlendFactorDestinationColor,         // DST_COLOR
    MTLBlendFactorOneMinusDestinationColor, // ONE_MINUS_DST_COLOR
    MTLBlendFactorSourceAlpha,              // SRC_ALPHA
    MTLBlendFactorOneMinusSourceAlpha,      // ONE_MINUS_SRC_ALPHA
    MTLBlendFactorDestinationAlpha,         // DST_ALPHA
    MTLBlendFactorOneMinusDestinationAlpha, // ONE_MINUS_DST_ALPHA
    MTLBlendFactorBlendColor,               // CONSTANT_COLOR
    MTLBlendFactorOneMinusBlendColor,       // ONE_MINUS_CONSTANT_COLOR
    MTLBlendFactorSourceAlphaSaturated,     // SRC_ALPHA_SATURATE
};
208

209
// Maps SDL blend operations (used as the index) to Metal blend operations. Read-only.
static const MTLBlendOperation SDLToMetal_BlendOp[] = {
    MTLBlendOperationAdd,             // ADD
    MTLBlendOperationSubtract,        // SUBTRACT
    MTLBlendOperationReverseSubtract, // REVERSE_SUBTRACT
    MTLBlendOperationMin,             // MIN
    MTLBlendOperationMax,             // MAX
};
216

217
// Maps SDL compare operations (used as the index) to Metal compare functions. Read-only.
static const MTLCompareFunction SDLToMetal_CompareOp[] = {
    MTLCompareFunctionNever,        // NEVER
    MTLCompareFunctionLess,         // LESS
    MTLCompareFunctionEqual,        // EQUAL
    MTLCompareFunctionLessEqual,    // LESS_OR_EQUAL
    MTLCompareFunctionGreater,      // GREATER
    MTLCompareFunctionNotEqual,     // NOT_EQUAL
    MTLCompareFunctionGreaterEqual, // GREATER_OR_EQUAL
    MTLCompareFunctionAlways,       // ALWAYS
};
227

228
// Maps SDL stencil operations (used as the index) to Metal stencil operations. Read-only.
static const MTLStencilOperation SDLToMetal_StencilOp[] = {
    MTLStencilOperationKeep,           // KEEP
    MTLStencilOperationZero,           // ZERO
    MTLStencilOperationReplace,        // REPLACE
    MTLStencilOperationIncrementClamp, // INCREMENT_AND_CLAMP
    MTLStencilOperationDecrementClamp, // DECREMENT_AND_CLAMP
    MTLStencilOperationInvert,         // INVERT
    MTLStencilOperationIncrementWrap,  // INCREMENT_AND_WRAP
    MTLStencilOperationDecrementWrap,  // DECREMENT_AND_WRAP
};
238

239
// Maps SDL sampler address modes (used as the index) to Metal address modes. Read-only.
static const MTLSamplerAddressMode SDLToMetal_SamplerAddressMode[] = {
    MTLSamplerAddressModeRepeat,       // REPEAT
    MTLSamplerAddressModeMirrorRepeat, // MIRRORED_REPEAT
    MTLSamplerAddressModeClampToEdge   // CLAMP_TO_EDGE
};
244

245
// Maps SDL min/mag filters (used as the index) to Metal min/mag filters. Read-only.
static const MTLSamplerMinMagFilter SDLToMetal_MinMagFilter[] = {
    MTLSamplerMinMagFilterNearest, // NEAREST
    MTLSamplerMinMagFilterLinear,  // LINEAR
};
249

250
// Maps SDL mipmap modes (used as the index) to Metal mip filters. Read-only.
static const MTLSamplerMipFilter SDLToMetal_MipFilter[] = {
    MTLSamplerMipFilterNearest, // NEAREST
    MTLSamplerMipFilterLinear,  // LINEAR
};
254

255
// Maps SDL load ops (used as the index) to Metal load actions. Read-only.
static const MTLLoadAction SDLToMetal_LoadOp[] = {
    MTLLoadActionLoad,     // LOAD
    MTLLoadActionClear,    // CLEAR
    MTLLoadActionDontCare, // DONT_CARE
};
260

261
// Maps SDL vertex input rates (used as the index) to Metal vertex step
// functions: index 0 advances per vertex, index 1 per instance. Read-only.
static const MTLVertexStepFunction SDLToMetal_StepFunction[] = {
    MTLVertexStepFunctionPerVertex,
    MTLVertexStepFunctionPerInstance,
};
265

266
// Maps SDL_GPUSampleCount values (used as the index) to the numeric sample
// count. Read-only.
static const NSUInteger SDLToMetal_SampleCount[] = {
    1, // SDL_GPU_SAMPLECOUNT_1
    2, // SDL_GPU_SAMPLECOUNT_2
    4, // SDL_GPU_SAMPLECOUNT_4
    8  // SDL_GPU_SAMPLECOUNT_8
};
272

273
// Maps SDL_GPUTextureType values (used as the index) to Metal texture types. Read-only.
static const MTLTextureType SDLToMetal_TextureType[] = {
    MTLTextureType2D,      // SDL_GPU_TEXTURETYPE_2D
    MTLTextureType2DArray, // SDL_GPU_TEXTURETYPE_2D_ARRAY
    MTLTextureType3D,      // SDL_GPU_TEXTURETYPE_3D
    MTLTextureTypeCube     // SDL_GPU_TEXTURETYPE_CUBE
};
279

280
// Texture format used for the swapchain under each swapchain composition
// (used as the index, in the order named in the trailing comments). Read-only.
static const SDL_GPUTextureFormat SwapchainCompositionToFormat[] = {
    SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM,      // SDR
    SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM_SRGB, // SDR_LINEAR
    SDL_GPU_TEXTUREFORMAT_R16G16B16A16_FLOAT,  // HDR_EXTENDED_LINEAR
    SDL_GPU_TEXTUREFORMAT_R10G10B10A2_UNORM,   // HDR10_ST2048
};
286

287
// Color space name per swapchain composition. Has 4 entries, the same length
// as SwapchainCompositionToFormat; presumably indexed the same way — confirm
// at the initialization site. Not const because it is filled in at runtime.
static CFStringRef SwapchainCompositionToColorSpace[4]; // initialized on device creation
288

289
// Translates an SDL store op into the Metal store action for an attachment.
//
// storeOp:       SDL_GPU_STOREOP_STORE keeps the attachment contents; any
//                other value is treated as "don't care".
// isMultisample: non-zero selects the multisample-resolve variants.
//
// Returns:
//   multisample + STORE  -> MTLStoreActionStoreAndMultisampleResolve
//   multisample + other  -> MTLStoreActionMultisampleResolve
//   single      + STORE  -> MTLStoreActionStore
//   single      + other  -> MTLStoreActionDontCare
static MTLStoreAction SDLToMetal_StoreOp(
    SDL_GPUStoreOp storeOp,
    Uint8 isMultisample)
{
    const bool keepContents = (storeOp == SDL_GPU_STOREOP_STORE);

    if (isMultisample) {
        return keepContents ? MTLStoreActionStoreAndMultisampleResolve
                            : MTLStoreActionMultisampleResolve;
    }

    return keepContents ? MTLStoreActionStore : MTLStoreActionDontCare;
}
307

308
// Converts an SDL color component bitmask into the equivalent Metal color
// write mask. Each of the R, G, B and A bits is translated independently;
// bits other than those four are ignored.
static MTLColorWriteMask SDLToMetal_ColorWriteMask(
    SDL_GPUColorComponentFlags mask)
{
    MTLColorWriteMask metalMask = 0;

    metalMask |= (mask & SDL_GPU_COLORCOMPONENT_R) ? MTLColorWriteMaskRed : 0;
    metalMask |= (mask & SDL_GPU_COLORCOMPONENT_G) ? MTLColorWriteMaskGreen : 0;
    metalMask |= (mask & SDL_GPU_COLORCOMPONENT_B) ? MTLColorWriteMaskBlue : 0;
    metalMask |= (mask & SDL_GPU_COLORCOMPONENT_A) ? MTLColorWriteMaskAlpha : 0;

    return metalMask;
}
326

327
// Structs
328

329
// One Metal texture. Instances are held by a MetalTextureContainer
// (`textures` array) or embedded in MetalWindowData.
typedef struct MetalTexture
{
    id<MTLTexture> handle;        // the Metal texture object
    id<MTLTexture> msaaHandle;    // NOTE(review): presumably the multisample target paired with `handle` — confirm
    SDL_AtomicInt referenceCount; // incremented when a command buffer tracks this texture (TRACK_RESOURCE)
} MetalTexture;
335

336
// Backend object behind an SDL_GPUTexture handle (METAL_ReleaseTexture casts
// the handle to this type). Owns a growable array of MetalTexture objects,
// one of which is the active one.
typedef struct MetalTextureContainer
{
    TextureCommonHeader header; // shared header declared outside this file section

    MetalTexture *activeTexture; // texture currently in use
    Uint8 canBeCycled;           // NOTE(review): presumably whether a different texture may be made active — confirm

    Uint32 textureCapacity; // allocated length of `textures`
    Uint32 textureCount;    // number of valid entries in `textures`
    MetalTexture **textures; // every texture owned by this container

    char *debugName; // optional, heap-allocated; freed with the container
} MetalTextureContainer;
349

350
// Fence object: a single atomic completion flag.
typedef struct MetalFence
{
    SDL_AtomicInt complete; // NOTE(review): presumably non-zero once the associated work has finished — confirm
} MetalFence;
354

355
// Per-window state for a window claimed by the renderer
// (see MetalRenderer.claimedWindows).
typedef struct MetalWindowData
{
    SDL_Window *window;                     // the SDL window this data belongs to
    SDL_MetalView view;                     // SDL Metal view for the window
    CAMetalLayer *layer;                    // Core Animation Metal layer
    id<CAMetalDrawable> drawable;           // drawable object; acquisition happens outside this excerpt
    MetalTexture texture;                   // embedded texture (by value, not heap-allocated)
    MetalTextureContainer textureContainer; // embedded container (by value)
} MetalWindowData;
364

365
// Backend object behind an SDL_GPUShader handle: a Metal library, a function
// from it, and the shader's resource counts.
typedef struct MetalShader
{
    id<MTLLibrary> library;   // library the function was loaded from
    id<MTLFunction> function; // the shader function

    Uint32 samplerCount;
    Uint32 uniformBufferCount;
    Uint32 storageBufferCount;
    Uint32 storageTextureCount;
} MetalShader;
375

376
// Backend object behind an SDL_GPUGraphicsPipeline handle: the Metal pipeline
// and depth/stencil state objects plus fixed-function state and per-stage
// resource counts.
typedef struct MetalGraphicsPipeline
{
    id<MTLRenderPipelineState> handle; // the Metal render pipeline state

    float blendConstants[4];
    Uint32 sampleMask;

    SDL_GPURasterizerState rasterizerState;
    SDL_GPUPrimitiveType primitiveType;

    id<MTLDepthStencilState> depthStencilState;
    Uint8 stencilReference;

    // Vertex-stage resource counts
    Uint32 vertexSamplerCount;
    Uint32 vertexUniformBufferCount;
    Uint32 vertexStorageBufferCount;
    Uint32 vertexStorageTextureCount;

    // Fragment-stage resource counts
    Uint32 fragmentSamplerCount;
    Uint32 fragmentUniformBufferCount;
    Uint32 fragmentStorageBufferCount;
    Uint32 fragmentStorageTextureCount;
} MetalGraphicsPipeline;
399

400
// Backend object behind an SDL_GPUComputePipeline handle: the Metal compute
// pipeline state, its resource counts and its thread counts.
typedef struct MetalComputePipeline
{
    id<MTLComputePipelineState> handle; // the Metal compute pipeline state
    Uint32 readOnlyStorageTextureCount;
    Uint32 writeOnlyStorageTextureCount;
    Uint32 readOnlyStorageBufferCount;
    Uint32 writeOnlyStorageBufferCount;
    Uint32 uniformBufferCount;
    Uint32 threadCountX; // NOTE(review): presumably threads per threadgroup along X — confirm
    Uint32 threadCountY;
    Uint32 threadCountZ;
} MetalComputePipeline;
412

413
// One Metal buffer. Instances are held by a MetalBufferContainer.
typedef struct MetalBuffer
{
    id<MTLBuffer> handle;         // the Metal buffer object
    SDL_AtomicInt referenceCount; // incremented when a command buffer tracks this buffer (TRACK_RESOURCE)
} MetalBuffer;
418

419
// Backend object behind SDL_GPUBuffer and SDL_GPUTransferBuffer handles
// (METAL_ReleaseBuffer casts the handle to this type, and
// METAL_ReleaseTransferBuffer forwards to it). Owns a growable array of
// MetalBuffer objects, one of which is the active one.
typedef struct MetalBufferContainer
{
    MetalBuffer *activeBuffer; // buffer currently in use
    Uint32 size;               // NOTE(review): presumably the size in bytes of each buffer — confirm

    Uint32 bufferCapacity; // allocated length of `buffers`
    Uint32 bufferCount;    // number of valid entries in `buffers`
    MetalBuffer **buffers; // every buffer owned by this container

    bool isPrivate;
    bool isWriteOnly;
    char *debugName; // optional, heap-allocated; freed with the container
} MetalBufferContainer;
432

433
// A pooled uniform buffer (see MetalRenderer.uniformBufferPool) with two
// offsets into it.
typedef struct MetalUniformBuffer
{
    id<MTLBuffer> handle; // the Metal buffer object
    Uint32 writeOffset;   // NOTE(review): presumably where the next uniform write lands — confirm
    Uint32 drawOffset;    // NOTE(review): presumably the offset bound for the current draw — confirm
} MetalUniformBuffer;
439

440
// Forward declaration so MetalCommandBuffer can hold a pointer to its renderer
// before struct MetalRenderer is defined.
typedef struct MetalRenderer MetalRenderer;
441

442
// Backend command buffer. Holds the native Metal command buffer, the encoder
// and pipeline for whichever pass is open, the resources bound to each shader
// stage, and the lists of resources the command buffer has tracked.
typedef struct MetalCommandBuffer
{
    CommandBufferCommonHeader common; // shared header declared outside this file section
    MetalRenderer *renderer;          // owning renderer

    // Native Handle
    id<MTLCommandBuffer> handle;

    // Presentation
    MetalWindowData **windowDatas;
    Uint32 windowDataCount;
    Uint32 windowDataCapacity;

    // Render Pass
    id<MTLRenderCommandEncoder> renderEncoder;
    MetalGraphicsPipeline *graphicsPipeline;
    MetalBuffer *indexBuffer;
    Uint32 indexBufferOffset;
    SDL_GPUIndexElementSize indexElementSize;

    // Copy Pass
    id<MTLBlitCommandEncoder> blitEncoder;

    // Compute Pass
    id<MTLComputeCommandEncoder> computeEncoder;
    MetalComputePipeline *computePipeline;

    // Resource slot state
    // NOTE(review): presumably set when the matching arrays below change and
    // cleared once they are re-bound on the encoder — confirm at the bind sites.
    bool needVertexSamplerBind;
    bool needVertexStorageTextureBind;
    bool needVertexStorageBufferBind;
    bool needVertexUniformBind;

    bool needFragmentSamplerBind;
    bool needFragmentStorageTextureBind;
    bool needFragmentStorageBufferBind;
    bool needFragmentUniformBind;

    bool needComputeTextureBind;
    bool needComputeBufferBind;
    bool needComputeUniformBind;

    // Vertex-stage bindings
    id<MTLSamplerState> vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
    id<MTLTexture> vertexTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
    id<MTLTexture> vertexStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
    id<MTLBuffer> vertexStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];

    // Fragment-stage bindings
    id<MTLSamplerState> fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
    id<MTLTexture> fragmentTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
    id<MTLTexture> fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
    id<MTLBuffer> fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];

    // Compute-stage bindings
    id<MTLTexture> computeReadOnlyTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
    id<MTLBuffer> computeReadOnlyBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
    id<MTLTexture> computeWriteOnlyTextures[MAX_COMPUTE_WRITE_TEXTURES];
    id<MTLBuffer> computeWriteOnlyBuffers[MAX_COMPUTE_WRITE_BUFFERS];

    // Uniform buffers
    MetalUniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
    MetalUniformBuffer *fragmentUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
    MetalUniformBuffer *computeUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];

    // Uniform buffers recorded by METAL_INTERNAL_TrackUniformBuffer
    MetalUniformBuffer **usedUniformBuffers;
    Uint32 usedUniformBufferCount;
    Uint32 usedUniformBufferCapacity;

    // Fences
    MetalFence *fence;
    Uint8 autoReleaseFence;

    // Reference Counting
    // Filled by METAL_INTERNAL_TrackBuffer / METAL_INTERNAL_TrackTexture, which
    // also increment each resource's referenceCount.
    MetalBuffer **usedBuffers;
    Uint32 usedBufferCount;
    Uint32 usedBufferCapacity;

    MetalTexture **usedTextures;
    Uint32 usedTextureCount;
    Uint32 usedTextureCapacity;
} MetalCommandBuffer;
521

522
// Backend object behind an SDL_GPUSampler handle (METAL_ReleaseSampler casts
// the handle to this type).
typedef struct MetalSampler
{
    id<MTLSamplerState> handle; // the Metal sampler state object
} MetalSampler;
526

527
// Pairs a graphics pipeline with a texture format.
// NOTE(review): MetalRenderer stores its blit pipelines as
// BlitPipelineCacheEntry, not this type; check whether this struct is still used.
typedef struct BlitPipeline
{
    SDL_GPUGraphicsPipeline *pipeline;
    SDL_GPUTextureFormat format;
} BlitPipeline;
532

533
// Backend state for one GPU device: the Metal device and queue, the claimed
// windows, pools of reusable objects, deferred-destroy lists, blit resources
// and the mutexes guarding them. Freed by METAL_DestroyDevice.
struct MetalRenderer
{
    // Reference to the parent device
    SDL_GPUDevice *sdlGPUDevice;

    id<MTLDevice> device;
    id<MTLCommandQueue> queue;

    bool debugMode;

    // Windows claimed by this renderer
    MetalWindowData **claimedWindows;
    Uint32 claimedWindowCount;
    Uint32 claimedWindowCapacity;

    // Command buffers ready for reuse
    MetalCommandBuffer **availableCommandBuffers;
    Uint32 availableCommandBufferCount;
    Uint32 availableCommandBufferCapacity;

    // Command buffers that have been submitted
    MetalCommandBuffer **submittedCommandBuffers;
    Uint32 submittedCommandBufferCount;
    Uint32 submittedCommandBufferCapacity;

    // Fences ready for reuse
    MetalFence **availableFences;
    Uint32 availableFenceCount;
    Uint32 availableFenceCapacity;

    // Pooled uniform buffers
    MetalUniformBuffer **uniformBufferPool;
    Uint32 uniformBufferPoolCount;
    Uint32 uniformBufferPoolCapacity;

    // Containers queued by METAL_ReleaseBuffer (appended under disposeLock)
    MetalBufferContainer **bufferContainersToDestroy;
    Uint32 bufferContainersToDestroyCount;
    Uint32 bufferContainersToDestroyCapacity;

    // Containers queued by METAL_ReleaseTexture (appended under disposeLock)
    MetalTextureContainer **textureContainersToDestroy;
    Uint32 textureContainersToDestroyCount;
    Uint32 textureContainersToDestroyCapacity;

    // Blit
    SDL_GPUShader *blitVertexShader;
    SDL_GPUShader *blitFrom2DShader;
    SDL_GPUShader *blitFrom2DArrayShader;
    SDL_GPUShader *blitFrom3DShader;
    SDL_GPUShader *blitFromCubeShader;

    SDL_GPUSampler *blitNearestSampler;
    SDL_GPUSampler *blitLinearSampler;

    BlitPipelineCacheEntry *blitPipelines;
    Uint32 blitPipelineCount;
    Uint32 blitPipelineCapacity;

    // Mutexes
    SDL_Mutex *submitLock;
    SDL_Mutex *acquireCommandBufferLock;
    SDL_Mutex *acquireUniformBufferLock;
    SDL_Mutex *disposeLock;
    SDL_Mutex *fenceLock;
    SDL_Mutex *windowLock;
};
593

594
// Helper Functions
595

596
// Maps a vertex buffer binding number to a Metal buffer slot, counting down
// from the last slot: binding 0 -> METAL_MAX_BUFFER_COUNT - 1, binding 1 ->
// METAL_MAX_BUFFER_COUNT - 2, and so on.
// The subtraction is unsigned, so a binding greater than
// METAL_MAX_BUFFER_COUNT - 1 wraps around; callers must stay in range.
static Uint32 METAL_INTERNAL_GetVertexBufferIndex(Uint32 binding)
{
    const Uint32 lastSlot = METAL_MAX_BUFFER_COUNT - 1;
    return lastSlot - binding;
}
600

601
// FIXME: This should be moved into SDL_sysgpu.h
// Rounds `n` up to the nearest multiple of `align` (returns `n` unchanged when
// it is already a multiple). `align` must be non-zero because it is used as a
// divisor. The arithmetic is 32-bit unsigned, so `n + align - 1` must not
// exceed the Uint32 range.
static inline Uint32 METAL_INTERNAL_NextHighestAlignment(
    Uint32 n,
    Uint32 align)
{
    const Uint32 blockCount = (n + align - 1) / align;
    return align * blockCount;
}
608

609
// Quit
610

611
// Shuts down the Metal backend for `device` and frees both the renderer and
// the SDL_GPUDevice itself. `device` must not be used afterwards.
//
// Steps, in order: wait for outstanding GPU work, release every claimed
// window, destroy the blit resources, free the uniform buffer pool, free the
// deferred-destroy lists, free the available command buffers and the
// command-buffer arrays, free the pooled fences, destroy the mutexes, drop
// the Metal queue and device references, then free the structs.
static void METAL_DestroyDevice(SDL_GPUDevice *device)
{
    MetalRenderer *renderer = (MetalRenderer *)device->driverData;

    // Flush any remaining GPU work...
    METAL_Wait(device->driverData);

    // Release the window data.
    // Walks backwards by index; NOTE(review): presumably because
    // METAL_ReleaseWindow removes the entry from claimedWindows — confirm.
    for (Sint32 i = renderer->claimedWindowCount - 1; i >= 0; i -= 1) {
        METAL_ReleaseWindow(device->driverData, renderer->claimedWindows[i]->window);
    }
    SDL_free(renderer->claimedWindows);

    // Release the blit resources
    METAL_INTERNAL_DestroyBlitResources(device->driverData);

    // Release uniform buffers (clear the Metal reference before freeing the struct)
    for (Uint32 i = 0; i < renderer->uniformBufferPoolCount; i += 1) {
        renderer->uniformBufferPool[i]->handle = nil;
        SDL_free(renderer->uniformBufferPool[i]);
    }
    SDL_free(renderer->uniformBufferPool);

    // Release destroyed resource lists (only the arrays are freed here)
    SDL_free(renderer->bufferContainersToDestroy);
    SDL_free(renderer->textureContainersToDestroy);

    // Release command buffer infrastructure
    for (Uint32 i = 0; i < renderer->availableCommandBufferCount; i += 1) {
        MetalCommandBuffer *commandBuffer = renderer->availableCommandBuffers[i];
        SDL_free(commandBuffer->usedBuffers);
        SDL_free(commandBuffer->usedTextures);
        SDL_free(commandBuffer->usedUniformBuffers);
        SDL_free(commandBuffer->windowDatas);
        SDL_free(commandBuffer);
    }
    SDL_free(renderer->availableCommandBuffers);
    SDL_free(renderer->submittedCommandBuffers);

    // Release fence infrastructure
    for (Uint32 i = 0; i < renderer->availableFenceCount; i += 1) {
        SDL_free(renderer->availableFences[i]);
    }
    SDL_free(renderer->availableFences);

    // Release the mutexes
    SDL_DestroyMutex(renderer->submitLock);
    SDL_DestroyMutex(renderer->acquireCommandBufferLock);
    SDL_DestroyMutex(renderer->acquireUniformBufferLock);
    SDL_DestroyMutex(renderer->disposeLock);
    SDL_DestroyMutex(renderer->fenceLock);
    SDL_DestroyMutex(renderer->windowLock);

    // Release the command queue
    renderer->queue = nil;

    // Release the Metal device. The struct is freed with SDL_free, which does
    // not release object references stored in it, so the reference has to be
    // cleared explicitly like every other Metal handle in this file.
    renderer->device = nil;

    // Free the primary structures
    SDL_free(renderer);
    SDL_free(device);
}
671

672
// Resource tracking
673

674
// Adds `buffer` to commandBuffer->usedBuffers (if it is not already listed)
// and increments its referenceCount. The whole body is the TRACK_RESOURCE
// expansion, which returns early when the buffer is already tracked.
static void METAL_INTERNAL_TrackBuffer(
    MetalCommandBuffer *commandBuffer,
    MetalBuffer *buffer)
{
    TRACK_RESOURCE(buffer, MetalBuffer *, usedBuffers, usedBufferCount, usedBufferCapacity);
}
685

686
// Adds `texture` to commandBuffer->usedTextures (if it is not already listed)
// and increments its referenceCount. The whole body is the TRACK_RESOURCE
// expansion, which returns early when the texture is already tracked.
static void METAL_INTERNAL_TrackTexture(
    MetalCommandBuffer *commandBuffer,
    MetalTexture *texture)
{
    TRACK_RESOURCE(texture, MetalTexture *, usedTextures, usedTextureCount, usedTextureCapacity);
}
697

698
// Adds `uniformBuffer` to commandBuffer->usedUniformBuffers unless it is
// already listed. Unlike buffers and textures, uniform buffers carry no
// reference count, so only the list is updated.
//
// The list grows one element at a time. If it cannot be grown, the existing
// list is left intact, an error is logged, and the uniform buffer is not
// recorded.
static void METAL_INTERNAL_TrackUniformBuffer(
    MetalCommandBuffer *commandBuffer,
    MetalUniformBuffer *uniformBuffer)
{
    // Already tracked? Nothing to do.
    for (Uint32 i = 0; i < commandBuffer->usedUniformBufferCount; i += 1) {
        if (commandBuffer->usedUniformBuffers[i] == uniformBuffer) {
            return;
        }
    }

    if (commandBuffer->usedUniformBufferCount == commandBuffer->usedUniformBufferCapacity) {
        // Grow through a temporary so the old array survives a failed realloc.
        MetalUniformBuffer **grownArray = (MetalUniformBuffer **)SDL_realloc(
            commandBuffer->usedUniformBuffers,
            (commandBuffer->usedUniformBufferCapacity + 1) * sizeof(MetalUniformBuffer *));
        if (grownArray == NULL) {
            SDL_LogError(
                SDL_LOG_CATEGORY_GPU,
                "Out of memory while tracking a uniform buffer");
            return;
        }
        commandBuffer->usedUniformBuffers = grownArray;
        commandBuffer->usedUniformBufferCapacity += 1;
    }

    commandBuffer->usedUniformBuffers[commandBuffer->usedUniformBufferCount] = uniformBuffer;
    commandBuffer->usedUniformBufferCount += 1;
}
719

720
// Shader Compilation
721

722
// Result of METAL_INTERNAL_CompileShader: the library and the entry-point
// function loaded from it. Both members are nil when compilation failed.
typedef struct MetalLibraryFunction
{
    id<MTLLibrary> library;
    id<MTLFunction> function;
} MetalLibraryFunction;
727

728
// Builds a Metal library from shader code and looks up its entry point.
//
// This function assumes that it's called from within an autorelease pool.
//
// renderer:       supplies the MTLDevice used to create the library.
// format:         SDL_GPU_SHADERFORMAT_MSL (source text, interpreted as UTF-8)
//                 or SDL_GPU_SHADERFORMAT_METALLIB (precompiled library data).
//                 Any other value trips an assert and yields a nil result.
// code, codeSize: the shader bytes and their length.
// entryPointName: NUL-terminated name of the function to load.
//
// Returns a MetalLibraryFunction with both members set on success. On failure
// an error is logged and both members are nil.
static MetalLibraryFunction METAL_INTERNAL_CompileShader(
    MetalRenderer *renderer,
    SDL_GPUShaderFormat format,
    const Uint8 *code,
    size_t codeSize,
    const char *entryPointName)
{
    MetalLibraryFunction libraryFunction = { nil, nil };
    id<MTLLibrary> library = nil;
    id<MTLFunction> function = nil;
    NSError *error = nil;

    if (format == SDL_GPU_SHADERFORMAT_MSL) {
        NSString *codeString = [[NSString alloc]
            initWithBytes:code
                   length:codeSize
                 encoding:NSUTF8StringEncoding];
        library = [renderer->device
            newLibraryWithSource:codeString
                         options:nil
                           error:&error];
    } else if (format == SDL_GPU_SHADERFORMAT_METALLIB) {
        // The no-op destructor block means the dispatch data wraps `code`
        // without copying it and never frees it.
        // NOTE(review): this relies on `code` staying valid for as long as
        // Metal reads from the data object — confirm against callers.
        dispatch_data_t data = dispatch_data_create(
            code,
            codeSize,
            dispatch_get_global_queue(0, 0),
            ^{ /* do nothing */ });
        library = [renderer->device newLibraryWithData:data error:&error];
    } else {
        SDL_assert(!"SDL_gpu.c should have already validated this!");
        return libraryFunction;
    }

    if (library == nil) {
        SDL_LogError(
            SDL_LOG_CATEGORY_GPU,
            "Creating MTLLibrary failed: %s",
            [[error description] UTF8String]);
        return libraryFunction;
    } else if (error != nil) {
        // A library together with an error object means the compile succeeded
        // but produced diagnostics, so report them as warnings, not a failure.
        SDL_LogWarn(
            SDL_LOG_CATEGORY_GPU,
            "Creating MTLLibrary succeeded with warnings: %s",
            [[error description] UTF8String]);
    }

    function = [library newFunctionWithName:@(entryPointName)];
    if (function == nil) {
        SDL_LogError(
            SDL_LOG_CATEGORY_GPU,
            "Creating MTLFunction failed");
        return libraryFunction;
    }

    libraryFunction.library = library;
    libraryFunction.function = function;
    return libraryFunction;
}
788

789
// Disposal
790

791
// Frees a texture container and everything it owns: each MetalTexture (after
// clearing its two Metal handles), the debug name, the texture array, and the
// container itself. The container pointer is invalid afterwards.
static void METAL_INTERNAL_DestroyTextureContainer(
    MetalTextureContainer *container)
{
    for (Uint32 index = 0; index < container->textureCount; index += 1) {
        MetalTexture *owned = container->textures[index];

        // Drop the Metal object references before the struct memory goes away.
        owned->handle = nil;
        owned->msaaHandle = nil;
        SDL_free(owned);
    }

    // SDL_free ignores NULL, so an unset debug name needs no special case.
    SDL_free(container->debugName);
    SDL_free(container->textures);
    SDL_free(container);
}
805

806
// Queues a texture for destruction. The container behind `texture` is
// appended to renderer->textureContainersToDestroy while holding disposeLock;
// nothing is freed here.
static void METAL_ReleaseTexture(
    SDL_GPURenderer *driverData,
    SDL_GPUTexture *texture)
{
    MetalRenderer *renderer = (MetalRenderer *)driverData;
    MetalTextureContainer *doomed = (MetalTextureContainer *)texture;

    SDL_LockMutex(renderer->disposeLock);

    // Make room for one more entry.
    EXPAND_ARRAY_IF_NEEDED(
        renderer->textureContainersToDestroy,
        MetalTextureContainer *,
        renderer->textureContainersToDestroyCount + 1,
        renderer->textureContainersToDestroyCapacity,
        renderer->textureContainersToDestroyCapacity + 1);

    renderer->textureContainersToDestroy[renderer->textureContainersToDestroyCount++] = doomed;

    SDL_UnlockMutex(renderer->disposeLock);
}
827

828
// Destroys a sampler immediately: clears its Metal sampler state reference
// and frees the MetalSampler struct. `driverData` is not used.
static void METAL_ReleaseSampler(
    SDL_GPURenderer *driverData,
    SDL_GPUSampler *sampler)
{
    (void)driverData;

    @autoreleasepool {
        MetalSampler *samplerData = (MetalSampler *)sampler;

        // Drop the Metal object reference before the struct memory goes away.
        samplerData->handle = nil;
        SDL_free(samplerData);
    }
}
838

839
// Frees a buffer container and everything it owns: each MetalBuffer (after
// clearing its Metal handle), the debug name, the buffer array, and the
// container itself. The container pointer is invalid afterwards.
static void METAL_INTERNAL_DestroyBufferContainer(
    MetalBufferContainer *container)
{
    for (Uint32 index = 0; index < container->bufferCount; index += 1) {
        MetalBuffer *owned = container->buffers[index];

        // Drop the Metal object reference before the struct memory goes away.
        owned->handle = nil;
        SDL_free(owned);
    }

    // SDL_free ignores NULL, so an unset debug name needs no special case.
    SDL_free(container->debugName);
    SDL_free(container->buffers);
    SDL_free(container);
}
852

853
// Queues a buffer for destruction. The container behind `buffer` is appended
// to renderer->bufferContainersToDestroy while holding disposeLock; nothing
// is freed here.
static void METAL_ReleaseBuffer(
    SDL_GPURenderer *driverData,
    SDL_GPUBuffer *buffer)
{
    MetalRenderer *renderer = (MetalRenderer *)driverData;
    MetalBufferContainer *doomed = (MetalBufferContainer *)buffer;

    SDL_LockMutex(renderer->disposeLock);

    // Make room for one more entry.
    EXPAND_ARRAY_IF_NEEDED(
        renderer->bufferContainersToDestroy,
        MetalBufferContainer *,
        renderer->bufferContainersToDestroyCount + 1,
        renderer->bufferContainersToDestroyCapacity,
        renderer->bufferContainersToDestroyCapacity + 1);

    renderer->bufferContainersToDestroy[renderer->bufferContainersToDestroyCount++] = doomed;

    SDL_UnlockMutex(renderer->disposeLock);
}
874

875
// Queues a transfer buffer for destruction. The handle is passed to
// METAL_ReleaseBuffer unchanged apart from the pointer cast, so transfer
// buffers go through the same deferred-destroy list as regular buffers.
static void METAL_ReleaseTransferBuffer(
    SDL_GPURenderer *driverData,
    SDL_GPUTransferBuffer *transferBuffer)
{
    SDL_GPUBuffer *asBuffer = (SDL_GPUBuffer *)transferBuffer;
    METAL_ReleaseBuffer(driverData, asBuffer);
}
883

884
// Destroys a shader immediately: clears its function and library references
// (in that order) and frees the MetalShader struct. `driverData` is not used.
static void METAL_ReleaseShader(
    SDL_GPURenderer *driverData,
    SDL_GPUShader *shader)
{
    (void)driverData;

    @autoreleasepool {
        MetalShader *shaderData = (MetalShader *)shader;

        // Drop the Metal object references before the struct memory goes away.
        shaderData->function = nil;
        shaderData->library = nil;
        SDL_free(shaderData);
    }
}
895

896
// Destroys a compute pipeline immediately: clears its pipeline-state handle
// and frees the wrapper struct. driverData is unused.
static void METAL_ReleaseComputePipeline(
    SDL_GPURenderer *driverData,
    SDL_GPUComputePipeline *computePipeline)
{
    @autoreleasepool {
        MetalComputePipeline *doomed = (MetalComputePipeline *)computePipeline;

        doomed->handle = nil;

        SDL_free(doomed);
    }
}
906

907
// Destroys a graphics pipeline immediately: clears its render-pipeline and
// depth-stencil state references, then frees the wrapper struct.
// driverData is unused.
static void METAL_ReleaseGraphicsPipeline(
    SDL_GPURenderer *driverData,
    SDL_GPUGraphicsPipeline *graphicsPipeline)
{
    @autoreleasepool {
        MetalGraphicsPipeline *doomed = (MetalGraphicsPipeline *)graphicsPipeline;

        doomed->handle = nil;
        doomed->depthStencilState = nil;

        SDL_free(doomed);
    }
}
918

919
// Pipeline Creation
920

921
// Compiles the compute shader described by pipelineCreateInfo and wraps the
// resulting MTLComputePipelineState in a MetalComputePipeline.
//
// Returns the new pipeline cast to SDL_GPUComputePipeline, or NULL if shader
// compilation, pipeline-state creation or the wrapper allocation fails.
static SDL_GPUComputePipeline *METAL_CreateComputePipeline(
    SDL_GPURenderer *driverData,
    SDL_GPUComputePipelineCreateInfo *pipelineCreateInfo)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        MetalLibraryFunction libraryFunction;
        id<MTLComputePipelineState> handle;
        MetalComputePipeline *pipeline;
        NSError *error = nil;

        libraryFunction = METAL_INTERNAL_CompileShader(
            renderer,
            pipelineCreateInfo->format,
            pipelineCreateInfo->code,
            pipelineCreateInfo->codeSize,
            pipelineCreateInfo->entryPointName);

        if (libraryFunction.library == nil || libraryFunction.function == nil) {
            return NULL;
        }

        // Failure is signalled by a nil return value; the NSError only
        // carries the details, so test the handle rather than the error.
        handle = [renderer->device newComputePipelineStateWithFunction:libraryFunction.function error:&error];
        if (handle == nil) {
            SDL_LogError(
                SDL_LOG_CATEGORY_GPU,
                "Creating compute pipeline failed: %s",
                (error != nil) ? [[error description] UTF8String] : "unknown error");
            return NULL;
        }

        // Zero-initialize: the struct holds an Objective-C object reference,
        // and (assuming ARC) storing into it releases the previous value,
        // which must therefore be nil rather than indeterminate.
        pipeline = SDL_calloc(1, sizeof(MetalComputePipeline));
        if (pipeline == NULL) {
            return NULL;
        }

        pipeline->handle = handle;
        pipeline->readOnlyStorageTextureCount = pipelineCreateInfo->readOnlyStorageTextureCount;
        pipeline->writeOnlyStorageTextureCount = pipelineCreateInfo->writeOnlyStorageTextureCount;
        pipeline->readOnlyStorageBufferCount = pipelineCreateInfo->readOnlyStorageBufferCount;
        pipeline->writeOnlyStorageBufferCount = pipelineCreateInfo->writeOnlyStorageBufferCount;
        pipeline->uniformBufferCount = pipelineCreateInfo->uniformBufferCount;
        pipeline->threadCountX = pipelineCreateInfo->threadCountX;
        pipeline->threadCountY = pipelineCreateInfo->threadCountY;
        pipeline->threadCountZ = pipelineCreateInfo->threadCountZ;

        return (SDL_GPUComputePipeline *)pipeline;
    }
}
965

966
// Builds a Metal render pipeline (and, when a depth-stencil attachment is
// declared, a depth-stencil state) from pipelineCreateInfo.
//
// The descriptor is filled in this order: per-attachment blend state,
// multisample count, depth/stencil formats and state, shader functions,
// vertex layout. The resulting state objects plus the pieces of the create
// info that are needed later (blend constants, sample mask, stencil
// reference, rasterizer state, primitive type, per-stage resource counts)
// are stored in a MetalGraphicsPipeline.
//
// Returns the pipeline cast to SDL_GPUGraphicsPipeline, or NULL if pipeline
// state creation or the wrapper allocation fails.
static SDL_GPUGraphicsPipeline *METAL_CreateGraphicsPipeline(
    SDL_GPURenderer *driverData,
    SDL_GPUGraphicsPipelineCreateInfo *pipelineCreateInfo)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        MetalShader *vertexShader = (MetalShader *)pipelineCreateInfo->vertexShader;
        MetalShader *fragmentShader = (MetalShader *)pipelineCreateInfo->fragmentShader;
        MTLRenderPipelineDescriptor *pipelineDescriptor;
        SDL_GPUColorAttachmentBlendState *blendState;
        MTLVertexDescriptor *vertexDescriptor;
        Uint32 binding;
        MTLDepthStencilDescriptor *depthStencilDescriptor;
        MTLStencilDescriptor *frontStencilDescriptor = NULL;
        MTLStencilDescriptor *backStencilDescriptor = NULL;
        id<MTLDepthStencilState> depthStencilState = nil;
        id<MTLRenderPipelineState> pipelineState = nil;
        NSError *error = NULL;
        MetalGraphicsPipeline *result = NULL;

        pipelineDescriptor = [MTLRenderPipelineDescriptor new];

        // Blend

        for (Uint32 i = 0; i < pipelineCreateInfo->attachmentInfo.colorAttachmentCount; i += 1) {
            blendState = &pipelineCreateInfo->attachmentInfo.colorAttachmentDescriptions[i].blendState;

            pipelineDescriptor.colorAttachments[i].pixelFormat = SDLToMetal_SurfaceFormat[pipelineCreateInfo->attachmentInfo.colorAttachmentDescriptions[i].format];
            pipelineDescriptor.colorAttachments[i].writeMask = SDLToMetal_ColorWriteMask(blendState->colorWriteMask);
            pipelineDescriptor.colorAttachments[i].blendingEnabled = blendState->blendEnable;
            pipelineDescriptor.colorAttachments[i].rgbBlendOperation = SDLToMetal_BlendOp[blendState->colorBlendOp];
            pipelineDescriptor.colorAttachments[i].alphaBlendOperation = SDLToMetal_BlendOp[blendState->alphaBlendOp];
            pipelineDescriptor.colorAttachments[i].sourceRGBBlendFactor = SDLToMetal_BlendFactor[blendState->srcColorBlendFactor];
            pipelineDescriptor.colorAttachments[i].sourceAlphaBlendFactor = SDLToMetal_BlendFactor[blendState->srcAlphaBlendFactor];
            pipelineDescriptor.colorAttachments[i].destinationRGBBlendFactor = SDLToMetal_BlendFactor[blendState->dstColorBlendFactor];
            pipelineDescriptor.colorAttachments[i].destinationAlphaBlendFactor = SDLToMetal_BlendFactor[blendState->dstAlphaBlendFactor];
        }

        // Multisample

        pipelineDescriptor.rasterSampleCount = SDLToMetal_SampleCount[pipelineCreateInfo->multisampleState.sampleCount];

        // Depth Stencil

        if (pipelineCreateInfo->attachmentInfo.hasDepthStencilAttachment) {
            pipelineDescriptor.depthAttachmentPixelFormat = SDLToMetal_SurfaceFormat[pipelineCreateInfo->attachmentInfo.depthStencilFormat];

            if (pipelineCreateInfo->depthStencilState.stencilTestEnable) {
                // The stencil attachment shares the depth-stencil format.
                pipelineDescriptor.stencilAttachmentPixelFormat = SDLToMetal_SurfaceFormat[pipelineCreateInfo->attachmentInfo.depthStencilFormat];

                // Front and back faces get their own ops but share the
                // compare/write masks from the create info.
                frontStencilDescriptor = [MTLStencilDescriptor new];
                frontStencilDescriptor.stencilCompareFunction = SDLToMetal_CompareOp[pipelineCreateInfo->depthStencilState.frontStencilState.compareOp];
                frontStencilDescriptor.stencilFailureOperation = SDLToMetal_StencilOp[pipelineCreateInfo->depthStencilState.frontStencilState.failOp];
                frontStencilDescriptor.depthStencilPassOperation = SDLToMetal_StencilOp[pipelineCreateInfo->depthStencilState.frontStencilState.passOp];
                frontStencilDescriptor.depthFailureOperation = SDLToMetal_StencilOp[pipelineCreateInfo->depthStencilState.frontStencilState.depthFailOp];
                frontStencilDescriptor.readMask = pipelineCreateInfo->depthStencilState.compareMask;
                frontStencilDescriptor.writeMask = pipelineCreateInfo->depthStencilState.writeMask;

                backStencilDescriptor = [MTLStencilDescriptor new];
                backStencilDescriptor.stencilCompareFunction = SDLToMetal_CompareOp[pipelineCreateInfo->depthStencilState.backStencilState.compareOp];
                backStencilDescriptor.stencilFailureOperation = SDLToMetal_StencilOp[pipelineCreateInfo->depthStencilState.backStencilState.failOp];
                backStencilDescriptor.depthStencilPassOperation = SDLToMetal_StencilOp[pipelineCreateInfo->depthStencilState.backStencilState.passOp];
                backStencilDescriptor.depthFailureOperation = SDLToMetal_StencilOp[pipelineCreateInfo->depthStencilState.backStencilState.depthFailOp];
                backStencilDescriptor.readMask = pipelineCreateInfo->depthStencilState.compareMask;
                backStencilDescriptor.writeMask = pipelineCreateInfo->depthStencilState.writeMask;
            }

            // With depth testing disabled every fragment passes the depth
            // comparison (MTLCompareFunctionAlways).
            depthStencilDescriptor = [MTLDepthStencilDescriptor new];
            depthStencilDescriptor.depthCompareFunction = pipelineCreateInfo->depthStencilState.depthTestEnable ? SDLToMetal_CompareOp[pipelineCreateInfo->depthStencilState.compareOp] : MTLCompareFunctionAlways;
            depthStencilDescriptor.depthWriteEnabled = pipelineCreateInfo->depthStencilState.depthWriteEnable;
            depthStencilDescriptor.frontFaceStencil = frontStencilDescriptor;
            depthStencilDescriptor.backFaceStencil = backStencilDescriptor;

            depthStencilState = [renderer->device newDepthStencilStateWithDescriptor:depthStencilDescriptor];
        }

        // Shaders

        pipelineDescriptor.vertexFunction = vertexShader->function;
        pipelineDescriptor.fragmentFunction = fragmentShader->function;

        // Vertex Descriptor

        if (pipelineCreateInfo->vertexInputState.vertexBindingCount > 0) {
            vertexDescriptor = [MTLVertexDescriptor vertexDescriptor];

            for (Uint32 i = 0; i < pipelineCreateInfo->vertexInputState.vertexAttributeCount; i += 1) {
                Uint32 loc = pipelineCreateInfo->vertexInputState.vertexAttributes[i].location;
                vertexDescriptor.attributes[loc].format = SDLToMetal_VertexFormat[pipelineCreateInfo->vertexInputState.vertexAttributes[i].format];
                vertexDescriptor.attributes[loc].offset = pipelineCreateInfo->vertexInputState.vertexAttributes[i].offset;
                vertexDescriptor.attributes[loc].bufferIndex = METAL_INTERNAL_GetVertexBufferIndex(pipelineCreateInfo->vertexInputState.vertexAttributes[i].binding);
            }

            for (Uint32 i = 0; i < pipelineCreateInfo->vertexInputState.vertexBindingCount; i += 1) {
                binding = METAL_INTERNAL_GetVertexBufferIndex(pipelineCreateInfo->vertexInputState.vertexBindings[i].binding);
                vertexDescriptor.layouts[binding].stepFunction = SDLToMetal_StepFunction[pipelineCreateInfo->vertexInputState.vertexBindings[i].inputRate];
                // Only per-instance bindings use the caller's step rate.
                vertexDescriptor.layouts[binding].stepRate = (pipelineCreateInfo->vertexInputState.vertexBindings[i].inputRate == SDL_GPU_VERTEXINPUTRATE_INSTANCE) ? pipelineCreateInfo->vertexInputState.vertexBindings[i].instanceStepRate : 1;
                vertexDescriptor.layouts[binding].stride = pipelineCreateInfo->vertexInputState.vertexBindings[i].stride;
            }

            pipelineDescriptor.vertexDescriptor = vertexDescriptor;
        }

        // Create the graphics pipeline

        // Failure is signalled by a nil return value; the NSError only
        // carries the details, so test the state object rather than the error.
        pipelineState = [renderer->device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:&error];
        if (pipelineState == nil) {
            SDL_LogError(
                SDL_LOG_CATEGORY_GPU,
                "Creating render pipeline failed: %s",
                (error != nil) ? [[error description] UTF8String] : "unknown error");
            return NULL;
        }

        // Zero-initialize: the struct holds Objective-C object references,
        // and (assuming ARC) storing into them releases the previous value,
        // which must therefore be nil rather than indeterminate.
        result = SDL_calloc(1, sizeof(MetalGraphicsPipeline));
        if (result == NULL) {
            return NULL;
        }

        result->handle = pipelineState;
        result->blendConstants[0] = pipelineCreateInfo->blendConstants[0];
        result->blendConstants[1] = pipelineCreateInfo->blendConstants[1];
        result->blendConstants[2] = pipelineCreateInfo->blendConstants[2];
        result->blendConstants[3] = pipelineCreateInfo->blendConstants[3];
        result->sampleMask = pipelineCreateInfo->multisampleState.sampleMask;
        result->depthStencilState = depthStencilState;
        result->stencilReference = pipelineCreateInfo->depthStencilState.reference;
        result->rasterizerState = pipelineCreateInfo->rasterizerState;
        result->primitiveType = pipelineCreateInfo->primitiveType;
        result->vertexSamplerCount = vertexShader->samplerCount;
        result->vertexUniformBufferCount = vertexShader->uniformBufferCount;
        result->vertexStorageBufferCount = vertexShader->storageBufferCount;
        result->vertexStorageTextureCount = vertexShader->storageTextureCount;
        result->fragmentSamplerCount = fragmentShader->samplerCount;
        result->fragmentUniformBufferCount = fragmentShader->uniformBufferCount;
        result->fragmentStorageBufferCount = fragmentShader->storageBufferCount;
        result->fragmentStorageTextureCount = fragmentShader->storageTextureCount;
        return (SDL_GPUGraphicsPipeline *)result;
    }
}
1101

1102
// Debug Naming
1103

1104
// Assigns a debug name to a buffer container (debug mode only).
//
// The name is copied into container->debugName and also applied as the
// Metal label of every buffer currently in the container. Does nothing when
// renderer->debugMode is off. If the copy cannot be allocated the previous
// name is kept and no labels are changed.
static void METAL_SetBufferName(
    SDL_GPURenderer *driverData,
    SDL_GPUBuffer *buffer,
    const char *text)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        MetalBufferContainer *container = (MetalBufferContainer *)buffer;
        size_t textLength;
        char *nameCopy;

        if (!renderer->debugMode) {
            return;
        }

        textLength = SDL_strlen(text) + 1;

        // Resize through a temporary so a failed realloc neither leaks the
        // old name nor leaves debugName NULL for the copy below.
        nameCopy = SDL_realloc(container->debugName, textLength);
        if (nameCopy == NULL) {
            return;
        }
        container->debugName = nameCopy;

        SDL_utf8strlcpy(
            container->debugName,
            text,
            textLength);

        for (Uint32 i = 0; i < container->bufferCount; i += 1) {
            container->buffers[i]->handle.label = @(text);
        }
    }
}
1130

1131
// Assigns a debug name to a texture container (debug mode only).
//
// The name is copied into container->debugName and also applied as the
// Metal label of every texture currently in the container. Does nothing
// when renderer->debugMode is off. If the copy cannot be allocated the
// previous name is kept and no labels are changed.
static void METAL_SetTextureName(
    SDL_GPURenderer *driverData,
    SDL_GPUTexture *texture,
    const char *text)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        MetalTextureContainer *container = (MetalTextureContainer *)texture;
        size_t textLength;
        char *nameCopy;

        if (!renderer->debugMode) {
            return;
        }

        textLength = SDL_strlen(text) + 1;

        // Resize through a temporary so a failed realloc neither leaks the
        // old name nor leaves debugName NULL for the copy below.
        nameCopy = SDL_realloc(container->debugName, textLength);
        if (nameCopy == NULL) {
            return;
        }
        container->debugName = nameCopy;

        SDL_utf8strlcpy(
            container->debugName,
            text,
            textLength);

        for (Uint32 i = 0; i < container->textureCount; i += 1) {
            container->textures[i]->handle.label = @(text);
        }
    }
}
1157

1158
// Inserts a one-off debug label at the current point of the command stream.
//
// The label goes to the first encoder that is set, checked in the order
// render, blit, compute. With no encoder set, an empty debug group is
// pushed and popped on the command buffer itself instead.
static void METAL_InsertDebugLabel(
    SDL_GPUCommandBuffer *commandBuffer,
    const char *text)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        NSString *signpost = @(text);

        if (cmdbuf->renderEncoder) {
            [cmdbuf->renderEncoder insertDebugSignpost:signpost];
            return;
        }

        if (cmdbuf->blitEncoder) {
            [cmdbuf->blitEncoder insertDebugSignpost:signpost];
            return;
        }

        if (cmdbuf->computeEncoder) {
            [cmdbuf->computeEncoder insertDebugSignpost:signpost];
            return;
        }

        // Metal doesn't have insertDebugSignpost for command buffers...
        [cmdbuf->handle pushDebugGroup:signpost];
        [cmdbuf->handle popDebugGroup];
    }
}
1179

1180
// Opens a named debug group.
//
// The group is pushed on the first encoder that is set, checked in the
// order render, blit, compute; with no encoder set it is pushed on the
// command buffer itself.
static void METAL_PushDebugGroup(
    SDL_GPUCommandBuffer *commandBuffer,
    const char *name)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        NSString *groupName = @(name);

        if (cmdbuf->renderEncoder) {
            [cmdbuf->renderEncoder pushDebugGroup:groupName];
            return;
        }

        if (cmdbuf->blitEncoder) {
            [cmdbuf->blitEncoder pushDebugGroup:groupName];
            return;
        }

        if (cmdbuf->computeEncoder) {
            [cmdbuf->computeEncoder pushDebugGroup:groupName];
            return;
        }

        [cmdbuf->handle pushDebugGroup:groupName];
    }
}
1199

1200
// Closes the most recently opened debug group.
//
// The pop goes to the first encoder that is set, checked in the order
// render, blit, compute; with no encoder set it goes to the command buffer
// itself.
static void METAL_PopDebugGroup(
    SDL_GPUCommandBuffer *commandBuffer)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

        if (cmdbuf->renderEncoder) {
            [cmdbuf->renderEncoder popDebugGroup];
            return;
        }

        if (cmdbuf->blitEncoder) {
            [cmdbuf->blitEncoder popDebugGroup];
            return;
        }

        if (cmdbuf->computeEncoder) {
            [cmdbuf->computeEncoder popDebugGroup];
            return;
        }

        [cmdbuf->handle popDebugGroup];
    }
}
1217

1218
// Resource Creation
1219

1220
// Creates an MTLSamplerState from samplerCreateInfo and wraps it in a
// MetalSampler.
//
// Returns the sampler cast to SDL_GPUSampler, or NULL if the sampler state
// or the wrapper allocation fails.
static SDL_GPUSampler *METAL_CreateSampler(
    SDL_GPURenderer *driverData,
    SDL_GPUSamplerCreateInfo *samplerCreateInfo)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        MTLSamplerDescriptor *samplerDesc = [MTLSamplerDescriptor new];
        id<MTLSamplerState> sampler;
        MetalSampler *metalSampler;

        // Metal names the texture coordinates s/t/r for width/height/depth,
        // which correspond to U/V/W respectively.
        samplerDesc.sAddressMode = SDLToMetal_SamplerAddressMode[samplerCreateInfo->addressModeU];
        samplerDesc.tAddressMode = SDLToMetal_SamplerAddressMode[samplerCreateInfo->addressModeV];
        samplerDesc.rAddressMode = SDLToMetal_SamplerAddressMode[samplerCreateInfo->addressModeW];
        samplerDesc.minFilter = SDLToMetal_MinMagFilter[samplerCreateInfo->minFilter];
        samplerDesc.magFilter = SDLToMetal_MinMagFilter[samplerCreateInfo->magFilter];
        samplerDesc.mipFilter = SDLToMetal_MipFilter[samplerCreateInfo->mipmapMode]; // FIXME: Is this right with non-mipmapped samplers?
        samplerDesc.lodMinClamp = samplerCreateInfo->minLod;
        samplerDesc.lodMaxClamp = samplerCreateInfo->maxLod;
        // Anisotropy of 1 means "disabled".
        samplerDesc.maxAnisotropy = (NSUInteger)((samplerCreateInfo->anisotropyEnable) ? samplerCreateInfo->maxAnisotropy : 1);
        samplerDesc.compareFunction = (samplerCreateInfo->compareEnable) ? SDLToMetal_CompareOp[samplerCreateInfo->compareOp] : MTLCompareFunctionAlways;
        samplerDesc.borderColor = MTLSamplerBorderColorTransparentBlack; // arbitrary, unused

        sampler = [renderer->device newSamplerStateWithDescriptor:samplerDesc];
        if (sampler == NULL) {
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create sampler");
            return NULL;
        }

        // Zero-initialize: the struct holds an Objective-C object reference,
        // and (assuming ARC) storing into it releases the previous value,
        // which must therefore be nil rather than indeterminate.
        metalSampler = (MetalSampler *)SDL_calloc(1, sizeof(MetalSampler));
        if (metalSampler == NULL) {
            return NULL;
        }

        metalSampler->handle = sampler;
        return (SDL_GPUSampler *)metalSampler;
    }
}
1253

1254
// Compiles the shader described by shaderCreateInfo and wraps the resulting
// library/function pair, plus the declared resource counts, in a MetalShader.
//
// Returns the shader cast to SDL_GPUShader, or NULL if compilation or the
// wrapper allocation fails.
static SDL_GPUShader *METAL_CreateShader(
    SDL_GPURenderer *driverData,
    SDL_GPUShaderCreateInfo *shaderCreateInfo)
{
    @autoreleasepool {
        MetalLibraryFunction libraryFunction;
        MetalShader *result;

        libraryFunction = METAL_INTERNAL_CompileShader(
            (MetalRenderer *)driverData,
            shaderCreateInfo->format,
            shaderCreateInfo->code,
            shaderCreateInfo->codeSize,
            shaderCreateInfo->entryPointName);

        if (libraryFunction.library == nil || libraryFunction.function == nil) {
            return NULL;
        }

        // Zero-initialize: the struct holds Objective-C object references,
        // and (assuming ARC) storing into them releases the previous value,
        // which must therefore be nil rather than indeterminate.
        result = SDL_calloc(1, sizeof(MetalShader));
        if (result == NULL) {
            return NULL;
        }

        result->library = libraryFunction.library;
        result->function = libraryFunction.function;
        result->samplerCount = shaderCreateInfo->samplerCount;
        result->storageBufferCount = shaderCreateInfo->storageBufferCount;
        result->storageTextureCount = shaderCreateInfo->storageTextureCount;
        result->uniformBufferCount = shaderCreateInfo->uniformBufferCount;
        return (SDL_GPUShader *)result;
    }
}
1283

1284
// Creates the MTLTexture (and, for multisampled 2D textures, a companion
// MSAA texture) described by textureCreateInfo and wraps them in a
// MetalTexture whose reference count starts at 0.
//
// Returns NULL if either Metal texture or the wrapper allocation fails.
//
// This function assumes that it's called from within an autorelease pool
static MetalTexture *METAL_INTERNAL_CreateTexture(
    MetalRenderer *renderer,
    SDL_GPUTextureCreateInfo *textureCreateInfo)
{
    MTLTextureDescriptor *textureDescriptor = [MTLTextureDescriptor new];
    id<MTLTexture> texture;
    id<MTLTexture> msaaTexture = NULL;
    MetalTexture *metalTexture;

    textureDescriptor.textureType = SDLToMetal_TextureType[textureCreateInfo->type];
    textureDescriptor.pixelFormat = SDLToMetal_SurfaceFormat[textureCreateInfo->format];
    // This format isn't natively supported so let's swizzle!
    if (textureCreateInfo->format == SDL_GPU_TEXTUREFORMAT_B4G4R4A4_UNORM) {
        textureDescriptor.swizzle = MTLTextureSwizzleChannelsMake(
            MTLTextureSwizzleBlue,
            MTLTextureSwizzleGreen,
            MTLTextureSwizzleRed,
            MTLTextureSwizzleAlpha);
    }

    // layerCountOrDepth is the depth for 3D textures and the array length
    // for 2D arrays; every other type gets 1 for both.
    textureDescriptor.width = textureCreateInfo->width;
    textureDescriptor.height = textureCreateInfo->height;
    textureDescriptor.depth = (textureCreateInfo->type == SDL_GPU_TEXTURETYPE_3D) ? textureCreateInfo->layerCountOrDepth : 1;
    textureDescriptor.mipmapLevelCount = textureCreateInfo->levelCount;
    textureDescriptor.sampleCount = 1;
    textureDescriptor.arrayLength = (textureCreateInfo->type == SDL_GPU_TEXTURETYPE_2D_ARRAY) ? textureCreateInfo->layerCountOrDepth : 1;
    textureDescriptor.storageMode = MTLStorageModePrivate;

    // Translate SDL usage flags into Metal usage bits.
    textureDescriptor.usage = 0;
    if (textureCreateInfo->usageFlags & (SDL_GPU_TEXTUREUSAGE_COLOR_TARGET |
                                         SDL_GPU_TEXTUREUSAGE_DEPTH_STENCIL_TARGET)) {
        textureDescriptor.usage |= MTLTextureUsageRenderTarget;
    }
    if (textureCreateInfo->usageFlags & (SDL_GPU_TEXTUREUSAGE_SAMPLER |
                                         SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ |
                                         SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ)) {
        textureDescriptor.usage |= MTLTextureUsageShaderRead;
    }
    if (textureCreateInfo->usageFlags & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
        textureDescriptor.usage |= MTLTextureUsageShaderWrite;
    }

    texture = [renderer->device newTextureWithDescriptor:textureDescriptor];
    if (texture == NULL) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create MTLTexture!");
        return NULL;
    }

    // Create the MSAA texture, if needed
    if (textureCreateInfo->sampleCount > SDL_GPU_SAMPLECOUNT_1 && textureCreateInfo->type == SDL_GPU_TEXTURETYPE_2D) {
        // Reuse the descriptor, switching it to a multisample render target.
        textureDescriptor.textureType = MTLTextureType2DMultisample;
        textureDescriptor.sampleCount = SDLToMetal_SampleCount[textureCreateInfo->sampleCount];
        textureDescriptor.usage = MTLTextureUsageRenderTarget;

        msaaTexture = [renderer->device newTextureWithDescriptor:textureDescriptor];
        if (msaaTexture == NULL) {
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create MSAA MTLTexture!");
            return NULL;
        }
    }

    // Zero-initialize: the struct holds Objective-C object references, and
    // (assuming ARC) storing into them releases the previous value, which
    // must therefore be nil rather than indeterminate.
    metalTexture = (MetalTexture *)SDL_calloc(1, sizeof(MetalTexture));
    if (metalTexture == NULL) {
        return NULL;
    }

    metalTexture->handle = texture;
    metalTexture->msaaHandle = msaaTexture;
    SDL_AtomicSet(&metalTexture->referenceCount, 0);
    return metalTexture;
}
1352

1353
// Reports whether the Metal device supports textures with the given sample
// count. The format argument is not consulted.
static bool METAL_SupportsSampleCount(
    SDL_GPURenderer *driverData,
    SDL_GPUTextureFormat format,
    SDL_GPUSampleCount sampleCount)
{
    @autoreleasepool {
        MetalRenderer *metalRenderer = (MetalRenderer *)driverData;
        NSUInteger nativeCount = SDLToMetal_SampleCount[sampleCount];

        return [metalRenderer->device supportsTextureSampleCount:nativeCount];
    }
}
1364

1365
// Creates a texture and the cycling container that owns it.
//
// The container starts with a single texture, which is also the active one,
// a copy of the create info in header.info, and no debug name.
//
// Returns the container cast to SDL_GPUTexture, or NULL if the texture or
// any of the container allocations fail (nothing is leaked in that case).
static SDL_GPUTexture *METAL_CreateTexture(
    SDL_GPURenderer *driverData,
    SDL_GPUTextureCreateInfo *textureCreateInfo)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        MetalTextureContainer *container;
        MetalTexture **textureSlots;
        MetalTexture *texture;

        texture = METAL_INTERNAL_CreateTexture(
            renderer,
            textureCreateInfo);

        if (texture == NULL) {
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create texture!");
            return NULL;
        }

        container = SDL_malloc(sizeof(MetalTextureContainer));
        textureSlots = SDL_malloc(sizeof(MetalTexture *));
        if (container == NULL || textureSlots == NULL) {
            // Undo everything: SDL_free(NULL) is a no-op, and the texture's
            // handles are cleared before its wrapper is freed.
            SDL_free(textureSlots);
            SDL_free(container);
            texture->handle = nil;
            texture->msaaHandle = nil;
            SDL_free(texture);
            return NULL;
        }

        container->canBeCycled = 1;
        container->header.info = *textureCreateInfo;
        container->activeTexture = texture;
        container->textureCapacity = 1;
        container->textureCount = 1;
        container->textures = textureSlots;
        container->textures[0] = texture;
        container->debugName = NULL;

        return (SDL_GPUTexture *)container;
    }
}
1397

1398
// Returns the texture that should receive the next write to the container.
//
// Without cycling (cycle is false or the container cannot be cycled) this is
// simply the current active texture. With cycling, the first texture in the
// container whose reference count is 0 becomes active; if there is none, a
// new texture is created from header.info, appended, made active and, in
// debug mode, labelled with the container's debug name.
//
// Returns NULL if a new texture was needed but could not be created; the
// container is left unchanged in that case.
//
// This function assumes that it's called from within an autorelease pool
static MetalTexture *METAL_INTERNAL_PrepareTextureForWrite(
    MetalRenderer *renderer,
    MetalTextureContainer *container,
    bool cycle)
{
    MetalTexture *freshTexture;
    Uint32 i;

    if (!cycle || !container->canBeCycled) {
        return container->activeTexture;
    }

    // Cycle the active texture handle: prefer an existing unreferenced one
    for (i = 0; i < container->textureCount; i += 1) {
        if (SDL_AtomicGet(&container->textures[i]->referenceCount) == 0) {
            container->activeTexture = container->textures[i];
            return container->activeTexture;
        }
    }

    // Create the texture before touching the container so that a failure
    // never leaves a NULL entry in the array.
    freshTexture = METAL_INTERNAL_CreateTexture(
        renderer,
        &container->header.info);
    if (freshTexture == NULL) {
        return NULL;
    }

    EXPAND_ARRAY_IF_NEEDED(
        container->textures,
        MetalTexture *,
        container->textureCount + 1,
        container->textureCapacity,
        container->textureCapacity + 1);

    container->textures[container->textureCount] = freshTexture;
    container->textureCount += 1;

    container->activeTexture = freshTexture;

    if (renderer->debugMode && container->debugName != NULL) {
        container->activeTexture->handle.label = @(container->debugName);
    }

    return container->activeTexture;
}
1436

1437
// Creates an MTLBuffer of at least sizeInBytes (rounded up via
// METAL_INTERNAL_NextHighestAlignment with an alignment of 4) using the
// given resource options, and wraps it in a MetalBuffer whose reference
// count starts at 0.
//
// Returns NULL if the Metal buffer or the wrapper allocation fails.
//
// This function assumes that it's called from within an autorelease pool
static MetalBuffer *METAL_INTERNAL_CreateBuffer(
    MetalRenderer *renderer,
    Uint32 sizeInBytes,
    MTLResourceOptions resourceOptions)
{
    id<MTLBuffer> bufferHandle;
    MetalBuffer *metalBuffer;

    // Storage buffers have to be 4-aligned, so might as well align them all
    sizeInBytes = METAL_INTERNAL_NextHighestAlignment(sizeInBytes, 4);

    bufferHandle = [renderer->device newBufferWithLength:sizeInBytes options:resourceOptions];
    if (bufferHandle == NULL) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Could not create buffer");
        return NULL;
    }

    // Zero-initialize: the struct holds an Objective-C object reference, and
    // (assuming ARC) storing into it releases the previous value, which must
    // therefore be nil rather than indeterminate.
    metalBuffer = SDL_calloc(1, sizeof(MetalBuffer));
    if (metalBuffer == NULL) {
        return NULL;
    }

    metalBuffer->handle = bufferHandle;
    SDL_AtomicSet(&metalBuffer->referenceCount, 0);

    return metalBuffer;
}
1461

1462
// Creates a buffer container holding one freshly created buffer, which is
// also the active buffer.
//
// Storage selection: private containers use MTLResourceStorageModePrivate;
// otherwise write-only containers use write-combined CPU caching and the
// rest use the default CPU cache mode.
//
// Returns NULL if the container, its buffer array or the first buffer
// cannot be created (nothing is leaked in that case).
//
// This function assumes that it's called from within an autorelease pool
static MetalBufferContainer *METAL_INTERNAL_CreateBufferContainer(
    MetalRenderer *renderer,
    Uint32 sizeInBytes,
    bool isPrivate,
    bool isWriteOnly)
{
    MetalBufferContainer *container;
    MetalBuffer **bufferSlots;
    MetalBuffer *firstBuffer;
    MTLResourceOptions resourceOptions;

    if (isPrivate) {
        resourceOptions = MTLResourceStorageModePrivate;
    } else {
        if (isWriteOnly) {
            resourceOptions = MTLResourceCPUCacheModeWriteCombined;
        } else {
            resourceOptions = MTLResourceCPUCacheModeDefaultCache;
        }
    }

    container = SDL_malloc(sizeof(MetalBufferContainer));
    bufferSlots = SDL_malloc(sizeof(MetalBuffer *));
    if (container == NULL || bufferSlots == NULL) {
        // SDL_free(NULL) is a no-op, so both can be freed unconditionally.
        SDL_free(bufferSlots);
        SDL_free(container);
        return NULL;
    }

    firstBuffer = METAL_INTERNAL_CreateBuffer(
        renderer,
        sizeInBytes,
        resourceOptions);
    if (firstBuffer == NULL) {
        // Never hand out a container whose active buffer is NULL.
        SDL_free(bufferSlots);
        SDL_free(container);
        return NULL;
    }

    container->size = sizeInBytes;
    container->bufferCapacity = 1;
    container->bufferCount = 1;
    container->buffers = bufferSlots;
    container->isPrivate = isPrivate;
    container->isWriteOnly = isWriteOnly;
    container->debugName = NULL;
    container->buffers[0] = firstBuffer;
    container->activeBuffer = firstBuffer;

    return container;
}
1499

1500
// Creates a GPU buffer: a private, non-write-only buffer container of
// sizeInBytes. usageFlags is not consulted here.
static SDL_GPUBuffer *METAL_CreateBuffer(
    SDL_GPURenderer *driverData,
    SDL_GPUBufferUsageFlags usageFlags,
    Uint32 sizeInBytes)
{
    @autoreleasepool {
        MetalRenderer *metalRenderer = (MetalRenderer *)driverData;
        MetalBufferContainer *created = METAL_INTERNAL_CreateBufferContainer(
            metalRenderer,
            sizeInBytes,
            true,   // isPrivate
            false); // isWriteOnly

        return (SDL_GPUBuffer *)created;
    }
}
1513

1514
// Creates a transfer buffer: a non-private buffer container of sizeInBytes
// that is marked write-only when the usage is
// SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD.
static SDL_GPUTransferBuffer *METAL_CreateTransferBuffer(
    SDL_GPURenderer *driverData,
    SDL_GPUTransferBufferUsage usage,
    Uint32 sizeInBytes)
{
    @autoreleasepool {
        MetalRenderer *metalRenderer = (MetalRenderer *)driverData;
        MetalBufferContainer *created = METAL_INTERNAL_CreateBufferContainer(
            metalRenderer,
            sizeInBytes,
            false, // isPrivate
            usage == SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD);

        return (SDL_GPUTransferBuffer *)created;
    }
}
1527

1528
// Creates a write-combined MTLBuffer of sizeInBytes and wraps it in a
// MetalUniformBuffer with write and draw offsets of 0.
//
// Returns NULL if the Metal buffer or the wrapper allocation fails.
//
// This function assumes that it's called from within an autorelease pool
static MetalUniformBuffer *METAL_INTERNAL_CreateUniformBuffer(
    MetalRenderer *renderer,
    Uint32 sizeInBytes)
{
    MetalUniformBuffer *uniformBuffer;
    id<MTLBuffer> bufferHandle;

    bufferHandle = [renderer->device newBufferWithLength:sizeInBytes options:MTLResourceCPUCacheModeWriteCombined];
    if (bufferHandle == nil) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Could not create uniform buffer");
        return NULL;
    }

    // Zero-initialize: the struct holds an Objective-C object reference, and
    // (assuming ARC) storing into it releases the previous value, which must
    // therefore be nil rather than indeterminate.
    uniformBuffer = SDL_calloc(1, sizeof(MetalUniformBuffer));
    if (uniformBuffer == NULL) {
        return NULL;
    }

    uniformBuffer->handle = bufferHandle;
    uniformBuffer->writeOffset = 0;
    uniformBuffer->drawOffset = 0;

    return uniformBuffer;
}
1549

1550
// Returns the buffer that should receive the next write to the container.
//
// Cycling only happens when cycle is true and the active buffer's reference
// count is above 0. In that case the first buffer in the container with a
// reference count of 0 becomes active; if there is none, a new buffer of
// container->size is created with the same storage options the container
// was created with, appended, made active and, in debug mode, labelled with
// the container's debug name.
//
// Returns NULL if a new buffer was needed but could not be created; the
// container is left unchanged in that case.
//
// This function assumes that it's called from within an autorelease pool
static MetalBuffer *METAL_INTERNAL_PrepareBufferForWrite(
    MetalRenderer *renderer,
    MetalBufferContainer *container,
    bool cycle)
{
    MTLResourceOptions resourceOptions;
    MetalBuffer *freshBuffer;
    Uint32 i;

    // No cycling requested, or the active buffer is free to be overwritten
    if (!cycle || SDL_AtomicGet(&container->activeBuffer->referenceCount) <= 0) {
        return container->activeBuffer;
    }

    // Prefer an existing unreferenced buffer
    for (i = 0; i < container->bufferCount; i += 1) {
        if (SDL_AtomicGet(&container->buffers[i]->referenceCount) == 0) {
            container->activeBuffer = container->buffers[i];
            return container->activeBuffer;
        }
    }

    if (container->isPrivate) {
        resourceOptions = MTLResourceStorageModePrivate;
    } else {
        if (container->isWriteOnly) {
            resourceOptions = MTLResourceCPUCacheModeWriteCombined;
        } else {
            resourceOptions = MTLResourceCPUCacheModeDefaultCache;
        }
    }

    // Create the buffer before touching the container so that a failure
    // never leaves a NULL entry in the array.
    freshBuffer = METAL_INTERNAL_CreateBuffer(
        renderer,
        container->size,
        resourceOptions);
    if (freshBuffer == NULL) {
        return NULL;
    }

    EXPAND_ARRAY_IF_NEEDED(
        container->buffers,
        MetalBuffer *,
        container->bufferCount + 1,
        container->bufferCapacity,
        container->bufferCapacity + 1);

    container->buffers[container->bufferCount] = freshBuffer;
    container->bufferCount += 1;

    container->activeBuffer = freshBuffer;

    if (renderer->debugMode && container->debugName != NULL) {
        container->activeBuffer->handle.label = @(container->debugName);
    }

    return container->activeBuffer;
}
1600

1601
// TransferBuffer Data
1602

1603
// Returns the CPU-visible contents pointer of the transfer buffer's writable
// buffer. With `cycle` set, the container may switch to a different underlying
// buffer first (see METAL_INTERNAL_PrepareBufferForWrite).
static void *METAL_MapTransferBuffer(
    SDL_GPURenderer *driverData,
    SDL_GPUTransferBuffer *transferBuffer,
    bool cycle)
{
    @autoreleasepool {
        MetalBuffer *writable = METAL_INTERNAL_PrepareBufferForWrite(
            (MetalRenderer *)driverData,
            (MetalBufferContainer *)transferBuffer,
            cycle);

        return [writable->handle contents];
    }
}
1615

1616
// Ends a mapping of a transfer buffer. This only does work on macOS: if the
// active buffer uses managed storage, the whole container-sized range is
// flagged as modified. On other platforms it is a no-op.
static void METAL_UnmapTransferBuffer(
    SDL_GPURenderer *driverData,
    SDL_GPUTransferBuffer *transferBuffer)
{
#ifdef SDL_PLATFORM_MACOS
    @autoreleasepool {
        // FIXME: Is this necessary?
        MetalBufferContainer *mapped = (MetalBufferContainer *)transferBuffer;
        MetalBuffer *active = mapped->activeBuffer;

        if (active->handle.storageMode != MTLStorageModeManaged) {
            return;
        }

        [active->handle didModifyRange:NSMakeRange(0, mapped->size)];
    }
#endif
}
1631

1632
// Copy Pass
1633

1634
// Starts a copy pass by opening a blit command encoder on the command buffer's
// native Metal handle and storing it in the command buffer.
static void METAL_BeginCopyPass(
    SDL_GPUCommandBuffer *commandBuffer)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;

        cmd->blitEncoder = [cmd->handle blitCommandEncoder];
    }
}
1642

1643
// Records a blit from a transfer buffer into a region of a texture.
// `cycle` is forwarded to METAL_INTERNAL_PrepareTextureForWrite to select the
// texture that is written. Both the destination texture and the source buffer
// are tracked on the command buffer.
// NOTE(review): the source row/image pitch is derived from the destination
// region's width/height; `source->imagePitch` / `source->imageHeight` are not
// consulted here, unlike in METAL_DownloadFromTexture -- confirm intended.
static void METAL_UploadToTexture(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUTextureTransferInfo *source,
    SDL_GPUTextureRegion *destination,
    bool cycle)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
        MetalBufferContainer *stagingContainer = (MetalBufferContainer *)source->transferBuffer;
        MetalTextureContainer *targetContainer = (MetalTextureContainer *)destination->texture;

        MetalTexture *targetTexture = METAL_INTERNAL_PrepareTextureForWrite(
            cmd->renderer,
            targetContainer,
            cycle);

        // Tightly-packed layout computed from the destination region
        NSUInteger rowBytes = BytesPerRow(destination->w, targetContainer->header.info.format);
        NSUInteger imageBytes = BytesPerImage(destination->w, destination->h, targetContainer->header.info.format);
        MTLSize extent = MTLSizeMake(destination->w, destination->h, destination->d);
        MTLOrigin writeOrigin = MTLOriginMake(destination->x, destination->y, destination->z);

        [cmd->blitEncoder copyFromBuffer:stagingContainer->activeBuffer->handle
                            sourceOffset:source->offset
                       sourceBytesPerRow:rowBytes
                     sourceBytesPerImage:imageBytes
                              sourceSize:extent
                               toTexture:targetTexture->handle
                        destinationSlice:destination->layer
                        destinationLevel:destination->mipLevel
                       destinationOrigin:writeOrigin];

        METAL_INTERNAL_TrackTexture(cmd, targetTexture);
        METAL_INTERNAL_TrackBuffer(cmd, stagingContainer->activeBuffer);
    }
}
1672

1673
// Records a blit of `destination->size` bytes from a transfer buffer into a
// GPU buffer. `cycle` is forwarded to METAL_INTERNAL_PrepareBufferForWrite to
// select the destination buffer. Both buffers are tracked on the command
// buffer.
static void METAL_UploadToBuffer(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUTransferBufferLocation *source,
    SDL_GPUBufferRegion *destination,
    bool cycle)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
        MetalBufferContainer *stagingContainer = (MetalBufferContainer *)source->transferBuffer;
        MetalBufferContainer *targetContainer = (MetalBufferContainer *)destination->buffer;

        MetalBuffer *targetBuffer = METAL_INTERNAL_PrepareBufferForWrite(
            cmd->renderer,
            targetContainer,
            cycle);

        [cmd->blitEncoder copyFromBuffer:stagingContainer->activeBuffer->handle
                            sourceOffset:source->offset
                                toBuffer:targetBuffer->handle
                       destinationOffset:destination->offset
                                    size:destination->size];

        METAL_INTERNAL_TrackBuffer(cmd, targetBuffer);
        METAL_INTERNAL_TrackBuffer(cmd, stagingContainer->activeBuffer);
    }
}
1701

1702
// Records a texture-to-texture blit of a `w` x `h` x `d` region.
// The source is the source container's active texture; the destination is
// chosen by METAL_INTERNAL_PrepareTextureForWrite using `cycle`.
// Both textures are tracked on the command buffer.
static void METAL_CopyTextureToTexture(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUTextureLocation *source,
    SDL_GPUTextureLocation *destination,
    Uint32 w,
    Uint32 h,
    Uint32 d,
    bool cycle)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
        MetalTextureContainer *fromContainer = (MetalTextureContainer *)source->texture;
        MetalTextureContainer *toContainer = (MetalTextureContainer *)destination->texture;

        MetalTexture *fromTexture = fromContainer->activeTexture;
        MetalTexture *toTexture = METAL_INTERNAL_PrepareTextureForWrite(
            cmd->renderer,
            toContainer,
            cycle);

        MTLOrigin readOrigin = MTLOriginMake(source->x, source->y, source->z);
        MTLOrigin writeOrigin = MTLOriginMake(destination->x, destination->y, destination->z);
        MTLSize extent = MTLSizeMake(w, h, d);

        [cmd->blitEncoder copyFromTexture:fromTexture->handle
                              sourceSlice:source->layer
                              sourceLevel:source->mipLevel
                             sourceOrigin:readOrigin
                               sourceSize:extent
                                toTexture:toTexture->handle
                         destinationSlice:destination->layer
                         destinationLevel:destination->mipLevel
                        destinationOrigin:writeOrigin];

        METAL_INTERNAL_TrackTexture(cmd, fromTexture);
        METAL_INTERNAL_TrackTexture(cmd, toTexture);
    }
}
1738

1739
// Records a buffer-to-buffer blit of `size` bytes.
// The source is the source container's active buffer; the destination is
// chosen by METAL_INTERNAL_PrepareBufferForWrite using `cycle`.
// Both buffers are tracked on the command buffer.
static void METAL_CopyBufferToBuffer(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUBufferLocation *source,
    SDL_GPUBufferLocation *destination,
    Uint32 size,
    bool cycle)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
        MetalBufferContainer *fromContainer = (MetalBufferContainer *)source->buffer;
        MetalBufferContainer *toContainer = (MetalBufferContainer *)destination->buffer;

        MetalBuffer *fromBuffer = fromContainer->activeBuffer;
        MetalBuffer *toBuffer = METAL_INTERNAL_PrepareBufferForWrite(
            cmd->renderer,
            toContainer,
            cycle);

        [cmd->blitEncoder copyFromBuffer:fromBuffer->handle
                            sourceOffset:source->offset
                                toBuffer:toBuffer->handle
                       destinationOffset:destination->offset
                                    size:size];

        METAL_INTERNAL_TrackBuffer(cmd, fromBuffer);
        METAL_INTERNAL_TrackBuffer(cmd, toBuffer);
    }
}
1769

1770
// Records a blit from a texture region into a transfer buffer.
// The destination row pitch and image height come from `destination`; if
// either one is zero, both fall back to the source region's width and height.
// The destination buffer is never cycled. The source texture and destination
// buffer are tracked on the command buffer.
static void METAL_DownloadFromTexture(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUTextureRegion *source,
    SDL_GPUTextureTransferInfo *destination)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
        MetalTextureContainer *fromContainer = (MetalTextureContainer *)source->texture;
        MetalBufferContainer *toContainer = (MetalBufferContainer *)destination->transferBuffer;
        MetalTexture *fromTexture = fromContainer->activeTexture;
        Uint32 rowPitch = destination->imagePitch;
        Uint32 imageRows = destination->imageHeight;
        Uint32 rowBytes, sliceBytes;

        MetalBuffer *toBuffer = METAL_INTERNAL_PrepareBufferForWrite(
            cmd->renderer,
            toContainer,
            false);

        MTLOrigin readOrigin = MTLOriginMake(source->x, source->y, source->z);
        MTLSize extent = MTLSizeMake(source->w, source->h, source->d);

        // An unspecified layout means "tightly packed to the source region"
        if (rowPitch == 0 || imageRows == 0) {
            rowPitch = source->w;
            imageRows = source->h;
        }

        rowBytes = BytesPerRow(rowPitch, fromContainer->header.info.format);
        sliceBytes = rowBytes * imageRows;

        [cmd->blitEncoder copyFromTexture:fromTexture->handle
                              sourceSlice:source->layer
                              sourceLevel:source->mipLevel
                             sourceOrigin:readOrigin
                               sourceSize:extent
                                 toBuffer:toBuffer->handle
                        destinationOffset:destination->offset
                   destinationBytesPerRow:rowBytes
                 destinationBytesPerImage:sliceBytes];

        METAL_INTERNAL_TrackTexture(cmd, fromTexture);
        METAL_INTERNAL_TrackBuffer(cmd, toBuffer);
    }
}
1823

1824
// Records a copy of `source->size` bytes from a GPU buffer into a transfer
// buffer by delegating to METAL_CopyBufferToBuffer without cycling.
// NOTE(review): `destination` is reinterpreted as an SDL_GPUBufferLocation,
// which presumably relies on the two location structs sharing a layout --
// confirm against their declarations.
static void METAL_DownloadFromBuffer(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUBufferRegion *source,
    SDL_GPUTransferBufferLocation *destination)
{
    SDL_GPUBufferLocation readLocation = {
        .buffer = source->buffer,
        .offset = source->offset
    };

    METAL_CopyBufferToBuffer(
        commandBuffer,
        &readLocation,
        (SDL_GPUBufferLocation *)destination,
        source->size,
        false);
}
1840

1841
// Finishes the current copy pass: ends encoding on the blit encoder and clears
// the command buffer's reference to it.
static void METAL_EndCopyPass(
    SDL_GPUCommandBuffer *commandBuffer)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;

        [cmd->blitEncoder endEncoding];
        cmd->blitEncoder = nil;
    }
}
1850

1851
// Generates mipmaps for the container's active texture inside a dedicated
// copy pass that is opened and closed here, then tracks the texture on the
// command buffer.
static void METAL_GenerateMipmaps(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUTexture *texture)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
        MetalTexture *target = ((MetalTextureContainer *)texture)->activeTexture;

        METAL_BeginCopyPass(commandBuffer);
        [cmd->blitEncoder generateMipmapsForTexture:target->handle];
        METAL_EndCopyPass(commandBuffer);

        METAL_INTERNAL_TrackTexture(cmd, target);
    }
}
1868

1869
// Graphics State
1870

1871
// Grows the renderer's pool of available command buffers by `allocateCount`
// freshly allocated, zero-initialized entries.
//
// Allocation failures are handled without corrupting the pool: if the pool
// array cannot be grown, the existing array and capacity are left untouched;
// if an individual command buffer (or one of its tracking arrays) cannot be
// allocated, that entry is discarded and allocation stops. Callers can detect
// failure by checking whether availableCommandBufferCount increased.
static void METAL_INTERNAL_AllocateCommandBuffers(
    MetalRenderer *renderer,
    Uint32 allocateCount)
{
    MetalCommandBuffer *commandBuffer;
    MetalCommandBuffer **expandedPool;
    Uint32 expandedCapacity;

    if (allocateCount == 0) {
        return;
    }

    expandedCapacity = renderer->availableCommandBufferCapacity + allocateCount;

    // Use a temporary so the existing pool is not lost if realloc fails
    expandedPool = SDL_realloc(
        renderer->availableCommandBuffers,
        sizeof(MetalCommandBuffer *) * expandedCapacity);
    if (expandedPool == NULL) {
        return;
    }

    renderer->availableCommandBuffers = expandedPool;
    renderer->availableCommandBufferCapacity = expandedCapacity;

    for (Uint32 i = 0; i < allocateCount; i += 1) {
        commandBuffer = SDL_calloc(1, sizeof(MetalCommandBuffer));
        if (commandBuffer == NULL) {
            return;
        }
        commandBuffer->renderer = renderer;

        // The native Metal command buffer is created in METAL_AcquireCommandBuffer

        commandBuffer->windowDataCapacity = 1;
        commandBuffer->windowDataCount = 0;
        commandBuffer->windowDatas = SDL_malloc(
            commandBuffer->windowDataCapacity * sizeof(MetalWindowData *));

        // Reference Counting
        commandBuffer->usedBufferCapacity = 4;
        commandBuffer->usedBufferCount = 0;
        commandBuffer->usedBuffers = SDL_malloc(
            commandBuffer->usedBufferCapacity * sizeof(MetalBuffer *));

        commandBuffer->usedTextureCapacity = 4;
        commandBuffer->usedTextureCount = 0;
        commandBuffer->usedTextures = SDL_malloc(
            commandBuffer->usedTextureCapacity * sizeof(MetalTexture *));

        // Never publish a half-constructed command buffer
        if (commandBuffer->windowDatas == NULL ||
            commandBuffer->usedBuffers == NULL ||
            commandBuffer->usedTextures == NULL) {
            SDL_free(commandBuffer->windowDatas);
            SDL_free(commandBuffer->usedBuffers);
            SDL_free(commandBuffer->usedTextures);
            SDL_free(commandBuffer);
            return;
        }

        renderer->availableCommandBuffers[renderer->availableCommandBufferCount] = commandBuffer;
        renderer->availableCommandBufferCount += 1;
    }
}
1909

1910
// Pops a command buffer off the renderer's pool of available command buffers.
// When the pool is empty it is first grown by its current capacity (at least
// one entry, so a zero capacity still grows).
// Returns NULL if the pool is still empty after the growth attempt, instead of
// indexing the pool with an underflowed count.
static MetalCommandBuffer *METAL_INTERNAL_GetInactiveCommandBufferFromPool(
    MetalRenderer *renderer)
{
    MetalCommandBuffer *commandBuffer;

    if (renderer->availableCommandBufferCount == 0) {
        Uint32 growBy = renderer->availableCommandBufferCapacity;
        if (growBy == 0) {
            growBy = 1;
        }

        METAL_INTERNAL_AllocateCommandBuffers(
            renderer,
            growBy);

        if (renderer->availableCommandBufferCount == 0) {
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to allocate command buffers!");
            return NULL;
        }
    }

    commandBuffer = renderer->availableCommandBuffers[renderer->availableCommandBufferCount - 1];
    renderer->availableCommandBufferCount -= 1;

    return commandBuffer;
}
1926

1927
// Allocates a new fence (initially not complete) and pushes it onto the
// renderer's pool of available fences, growing the pool array if needed.
// Returns 1 on success and 0 if any allocation fails; on failure the pool is
// left exactly as it was.
// This function does not take renderer->fenceLock itself;
// METAL_INTERNAL_AcquireFence calls it with that lock held.
static Uint8 METAL_INTERNAL_CreateFence(
    MetalRenderer *renderer)
{
    MetalFence *fence;

    fence = SDL_malloc(sizeof(MetalFence));
    if (fence == NULL) {
        return 0;
    }
    SDL_AtomicSet(&fence->complete, 0);

    // Add it to the available pool
    // FIXME: Should this be EXPAND_IF_NEEDED?
    if (renderer->availableFenceCount >= renderer->availableFenceCapacity) {
        // Doubling a zero capacity would never grow, so start from one
        Uint32 expandedCapacity = (renderer->availableFenceCapacity > 0)
            ? renderer->availableFenceCapacity * 2
            : 1;

        // Use a temporary so the existing pool is not lost if realloc fails
        MetalFence **expandedPool = SDL_realloc(
            renderer->availableFences,
            sizeof(MetalFence *) * expandedCapacity);
        if (expandedPool == NULL) {
            SDL_free(fence);
            return 0;
        }

        renderer->availableFences = expandedPool;
        renderer->availableFenceCapacity = expandedCapacity;
    }

    renderer->availableFences[renderer->availableFenceCount] = fence;
    renderer->availableFenceCount += 1;

    return 1;
}
1950

1951
// Takes a fence from the renderer's pool (creating one if the pool is empty),
// attaches it to `commandBuffer` and resets its completion flag.
// The pool is accessed under renderer->fenceLock.
// Returns 1 on success, 0 if a fence could not be created.
static Uint8 METAL_INTERNAL_AcquireFence(
    MetalRenderer *renderer,
    MetalCommandBuffer *commandBuffer)
{
    MetalFence *acquired;

    SDL_LockMutex(renderer->fenceLock);

    // Refill the pool on demand; bail out if that fails
    if (renderer->availableFenceCount == 0 && !METAL_INTERNAL_CreateFence(renderer)) {
        SDL_UnlockMutex(renderer->fenceLock);
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create fence!");
        return 0;
    }

    // Pop the most recently added fence
    renderer->availableFenceCount -= 1;
    acquired = renderer->availableFences[renderer->availableFenceCount];

    SDL_UnlockMutex(renderer->fenceLock);

    // Associate the fence with the command buffer
    commandBuffer->fence = acquired;
    SDL_AtomicSet(&acquired->complete, 0); // FIXME: Is this right?

    return 1;
}
1979

1980
// Acquires a command buffer from the renderer's pool, gives it a fresh native
// Metal command buffer, resets its bound pipeline / uniform buffer state, and
// attaches a fence to it. All of this happens under
// renderer->acquireCommandBufferLock.
// Returns NULL if no command buffer or fence could be obtained; in the fence
// failure case the command buffer is put back in the pool first.
static SDL_GPUCommandBuffer *METAL_AcquireCommandBuffer(
    SDL_GPURenderer *driverData)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        MetalCommandBuffer *commandBuffer;

        SDL_LockMutex(renderer->acquireCommandBufferLock);

        commandBuffer = METAL_INTERNAL_GetInactiveCommandBufferFromPool(renderer);
        if (commandBuffer == NULL) {
            SDL_UnlockMutex(renderer->acquireCommandBufferLock);
            return NULL;
        }
        commandBuffer->handle = [renderer->queue commandBuffer];

        commandBuffer->graphicsPipeline = NULL;
        commandBuffer->computePipeline = NULL;
        for (Uint32 i = 0; i < MAX_UNIFORM_BUFFERS_PER_STAGE; i += 1) {
            commandBuffer->vertexUniformBuffers[i] = NULL;
            commandBuffer->fragmentUniformBuffers[i] = NULL;
            commandBuffer->computeUniformBuffers[i] = NULL;
        }

        // FIXME: Do we actually need to set this?
        commandBuffer->needVertexSamplerBind = true;
        commandBuffer->needVertexStorageTextureBind = true;
        commandBuffer->needVertexStorageBufferBind = true;
        commandBuffer->needVertexUniformBind = true;
        commandBuffer->needFragmentSamplerBind = true;
        commandBuffer->needFragmentStorageTextureBind = true;
        commandBuffer->needFragmentStorageBufferBind = true;
        commandBuffer->needFragmentUniformBind = true;
        commandBuffer->needComputeBufferBind = true;
        commandBuffer->needComputeTextureBind = true;
        commandBuffer->needComputeUniformBind = true;

        if (!METAL_INTERNAL_AcquireFence(renderer, commandBuffer)) {
            // Without a fence the command buffer is unusable. The slot it was
            // popped from is still free, so hand it straight back to the pool.
            commandBuffer->handle = nil;
            renderer->availableCommandBuffers[renderer->availableCommandBufferCount] = commandBuffer;
            renderer->availableCommandBufferCount += 1;

            SDL_UnlockMutex(renderer->acquireCommandBufferLock);
            return NULL;
        }
        commandBuffer->autoReleaseFence = 1;

        SDL_UnlockMutex(renderer->acquireCommandBufferLock);

        return (SDL_GPUCommandBuffer *)commandBuffer;
    }
}
2021

2022
// This function assumes that it's called from within an autorelease pool
//
// Takes a uniform buffer from the renderer's pool, or creates a new one of
// UNIFORM_BUFFER_SIZE bytes when the pool is empty, and tracks it on
// `commandBuffer`. The pool is accessed under
// renderer->acquireUniformBufferLock.
// Returns NULL (and tracks nothing) if a new uniform buffer could not be
// created.
static MetalUniformBuffer *METAL_INTERNAL_AcquireUniformBufferFromPool(
    MetalCommandBuffer *commandBuffer)
{
    MetalRenderer *renderer = commandBuffer->renderer;
    MetalUniformBuffer *uniformBuffer;

    SDL_LockMutex(renderer->acquireUniformBufferLock);

    if (renderer->uniformBufferPoolCount > 0) {
        uniformBuffer = renderer->uniformBufferPool[renderer->uniformBufferPoolCount - 1];
        renderer->uniformBufferPoolCount -= 1;
    } else {
        uniformBuffer = METAL_INTERNAL_CreateUniformBuffer(
            renderer,
            UNIFORM_BUFFER_SIZE);
    }

    SDL_UnlockMutex(renderer->acquireUniformBufferLock);

    // Creation can fail; do not record a NULL entry in the tracked resources
    if (uniformBuffer == NULL) {
        return NULL;
    }

    METAL_INTERNAL_TrackUniformBuffer(commandBuffer, uniformBuffer);

    return uniformBuffer;
}
2046

2047
// Resets a uniform buffer's write/draw offsets and pushes it back onto the
// renderer's uniform buffer pool, growing the pool array if it is full.
// If the pool array cannot be grown, the existing pool is left intact and the
// buffer is not pooled (it is dropped rather than written out of bounds).
// NOTE(review): no lock is taken here, while the acquire path uses
// renderer->acquireUniformBufferLock -- presumably callers hold it; confirm.
static void METAL_INTERNAL_ReturnUniformBufferToPool(
    MetalRenderer *renderer,
    MetalUniformBuffer *uniformBuffer)
{
    uniformBuffer->writeOffset = 0;
    uniformBuffer->drawOffset = 0;

    if (renderer->uniformBufferPoolCount >= renderer->uniformBufferPoolCapacity) {
        // Doubling a zero capacity would never grow, so start from one
        Uint32 expandedCapacity = (renderer->uniformBufferPoolCapacity > 0)
            ? renderer->uniformBufferPoolCapacity * 2
            : 1;

        // Use a temporary so the existing pool is not lost if realloc fails
        MetalUniformBuffer **expandedPool = SDL_realloc(
            renderer->uniformBufferPool,
            expandedCapacity * sizeof(MetalUniformBuffer *));
        if (expandedPool == NULL) {
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Could not grow uniform buffer pool");
            return;
        }

        renderer->uniformBufferPool = expandedPool;
        renderer->uniformBufferPoolCapacity = expandedCapacity;
    }

    renderer->uniformBufferPool[renderer->uniformBufferPoolCount] = uniformBuffer;
    renderer->uniformBufferPoolCount += 1;
}
2064

2065
// Begins a render pass on the command buffer.
//
// Builds a render pass descriptor from the color attachments and the optional
// depth-stencil attachment, creates the render command encoder, and sets a
// default viewport and scissor covering the smallest attachment.
//
// For each attachment the texture to render into is chosen by
// METAL_INTERNAL_PrepareTextureForWrite using the attachment's `cycle` flag,
// and the texture is tracked on the command buffer. When a texture has an
// MSAA handle, that handle is the render target and the regular handle is the
// resolve target.
//
// The stencil attachment uses the attachment info's dedicated stencil
// load/store ops rather than reusing the depth ops.
static void METAL_BeginRenderPass(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUColorAttachmentInfo *colorAttachmentInfos,
    Uint32 colorAttachmentCount,
    SDL_GPUDepthStencilAttachmentInfo *depthStencilAttachmentInfo)
{
    @autoreleasepool {
        MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
        MetalRenderer *renderer = metalCommandBuffer->renderer;
        MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
        Uint32 vpWidth = UINT_MAX;
        Uint32 vpHeight = UINT_MAX;
        MTLViewport viewport;
        MTLScissorRect scissorRect;

        // Color attachments
        for (Uint32 i = 0; i < colorAttachmentCount; i += 1) {
            MetalTextureContainer *container = (MetalTextureContainer *)colorAttachmentInfos[i].texture;
            MetalTexture *texture = METAL_INTERNAL_PrepareTextureForWrite(
                renderer,
                container,
                colorAttachmentInfos[i].cycle);

            if (texture->msaaHandle) {
                passDescriptor.colorAttachments[i].texture = texture->msaaHandle;
                passDescriptor.colorAttachments[i].resolveTexture = texture->handle;
            } else {
                passDescriptor.colorAttachments[i].texture = texture->handle;
            }
            passDescriptor.colorAttachments[i].level = colorAttachmentInfos[i].mipLevel;
            // 3D textures address a depth plane; everything else addresses a slice
            if (container->header.info.type == SDL_GPU_TEXTURETYPE_3D) {
                passDescriptor.colorAttachments[i].depthPlane = colorAttachmentInfos[i].layerOrDepthPlane;
            } else {
                passDescriptor.colorAttachments[i].slice = colorAttachmentInfos[i].layerOrDepthPlane;
            }
            passDescriptor.colorAttachments[i].clearColor = MTLClearColorMake(
                colorAttachmentInfos[i].clearColor.r,
                colorAttachmentInfos[i].clearColor.g,
                colorAttachmentInfos[i].clearColor.b,
                colorAttachmentInfos[i].clearColor.a);
            passDescriptor.colorAttachments[i].loadAction = SDLToMetal_LoadOp[colorAttachmentInfos[i].loadOp];
            passDescriptor.colorAttachments[i].storeAction = SDLToMetal_StoreOp(
                colorAttachmentInfos[i].storeOp,
                texture->msaaHandle ? 1 : 0);

            METAL_INTERNAL_TrackTexture(metalCommandBuffer, texture);
        }

        // Depth (and, for stencil formats, stencil) attachment
        if (depthStencilAttachmentInfo != NULL) {
            MetalTextureContainer *container = (MetalTextureContainer *)depthStencilAttachmentInfo->texture;
            MetalTexture *texture = METAL_INTERNAL_PrepareTextureForWrite(
                renderer,
                container,
                depthStencilAttachmentInfo->cycle);

            if (texture->msaaHandle) {
                passDescriptor.depthAttachment.texture = texture->msaaHandle;
                passDescriptor.depthAttachment.resolveTexture = texture->handle;
            } else {
                passDescriptor.depthAttachment.texture = texture->handle;
            }
            passDescriptor.depthAttachment.loadAction = SDLToMetal_LoadOp[depthStencilAttachmentInfo->loadOp];
            passDescriptor.depthAttachment.storeAction = SDLToMetal_StoreOp(
                depthStencilAttachmentInfo->storeOp,
                texture->msaaHandle ? 1 : 0);
            passDescriptor.depthAttachment.clearDepth = depthStencilAttachmentInfo->depthStencilClearValue.depth;

            if (IsStencilFormat(container->header.info.format)) {
                if (texture->msaaHandle) {
                    passDescriptor.stencilAttachment.texture = texture->msaaHandle;
                    passDescriptor.stencilAttachment.resolveTexture = texture->handle;
                } else {
                    passDescriptor.stencilAttachment.texture = texture->handle;
                }
                // Stencil has its own load/store ops, independent of depth
                passDescriptor.stencilAttachment.loadAction = SDLToMetal_LoadOp[depthStencilAttachmentInfo->stencilLoadOp];
                passDescriptor.stencilAttachment.storeAction = SDLToMetal_StoreOp(
                    depthStencilAttachmentInfo->stencilStoreOp,
                    texture->msaaHandle ? 1 : 0);
                passDescriptor.stencilAttachment.clearStencil = depthStencilAttachmentInfo->depthStencilClearValue.stencil;
            }

            METAL_INTERNAL_TrackTexture(metalCommandBuffer, texture);
        }

        metalCommandBuffer->renderEncoder = [metalCommandBuffer->handle renderCommandEncoderWithDescriptor:passDescriptor];

        // The viewport cannot be larger than the smallest attachment.
        for (Uint32 i = 0; i < colorAttachmentCount; i += 1) {
            MetalTextureContainer *container = (MetalTextureContainer *)colorAttachmentInfos[i].texture;
            // Dimensions of the selected mip level
            Uint32 w = container->header.info.width >> colorAttachmentInfos[i].mipLevel;
            Uint32 h = container->header.info.height >> colorAttachmentInfos[i].mipLevel;

            if (w < vpWidth) {
                vpWidth = w;
            }

            if (h < vpHeight) {
                vpHeight = h;
            }
        }

        if (depthStencilAttachmentInfo != NULL) {
            MetalTextureContainer *container = (MetalTextureContainer *)depthStencilAttachmentInfo->texture;
            Uint32 w = container->header.info.width;
            Uint32 h = container->header.info.height;

            if (w < vpWidth) {
                vpWidth = w;
            }

            if (h < vpHeight) {
                vpHeight = h;
            }
        }

        // Set default viewport and scissor state
        viewport.originX = 0;
        viewport.originY = 0;
        viewport.width = vpWidth;
        viewport.height = vpHeight;
        viewport.znear = 0;
        viewport.zfar = 1;
        [metalCommandBuffer->renderEncoder setViewport:viewport];

        scissorRect.x = 0;
        scissorRect.y = 0;
        scissorRect.width = vpWidth;
        scissorRect.height = vpHeight;
        [metalCommandBuffer->renderEncoder setScissorRect:scissorRect];
    }
}
2195

2196
// Binds a graphics pipeline on the current render encoder.
// Sets the render pipeline state, then applies the pipeline's rasterizer
// state (fill mode, cull mode, winding, depth bias), blend constants and, if
// present, its depth-stencil state and stencil reference. Any vertex or
// fragment uniform buffer slot the pipeline needs that is still empty is
// filled from the uniform buffer pool, and both uniform bindings are flagged
// for rebinding.
static void METAL_BindGraphicsPipeline(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUGraphicsPipeline *graphicsPipeline)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
        MetalGraphicsPipeline *pipeline = (MetalGraphicsPipeline *)graphicsPipeline;
        SDL_GPURasterizerState *raster = &pipeline->rasterizerState;
        float biasConstant = 0;
        float biasSlope = 0;
        float biasClamp = 0;
        Uint32 slot;

        cmd->graphicsPipeline = pipeline;

        [cmd->renderEncoder setRenderPipelineState:pipeline->handle];

        // Apply rasterizer state
        [cmd->renderEncoder setTriangleFillMode:SDLToMetal_PolygonMode[raster->fillMode]];
        [cmd->renderEncoder setCullMode:SDLToMetal_CullMode[raster->cullMode]];
        [cmd->renderEncoder setFrontFacingWinding:SDLToMetal_FrontFace[raster->frontFace]];

        // A disabled depth bias is expressed as all-zero factors
        if (raster->depthBiasEnable) {
            biasConstant = raster->depthBiasConstantFactor;
            biasSlope = raster->depthBiasSlopeFactor;
            biasClamp = raster->depthBiasClamp;
        }
        [cmd->renderEncoder setDepthBias:biasConstant
                              slopeScale:biasSlope
                                   clamp:biasClamp];

        // Apply blend constants
        [cmd->renderEncoder setBlendColorRed:pipeline->blendConstants[0]
                                       green:pipeline->blendConstants[1]
                                        blue:pipeline->blendConstants[2]
                                       alpha:pipeline->blendConstants[3]];

        // Apply depth-stencil state
        if (pipeline->depthStencilState != NULL) {
            [cmd->renderEncoder setDepthStencilState:pipeline->depthStencilState];
            [cmd->renderEncoder setStencilReferenceValue:pipeline->stencilReference];
        }

        // Make sure every uniform slot the pipeline uses has a buffer
        for (slot = 0; slot < pipeline->vertexUniformBufferCount; slot += 1) {
            if (cmd->vertexUniformBuffers[slot] == NULL) {
                cmd->vertexUniformBuffers[slot] = METAL_INTERNAL_AcquireUniformBufferFromPool(cmd);
            }
        }

        for (slot = 0; slot < pipeline->fragmentUniformBufferCount; slot += 1) {
            if (cmd->fragmentUniformBuffers[slot] == NULL) {
                cmd->fragmentUniformBuffers[slot] = METAL_INTERNAL_AcquireUniformBufferFromPool(cmd);
            }
        }

        cmd->needVertexUniformBind = true;
        cmd->needFragmentUniformBind = true;
    }
}
2251

2252
// Sets the viewport on the current render encoder, copying the origin, size
// and depth range from `viewport` field by field.
static void METAL_SetViewport(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUViewport *viewport)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
        MTLViewport converted = {
            .originX = viewport->x,
            .originY = viewport->y,
            .width = viewport->w,
            .height = viewport->h,
            .znear = viewport->minDepth,
            .zfar = viewport->maxDepth
        };

        [cmd->renderEncoder setViewport:converted];
    }
}
2270

2271
// Sets the scissor rectangle on the current render encoder, copying the
// position and size from `scissor` field by field.
static void METAL_SetScissor(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_Rect *scissor)
{
    @autoreleasepool {
        MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
        MTLScissorRect converted = {
            .x = scissor->x,
            .y = scissor->y,
            .width = scissor->w,
            .height = scissor->h
        };

        [cmd->renderEncoder setScissorRect:converted];
    }
}
2287

2288
// Binds `bindingCount` vertex buffers to the render encoder in a single call.
//
// The Metal slot range starts at METAL_INTERNAL_GetVertexBufferIndex(firstBinding)
// and is `bindingCount` long. The temporary arrays are filled back-to-front, so
// pBindings[0] ends up in the last array element. Every bound buffer is tracked
// on the command buffer. Nothing is bound when bindingCount is 0.
//
// NOTE(review): the reversed fill presumably mirrors a descending slot mapping
// in METAL_INTERNAL_GetVertexBufferIndex -- confirm the range start is right
// when bindingCount > 1.
// NOTE(review): bindingCount is not checked against MAX_BUFFER_BINDINGS here;
// presumably the caller validates it.
static void METAL_BindVertexBuffers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstBinding,
    SDL_GPUBufferBinding *pBindings,
    Uint32 bindingCount)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        id<MTLBuffer> handles[MAX_BUFFER_BINDINGS];
        NSUInteger offsets[MAX_BUFFER_BINDINGS];
        NSRange slots = NSMakeRange(METAL_INTERNAL_GetVertexBufferIndex(firstBinding), bindingCount);
        NSUInteger dst = slots.length;

        if (dst == 0) {
            return;
        }

        // Walk the bindings forward while the destination index walks backward.
        for (Uint32 src = 0; src < slots.length; src += 1) {
            SDL_GPUBufferBinding *binding = &pBindings[src];
            MetalBuffer *buffer = ((MetalBufferContainer *)binding->buffer)->activeBuffer;

            dst -= 1;
            handles[dst] = buffer->handle;
            offsets[dst] = binding->offset;
            METAL_INTERNAL_TrackBuffer(cmdbuf, buffer);
        }

        [cmdbuf->renderEncoder setVertexBuffers:handles offsets:offsets withRange:slots];
    }
}
2315

2316
// Records the index buffer, its byte offset, and the index element size on the
// command buffer for use by later indexed draws, and tracks the buffer.
// No encoder call is made here.
static void METAL_BindIndexBuffer(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUBufferBinding *pBinding,
    SDL_GPUIndexElementSize indexElementSize)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
    MetalBufferContainer *container = (MetalBufferContainer *)pBinding->buffer;

    cmdbuf->indexBuffer = container->activeBuffer;
    cmdbuf->indexBufferOffset = pBinding->offset;
    cmdbuf->indexElementSize = indexElementSize;

    METAL_INTERNAL_TrackBuffer(cmdbuf, cmdbuf->indexBuffer);
}
2328

2329
// Caches texture/sampler pairs for the vertex stage, starting at `firstSlot`.
// Each texture is tracked on the command buffer. The encoder is not touched
// here; the vertex sampler dirty flag is raised so the bindings are applied
// later.
static void METAL_BindVertexSamplers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    SDL_GPUTextureSamplerBinding *textureSamplerBindings,
    Uint32 bindingCount)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

    for (Uint32 n = 0; n < bindingCount; n += 1) {
        SDL_GPUTextureSamplerBinding *binding = &textureSamplerBindings[n];
        MetalTextureContainer *container = (MetalTextureContainer *)binding->texture;
        MetalSampler *sampler = (MetalSampler *)binding->sampler;
        Uint32 slot = firstSlot + n;

        METAL_INTERNAL_TrackTexture(cmdbuf, container->activeTexture);

        cmdbuf->vertexSamplers[slot] = sampler->handle;
        cmdbuf->vertexTextures[slot] = container->activeTexture->handle;
    }

    cmdbuf->needVertexSamplerBind = true;
}
2354

2355
// Caches storage textures for the vertex stage, starting at `firstSlot`.
// Each texture is tracked on the command buffer and the vertex storage-texture
// dirty flag is raised; the encoder is not touched here.
static void METAL_BindVertexStorageTextures(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    SDL_GPUTexture **storageTextures,
    Uint32 bindingCount)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

    for (Uint32 n = 0; n < bindingCount; n += 1) {
        MetalTextureContainer *container = (MetalTextureContainer *)storageTextures[n];

        METAL_INTERNAL_TrackTexture(cmdbuf, container->activeTexture);

        cmdbuf->vertexStorageTextures[firstSlot + n] = container->activeTexture->handle;
    }

    cmdbuf->needVertexStorageTextureBind = true;
}
2377

2378
// Caches storage buffers for the vertex stage, starting at `firstSlot`.
// Each buffer is tracked on the command buffer and the vertex storage-buffer
// dirty flag is raised; the encoder is not touched here.
static void METAL_BindVertexStorageBuffers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    SDL_GPUBuffer **storageBuffers,
    Uint32 bindingCount)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

    for (Uint32 n = 0; n < bindingCount; n += 1) {
        MetalBufferContainer *container = (MetalBufferContainer *)storageBuffers[n];

        METAL_INTERNAL_TrackBuffer(cmdbuf, container->activeBuffer);

        cmdbuf->vertexStorageBuffers[firstSlot + n] = container->activeBuffer->handle;
    }

    cmdbuf->needVertexStorageBufferBind = true;
}
2400

2401
// Caches texture/sampler pairs for the fragment stage, starting at `firstSlot`.
// Each texture is tracked on the command buffer. The encoder is not touched
// here; the fragment sampler dirty flag is raised so the bindings are applied
// later.
static void METAL_BindFragmentSamplers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    SDL_GPUTextureSamplerBinding *textureSamplerBindings,
    Uint32 bindingCount)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

    for (Uint32 n = 0; n < bindingCount; n += 1) {
        SDL_GPUTextureSamplerBinding *binding = &textureSamplerBindings[n];
        MetalTextureContainer *container = (MetalTextureContainer *)binding->texture;
        MetalSampler *sampler = (MetalSampler *)binding->sampler;
        Uint32 slot = firstSlot + n;

        METAL_INTERNAL_TrackTexture(cmdbuf, container->activeTexture);

        cmdbuf->fragmentSamplers[slot] = sampler->handle;
        cmdbuf->fragmentTextures[slot] = container->activeTexture->handle;
    }

    cmdbuf->needFragmentSamplerBind = true;
}
2426

2427
// Caches storage textures for the fragment stage, starting at `firstSlot`.
// Each texture is tracked on the command buffer and the fragment
// storage-texture dirty flag is raised; the encoder is not touched here.
static void METAL_BindFragmentStorageTextures(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    SDL_GPUTexture **storageTextures,
    Uint32 bindingCount)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

    for (Uint32 n = 0; n < bindingCount; n += 1) {
        MetalTextureContainer *container = (MetalTextureContainer *)storageTextures[n];

        METAL_INTERNAL_TrackTexture(cmdbuf, container->activeTexture);

        cmdbuf->fragmentStorageTextures[firstSlot + n] = container->activeTexture->handle;
    }

    cmdbuf->needFragmentStorageTextureBind = true;
}
2449

2450
// Caches storage buffers for the fragment stage, starting at `firstSlot`.
// Each buffer is tracked on the command buffer and the fragment storage-buffer
// dirty flag is raised; the encoder is not touched here.
static void METAL_BindFragmentStorageBuffers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    SDL_GPUBuffer **storageBuffers,
    Uint32 bindingCount)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

    for (Uint32 n = 0; n < bindingCount; n += 1) {
        MetalBufferContainer *container = (MetalBufferContainer *)storageBuffers[n];

        METAL_INTERNAL_TrackBuffer(cmdbuf, container->activeBuffer);

        cmdbuf->fragmentStorageBuffers[firstSlot + n] = container->activeBuffer->handle;
    }

    cmdbuf->needFragmentStorageBufferBind = true;
}
2472

2473
// Flushes the cached vertex- and fragment-stage bindings to the render encoder.
//
// A resource category is sent only when the bound graphics pipeline uses at
// least one resource of that kind AND its dirty flag is set; the flag is
// cleared after sending. Slot layout per stage:
//   textures: sampled textures at [0, samplerCount), storage textures after them
//   buffers:  uniform buffers at [0, uniformBufferCount), storage buffers after them
//
// This function assumes that it's called from within an autorelease pool.
static void METAL_INTERNAL_BindGraphicsResources(
    MetalCommandBuffer *commandBuffer)
{
    MetalGraphicsPipeline *pipeline = commandBuffer->graphicsPipeline;
    id<MTLRenderCommandEncoder> encoder = commandBuffer->renderEncoder;
    // Storage buffers are always bound at offset zero.
    NSUInteger zeroOffsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 };

    // Vertex: samplers and sampled textures
    if (commandBuffer->needVertexSamplerBind && pipeline->vertexSamplerCount > 0) {
        NSRange slots = NSMakeRange(0, pipeline->vertexSamplerCount);

        [encoder setVertexSamplerStates:commandBuffer->vertexSamplers withRange:slots];
        [encoder setVertexTextures:commandBuffer->vertexTextures withRange:slots];
        commandBuffer->needVertexSamplerBind = false;
    }

    // Vertex: storage textures
    if (commandBuffer->needVertexStorageTextureBind && pipeline->vertexStorageTextureCount > 0) {
        NSRange slots = NSMakeRange(pipeline->vertexSamplerCount, pipeline->vertexStorageTextureCount);

        [encoder setVertexTextures:commandBuffer->vertexStorageTextures withRange:slots];
        commandBuffer->needVertexStorageTextureBind = false;
    }

    // Vertex: storage buffers
    if (commandBuffer->needVertexStorageBufferBind && pipeline->vertexStorageBufferCount > 0) {
        NSRange slots = NSMakeRange(pipeline->vertexUniformBufferCount, pipeline->vertexStorageBufferCount);

        [encoder setVertexBuffers:commandBuffer->vertexStorageBuffers offsets:zeroOffsets withRange:slots];
        commandBuffer->needVertexStorageBufferBind = false;
    }

    // Vertex: uniform buffers, each bound at its current draw offset
    if (commandBuffer->needVertexUniformBind && pipeline->vertexUniformBufferCount > 0) {
        for (Uint32 slot = 0; slot < pipeline->vertexUniformBufferCount; slot += 1) {
            MetalUniformBuffer *uniforms = commandBuffer->vertexUniformBuffers[slot];

            [encoder setVertexBuffer:uniforms->handle offset:uniforms->drawOffset atIndex:slot];
        }
        commandBuffer->needVertexUniformBind = false;
    }

    // Fragment: samplers and sampled textures
    if (commandBuffer->needFragmentSamplerBind && pipeline->fragmentSamplerCount > 0) {
        NSRange slots = NSMakeRange(0, pipeline->fragmentSamplerCount);

        [encoder setFragmentSamplerStates:commandBuffer->fragmentSamplers withRange:slots];
        [encoder setFragmentTextures:commandBuffer->fragmentTextures withRange:slots];
        commandBuffer->needFragmentSamplerBind = false;
    }

    // Fragment: storage textures
    if (commandBuffer->needFragmentStorageTextureBind && pipeline->fragmentStorageTextureCount > 0) {
        NSRange slots = NSMakeRange(pipeline->fragmentSamplerCount, pipeline->fragmentStorageTextureCount);

        [encoder setFragmentTextures:commandBuffer->fragmentStorageTextures withRange:slots];
        commandBuffer->needFragmentStorageTextureBind = false;
    }

    // Fragment: storage buffers
    if (commandBuffer->needFragmentStorageBufferBind && pipeline->fragmentStorageBufferCount > 0) {
        NSRange slots = NSMakeRange(pipeline->fragmentUniformBufferCount, pipeline->fragmentStorageBufferCount);

        [encoder setFragmentBuffers:commandBuffer->fragmentStorageBuffers offsets:zeroOffsets withRange:slots];
        commandBuffer->needFragmentStorageBufferBind = false;
    }

    // Fragment: uniform buffers, each bound at its current draw offset
    if (commandBuffer->needFragmentUniformBind && pipeline->fragmentUniformBufferCount > 0) {
        for (Uint32 slot = 0; slot < pipeline->fragmentUniformBufferCount; slot += 1) {
            MetalUniformBuffer *uniforms = commandBuffer->fragmentUniformBuffers[slot];

            [encoder setFragmentBuffer:uniforms->handle offset:uniforms->drawOffset atIndex:slot];
        }
        commandBuffer->needFragmentUniformBind = false;
    }
}
2561

2562
// Flushes the cached compute bindings to the compute encoder.
//
// Each category (textures, storage buffers, uniform buffers) is processed only
// when its dirty flag is set, and the flag is cleared afterwards even if the
// pipeline uses no resources of that kind. Slot layout:
//   textures: read-only at [0, readOnlyCount), write-only after them
//   buffers:  uniforms at [0, uniformCount), then read-only, then write-only
//
// This function assumes that it's called from within an autorelease pool.
static void METAL_INTERNAL_BindComputeResources(
    MetalCommandBuffer *commandBuffer)
{
    MetalComputePipeline *pipeline = commandBuffer->computePipeline;
    id<MTLComputeCommandEncoder> encoder = commandBuffer->computeEncoder;
    // Storage buffers are always bound at offset zero; the same array serves
    // both the read-only and the write-only sets.
    NSUInteger zeroOffsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 };

    if (commandBuffer->needComputeTextureBind) {
        NSUInteger readOnlyCount = pipeline->readOnlyStorageTextureCount;
        NSUInteger writeOnlyCount = pipeline->writeOnlyStorageTextureCount;

        if (readOnlyCount > 0) {
            [encoder setTextures:commandBuffer->computeReadOnlyTextures
                       withRange:NSMakeRange(0, readOnlyCount)];
        }
        if (writeOnlyCount > 0) {
            [encoder setTextures:commandBuffer->computeWriteOnlyTextures
                       withRange:NSMakeRange(readOnlyCount, writeOnlyCount)];
        }
        commandBuffer->needComputeTextureBind = false;
    }

    if (commandBuffer->needComputeBufferBind) {
        if (pipeline->readOnlyStorageBufferCount > 0) {
            NSRange slots = NSMakeRange(
                pipeline->uniformBufferCount,
                pipeline->readOnlyStorageBufferCount);

            [encoder setBuffers:commandBuffer->computeReadOnlyBuffers offsets:zeroOffsets withRange:slots];
        }
        if (pipeline->writeOnlyStorageBufferCount > 0) {
            NSRange slots = NSMakeRange(
                pipeline->uniformBufferCount + pipeline->readOnlyStorageBufferCount,
                pipeline->writeOnlyStorageBufferCount);

            [encoder setBuffers:commandBuffer->computeWriteOnlyBuffers offsets:zeroOffsets withRange:slots];
        }
        commandBuffer->needComputeBufferBind = false;
    }

    if (commandBuffer->needComputeUniformBind) {
        for (Uint32 slot = 0; slot < pipeline->uniformBufferCount; slot += 1) {
            MetalUniformBuffer *uniforms = commandBuffer->computeUniformBuffers[slot];

            [encoder setBuffer:uniforms->handle offset:uniforms->drawOffset atIndex:slot];
        }
        commandBuffer->needComputeUniformBind = false;
    }
}
2617

2618
// Flushes pending graphics bindings, then encodes one indexed, instanced draw
// using the index buffer previously recorded on the command buffer.
// The index buffer byte offset is the recorded offset plus
// firstIndex * (size of one index element).
static void METAL_DrawIndexedPrimitives(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 indexCount,
    Uint32 instanceCount,
    Uint32 firstIndex,
    Sint32 vertexOffset,
    Uint32 firstInstance)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        SDL_GPUPrimitiveType topology = cmdbuf->graphicsPipeline->primitiveType;
        Uint32 bytesPerIndex = IndexSize(cmdbuf->indexElementSize);

        METAL_INTERNAL_BindGraphicsResources(cmdbuf);

        [cmdbuf->renderEncoder
            drawIndexedPrimitives:SDLToMetal_PrimitiveType[topology]
                       indexCount:indexCount
                        indexType:SDLToMetal_IndexType[cmdbuf->indexElementSize]
                      indexBuffer:cmdbuf->indexBuffer->handle
                indexBufferOffset:cmdbuf->indexBufferOffset + (firstIndex * bytesPerIndex)
                    instanceCount:instanceCount
                       baseVertex:vertexOffset
                     baseInstance:firstInstance];
    }
}
2644

2645
// Flushes pending graphics bindings, then encodes one non-indexed, instanced
// draw using the primitive type of the bound graphics pipeline.
static void METAL_DrawPrimitives(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 vertexCount,
    Uint32 instanceCount,
    Uint32 firstVertex,
    Uint32 firstInstance)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        SDL_GPUPrimitiveType topology = cmdbuf->graphicsPipeline->primitiveType;

        METAL_INTERNAL_BindGraphicsResources(cmdbuf);

        [cmdbuf->renderEncoder
            drawPrimitives:SDLToMetal_PrimitiveType[topology]
               vertexStart:firstVertex
               vertexCount:vertexCount
             instanceCount:instanceCount
              baseInstance:firstInstance];
    }
}
2666

2667
// Flushes pending graphics bindings, then encodes `drawCount` non-indexed
// indirect draws whose arguments are read from `buffer`. Draw k reads its
// arguments at byte offset offsetInBytes + k * stride. The indirect buffer is
// tracked on the command buffer afterwards.
static void METAL_DrawPrimitivesIndirect(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUBuffer *buffer,
    Uint32 offsetInBytes,
    Uint32 drawCount,
    Uint32 stride)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        MetalBuffer *argumentBuffer = ((MetalBufferContainer *)buffer)->activeBuffer;
        SDL_GPUPrimitiveType topology = cmdbuf->graphicsPipeline->primitiveType;
        Uint32 argumentOffset = offsetInBytes;

        METAL_INTERNAL_BindGraphicsResources(cmdbuf);

        // Multi-draw is emulated here: one indirect draw call per entry.
        for (Uint32 remaining = drawCount; remaining > 0; remaining -= 1) {
            [cmdbuf->renderEncoder
                      drawPrimitives:SDLToMetal_PrimitiveType[topology]
                      indirectBuffer:argumentBuffer->handle
                indirectBufferOffset:argumentOffset];
            argumentOffset += stride;
        }

        METAL_INTERNAL_TrackBuffer(cmdbuf, argumentBuffer);
    }
}
2694

2695
// Flushes pending graphics bindings, then encodes `drawCount` indexed indirect
// draws whose arguments are read from `buffer`. Draw k reads its arguments at
// byte offset offsetInBytes + k * stride. The index buffer, its offset, and the
// index type come from the state recorded on the command buffer. The indirect
// buffer is tracked on the command buffer afterwards.
static void METAL_DrawIndexedPrimitivesIndirect(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUBuffer *buffer,
    Uint32 offsetInBytes,
    Uint32 drawCount,
    Uint32 stride)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        MetalBuffer *argumentBuffer = ((MetalBufferContainer *)buffer)->activeBuffer;
        SDL_GPUPrimitiveType topology = cmdbuf->graphicsPipeline->primitiveType;
        Uint32 argumentOffset = offsetInBytes;

        METAL_INTERNAL_BindGraphicsResources(cmdbuf);

        // One indirect draw call per entry.
        for (Uint32 remaining = drawCount; remaining > 0; remaining -= 1) {
            [cmdbuf->renderEncoder
                drawIndexedPrimitives:SDLToMetal_PrimitiveType[topology]
                            indexType:SDLToMetal_IndexType[cmdbuf->indexElementSize]
                          indexBuffer:cmdbuf->indexBuffer->handle
                    indexBufferOffset:cmdbuf->indexBufferOffset
                       indirectBuffer:argumentBuffer->handle
                 indirectBufferOffset:argumentOffset];
            argumentOffset += stride;
        }

        METAL_INTERNAL_TrackBuffer(cmdbuf, argumentBuffer);
    }
}
2722

2723
// Ends encoding on the render encoder, drops the command buffer's reference to
// it, and resets every cached vertex/fragment sampler, texture, storage
// texture, and storage buffer slot to nil.
static void METAL_EndRenderPass(
    SDL_GPUCommandBuffer *commandBuffer)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        Uint32 slot;

        [cmdbuf->renderEncoder endEncoding];
        cmdbuf->renderEncoder = nil;

        // Sampler/texture pairs
        for (slot = 0; slot < MAX_TEXTURE_SAMPLERS_PER_STAGE; slot += 1) {
            cmdbuf->vertexSamplers[slot] = nil;
            cmdbuf->vertexTextures[slot] = nil;
            cmdbuf->fragmentSamplers[slot] = nil;
            cmdbuf->fragmentTextures[slot] = nil;
        }

        // Storage textures
        for (slot = 0; slot < MAX_STORAGE_TEXTURES_PER_STAGE; slot += 1) {
            cmdbuf->vertexStorageTextures[slot] = nil;
            cmdbuf->fragmentStorageTextures[slot] = nil;
        }

        // Storage buffers
        for (slot = 0; slot < MAX_STORAGE_BUFFERS_PER_STAGE; slot += 1) {
            cmdbuf->vertexStorageBuffers[slot] = nil;
            cmdbuf->fragmentStorageBuffers[slot] = nil;
        }
    }
}
2747

2748
// Copies `dataLengthInBytes` bytes from `data` into the uniform buffer bound
// at `slotIndex` for `shaderStage`, and marks that stage's uniforms as needing
// a rebind.
//
// - If the slot has no uniform buffer yet, one is acquired from the pool.
// - The write is padded to a 256-byte multiple. If the padded write would
//   reach or exceed UNIFORM_BUFFER_SIZE, a fresh buffer is acquired, its
//   offsets are reset to 0, and it replaces the buffer bound to the slot.
// - drawOffset is set to where this data starts; writeOffset advances past it.
//
// An unrecognized stage logs an error and returns without touching any state.
//
// This function assumes that it's called from within an autorelease pool.
static void METAL_INTERNAL_PushUniformData(
    MetalCommandBuffer *metalCommandBuffer,
    SDL_GPUShaderStage shaderStage,
    Uint32 slotIndex,
    const void *data,
    Uint32 dataLengthInBytes)
{
    MetalUniformBuffer **boundBuffer;
    MetalUniformBuffer *metalUniformBuffer;
    Uint32 alignedDataLength;

    // Resolve the per-stage slot once. This is the only place the stage needs
    // validating; everything below can rely on it being one of the three.
    if (shaderStage == SDL_GPU_SHADERSTAGE_VERTEX) {
        boundBuffer = &metalCommandBuffer->vertexUniformBuffers[slotIndex];
    } else if (shaderStage == SDL_GPU_SHADERSTAGE_FRAGMENT) {
        boundBuffer = &metalCommandBuffer->fragmentUniformBuffers[slotIndex];
    } else if (shaderStage == SDL_GPU_SHADERSTAGE_COMPUTE) {
        boundBuffer = &metalCommandBuffer->computeUniformBuffers[slotIndex];
    } else {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Unrecognized shader stage!");
        return;
    }

    if (*boundBuffer == NULL) {
        *boundBuffer = METAL_INTERNAL_AcquireUniformBufferFromPool(
            metalCommandBuffer);
    }
    metalUniformBuffer = *boundBuffer;

    // NOTE(review): 256 is presumably Metal's required offset alignment for
    // constant buffers -- confirm against the targeted feature sets.
    alignedDataLength = METAL_INTERNAL_NextHighestAlignment(
        dataLengthInBytes,
        256);

    // Out of room in the current buffer: switch the slot to a fresh one.
    if (metalUniformBuffer->writeOffset + alignedDataLength >= UNIFORM_BUFFER_SIZE) {
        metalUniformBuffer = METAL_INTERNAL_AcquireUniformBufferFromPool(
            metalCommandBuffer);

        metalUniformBuffer->writeOffset = 0;
        metalUniformBuffer->drawOffset = 0;

        *boundBuffer = metalUniformBuffer;
    }

    metalUniformBuffer->drawOffset = metalUniformBuffer->writeOffset;

    // Byte-typed pointer so the offset arithmetic does not depend on the
    // void-pointer arithmetic extension.
    SDL_memcpy(
        (Uint8 *)metalUniformBuffer->handle.contents + metalUniformBuffer->writeOffset,
        data,
        dataLengthInBytes);

    metalUniformBuffer->writeOffset += alignedDataLength;

    if (shaderStage == SDL_GPU_SHADERSTAGE_VERTEX) {
        metalCommandBuffer->needVertexUniformBind = true;
    } else if (shaderStage == SDL_GPU_SHADERSTAGE_FRAGMENT) {
        metalCommandBuffer->needFragmentUniformBind = true;
    } else {
        // Compute: the only stage left after the validation above.
        metalCommandBuffer->needComputeUniformBind = true;
    }
}
2824

2825
// Pushes uniform data for the vertex stage at `slotIndex`.
// Wraps METAL_INTERNAL_PushUniformData in the autorelease pool it requires.
static void METAL_PushVertexUniformData(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 slotIndex,
    const void *data,
    Uint32 dataLengthInBytes)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

        METAL_INTERNAL_PushUniformData(cmdbuf, SDL_GPU_SHADERSTAGE_VERTEX, slotIndex, data, dataLengthInBytes);
    }
}
2840

2841
// Pushes uniform data for the fragment stage at `slotIndex`.
// Wraps METAL_INTERNAL_PushUniformData in the autorelease pool it requires.
static void METAL_PushFragmentUniformData(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 slotIndex,
    const void *data,
    Uint32 dataLengthInBytes)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

        METAL_INTERNAL_PushUniformData(cmdbuf, SDL_GPU_SHADERSTAGE_FRAGMENT, slotIndex, data, dataLengthInBytes);
    }
}
2856

2857
// Blit
2858

2859
// Performs a blit by delegating to the shared SDL_GPU_BlitCommon routine,
// handing it this renderer's blit samplers, blit shaders, and blit pipeline
// cache (array, count, and capacity are passed by address).
static void METAL_Blit(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUBlitRegion *source,
    SDL_GPUBlitRegion *destination,
    SDL_FlipMode flipMode,
    SDL_GPUFilter filterMode,
    bool cycle)
{
    MetalRenderer *metalRenderer =
        (MetalRenderer *)((MetalCommandBuffer *)commandBuffer)->renderer;

    SDL_GPU_BlitCommon(
        commandBuffer, source, destination,
        flipMode, filterMode, cycle,
        metalRenderer->blitLinearSampler,
        metalRenderer->blitNearestSampler,
        metalRenderer->blitVertexShader,
        metalRenderer->blitFrom2DShader,
        metalRenderer->blitFrom2DArrayShader,
        metalRenderer->blitFrom3DShader,
        metalRenderer->blitFromCubeShader,
        &metalRenderer->blitPipelines,
        &metalRenderer->blitPipelineCount,
        &metalRenderer->blitPipelineCapacity);
}
2888

2889
// Compute State
2890

2891
// Starts a compute pass: creates a compute encoder on the command buffer and
// caches the write-only storage textures and buffers for the pass.
//
// Each texture binding is prepared for writing (honoring its `cycle` flag),
// tracked, and cached as a single-mip, single-slice view selected by the
// binding's mipLevel and layer. Each buffer binding is prepared for writing,
// tracked, and cached by handle. The matching dirty flag is raised whenever at
// least one binding of that kind was supplied.
static void METAL_BeginComputePass(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
    Uint32 storageTextureBindingCount,
    SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
    Uint32 storageBufferBindingCount)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

        cmdbuf->computeEncoder = [cmdbuf->handle computeCommandEncoder];

        for (Uint32 t = 0; t < storageTextureBindingCount; t += 1) {
            MetalTextureContainer *container =
                (MetalTextureContainer *)storageTextureBindings[t].texture;
            MetalTexture *writable = METAL_INTERNAL_PrepareTextureForWrite(
                cmdbuf->renderer,
                container,
                storageTextureBindings[t].cycle);
            id<MTLTexture> subresourceView;

            METAL_INTERNAL_TrackTexture(cmdbuf, writable);

            subresourceView = [writable->handle
                newTextureViewWithPixelFormat:SDLToMetal_SurfaceFormat[container->header.info.format]
                                  textureType:SDLToMetal_TextureType[container->header.info.type]
                                       levels:NSMakeRange(storageTextureBindings[t].mipLevel, 1)
                                       slices:NSMakeRange(storageTextureBindings[t].layer, 1)];

            cmdbuf->computeWriteOnlyTextures[t] = subresourceView;
        }
        if (storageTextureBindingCount > 0) {
            cmdbuf->needComputeTextureBind = true;
        }

        for (Uint32 b = 0; b < storageBufferBindingCount; b += 1) {
            MetalBufferContainer *container =
                (MetalBufferContainer *)storageBufferBindings[b].buffer;
            MetalBuffer *writable = METAL_INTERNAL_PrepareBufferForWrite(
                cmdbuf->renderer,
                container,
                storageBufferBindings[b].cycle);

            METAL_INTERNAL_TrackBuffer(cmdbuf, writable);

            cmdbuf->computeWriteOnlyBuffers[b] = writable->handle;
        }
        if (storageBufferBindingCount > 0) {
            cmdbuf->needComputeBufferBind = true;
        }
    }
}
2944

2945
// Makes `computePipeline` the active compute pipeline: records it on the
// command buffer, sets its pipeline state on the compute encoder, and acquires
// a pooled uniform buffer for every uniform slot the pipeline uses that does
// not have one yet. The compute uniform dirty flag is always raised.
static void METAL_BindComputePipeline(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUComputePipeline *computePipeline)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        MetalComputePipeline *metalPipeline = (MetalComputePipeline *)computePipeline;

        cmdbuf->computePipeline = metalPipeline;
        [cmdbuf->computeEncoder setComputePipelineState:metalPipeline->handle];

        for (Uint32 slot = 0; slot < metalPipeline->uniformBufferCount; slot += 1) {
            if (cmdbuf->computeUniformBuffers[slot] != NULL) {
                continue;
            }
            cmdbuf->computeUniformBuffers[slot] =
                METAL_INTERNAL_AcquireUniformBufferFromPool(cmdbuf);
        }

        cmdbuf->needComputeUniformBind = true;
    }
}
2967

2968
// Caches read-only storage textures for the compute stage, starting at
// `firstSlot`. Each texture is tracked on the command buffer and the compute
// texture dirty flag is raised; the encoder is not touched here.
static void METAL_BindComputeStorageTextures(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    SDL_GPUTexture **storageTextures,
    Uint32 bindingCount)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

    for (Uint32 n = 0; n < bindingCount; n += 1) {
        MetalTextureContainer *container = (MetalTextureContainer *)storageTextures[n];

        METAL_INTERNAL_TrackTexture(cmdbuf, container->activeTexture);

        cmdbuf->computeReadOnlyTextures[firstSlot + n] = container->activeTexture->handle;
    }

    cmdbuf->needComputeTextureBind = true;
}
2990

2991
// Records `bindingCount` storage buffers as the compute stage's read-only
// buffers, starting at slot `firstSlot`. Each container's active buffer is
// tracked on the command buffer and its handle stored in the slot table; the
// buffer bindings are then flagged as dirty.
static void METAL_BindComputeStorageBuffers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    SDL_GPUBuffer **storageBuffers,
    Uint32 bindingCount)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
    Uint32 offset;

    for (offset = 0; offset < bindingCount; offset += 1) {
        MetalBufferContainer *container = (MetalBufferContainer *)storageBuffers[offset];

        METAL_INTERNAL_TrackBuffer(cmdbuf, container->activeBuffer);
        cmdbuf->computeReadOnlyBuffers[firstSlot + offset] = container->activeBuffer->handle;
    }

    cmdbuf->needComputeBufferBind = true;
}
3013

3014
// Pushes `dataLengthInBytes` bytes of uniform data to compute uniform slot
// `slotIndex` by forwarding to the shared uniform-push helper with the
// compute shader stage.
static void METAL_PushComputeUniformData(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 slotIndex,
    const void *data,
    Uint32 dataLengthInBytes)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;

        METAL_INTERNAL_PushUniformData(
            cmdbuf,
            SDL_GPU_SHADERSTAGE_COMPUTE,
            slotIndex,
            data,
            dataLengthInBytes);
    }
}
3029

3030
// Dispatches a grid of groupCountX x groupCountY x groupCountZ threadgroups
// on the compute encoder. The per-threadgroup thread counts come from the
// currently bound compute pipeline; pending compute resource bindings are
// applied before the dispatch is encoded.
static void METAL_DispatchCompute(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 groupCountX,
    Uint32 groupCountY,
    Uint32 groupCountZ)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        MetalComputePipeline *pipeline = cmdbuf->computePipeline;
        MTLSize gridSize = MTLSizeMake(groupCountX, groupCountY, groupCountZ);
        MTLSize groupSize = MTLSizeMake(
            pipeline->threadCountX,
            pipeline->threadCountY,
            pipeline->threadCountZ);

        METAL_INTERNAL_BindComputeResources(cmdbuf);

        [cmdbuf->computeEncoder dispatchThreadgroups:gridSize
                               threadsPerThreadgroup:groupSize];
    }
}
3051

3052
// Dispatches compute work whose threadgroup counts are read from `buffer`
// at `offsetInBytes`. The per-threadgroup thread counts come from the
// currently bound compute pipeline. Pending compute resource bindings are
// applied first, and the indirect buffer is tracked on the command buffer
// after the dispatch is encoded.
static void METAL_DispatchComputeIndirect(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_GPUBuffer *buffer,
    Uint32 offsetInBytes)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        MetalBufferContainer *container = (MetalBufferContainer *)buffer;
        MetalBuffer *indirectBuffer = container->activeBuffer;
        MetalComputePipeline *pipeline = cmdbuf->computePipeline;
        MTLSize groupSize = MTLSizeMake(
            pipeline->threadCountX,
            pipeline->threadCountY,
            pipeline->threadCountZ);

        METAL_INTERNAL_BindComputeResources(cmdbuf);

        [cmdbuf->computeEncoder
            dispatchThreadgroupsWithIndirectBuffer:indirectBuffer->handle
                              indirectBufferOffset:offsetInBytes
                             threadsPerThreadgroup:groupSize];

        METAL_INTERNAL_TrackBuffer(cmdbuf, indirectBuffer);
    }
}
3075

3076
// Finishes the current compute pass: ends encoding on the compute encoder,
// drops the encoder, and clears every compute resource binding slot
// (write-only and read-only textures and buffers).
static void METAL_EndComputePass(
    SDL_GPUCommandBuffer *commandBuffer)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        Uint32 slot;

        [cmdbuf->computeEncoder endEncoding];
        cmdbuf->computeEncoder = nil;

        // Write-only bindings
        for (slot = 0; slot < MAX_COMPUTE_WRITE_TEXTURES; slot += 1) {
            cmdbuf->computeWriteOnlyTextures[slot] = nil;
        }
        for (slot = 0; slot < MAX_COMPUTE_WRITE_BUFFERS; slot += 1) {
            cmdbuf->computeWriteOnlyBuffers[slot] = nil;
        }

        // Read-only bindings
        for (slot = 0; slot < MAX_STORAGE_TEXTURES_PER_STAGE; slot += 1) {
            cmdbuf->computeReadOnlyTextures[slot] = nil;
        }
        for (slot = 0; slot < MAX_STORAGE_BUFFERS_PER_STAGE; slot += 1) {
            cmdbuf->computeReadOnlyBuffers[slot] = nil;
        }
    }
}
3098

3099
// Fence Cleanup
3100

3101
// Appends `fence` to the renderer's list of available fences, doubling the
// list's capacity first when it is full. The whole operation runs under
// renderer->fenceLock.
static void METAL_INTERNAL_ReleaseFenceToPool(
    MetalRenderer *renderer,
    MetalFence *fence)
{
    SDL_LockMutex(renderer->fenceLock);

    // FIXME: Should this use EXPAND_IF_NEEDED?
    if (renderer->availableFenceCapacity == renderer->availableFenceCount) {
        renderer->availableFenceCapacity *= 2;
        renderer->availableFences = SDL_realloc(
            renderer->availableFences,
            renderer->availableFenceCapacity * sizeof(MetalFence *));
    }

    renderer->availableFences[renderer->availableFenceCount] = fence;
    renderer->availableFenceCount += 1;

    SDL_UnlockMutex(renderer->fenceLock);
}
3119

3120
// Driver entry point for releasing a fence: casts the opaque handles to
// their Metal types and returns the fence to the renderer's fence pool.
static void METAL_ReleaseFence(
    SDL_GPURenderer *driverData,
    SDL_GPUFence *fence)
{
    MetalRenderer *renderer = (MetalRenderer *)driverData;
    MetalFence *metalFence = (MetalFence *)fence;

    METAL_INTERNAL_ReleaseFenceToPool(renderer, metalFence);
}
3128

3129
// Cleanup
3130

3131
// Recycles a command buffer: drops the references it held on tracked buffers
// and textures, returns its uniform buffers to the pool, resets its
// presentation and binding state, releases its fence when autoReleaseFence is
// set, appends it to the renderer's available list and removes it from the
// submitted list.
//
// The submitted list is modified here without taking a lock; the callers
// visible in this file (METAL_Submit, METAL_Wait) hold submitLock around
// the call.
static void METAL_INTERNAL_CleanCommandBuffer(
    MetalRenderer *renderer,
    MetalCommandBuffer *commandBuffer)
{
    Uint32 index;

    // Drop the references held on tracked buffers...
    for (index = 0; index < commandBuffer->usedBufferCount; index += 1) {
        (void)SDL_AtomicDecRef(&commandBuffer->usedBuffers[index]->referenceCount);
    }
    commandBuffer->usedBufferCount = 0;

    // ...and on tracked textures
    for (index = 0; index < commandBuffer->usedTextureCount; index += 1) {
        (void)SDL_AtomicDecRef(&commandBuffer->usedTextures[index]->referenceCount);
    }
    commandBuffer->usedTextureCount = 0;

    // Hand the uniform buffers back to the pool
    SDL_LockMutex(renderer->acquireUniformBufferLock);
    for (index = 0; index < commandBuffer->usedUniformBufferCount; index += 1) {
        METAL_INTERNAL_ReturnUniformBufferToPool(
            renderer,
            commandBuffer->usedUniformBuffers[index]);
    }
    commandBuffer->usedUniformBufferCount = 0;
    SDL_UnlockMutex(renderer->acquireUniformBufferLock);

    // Forget any windows queued for presentation
    commandBuffer->windowDataCount = 0;

    // Clear all graphics and compute bindings
    commandBuffer->indexBuffer = NULL;

    for (index = 0; index < MAX_TEXTURE_SAMPLERS_PER_STAGE; index += 1) {
        commandBuffer->vertexSamplers[index] = nil;
        commandBuffer->vertexTextures[index] = nil;
        commandBuffer->fragmentSamplers[index] = nil;
        commandBuffer->fragmentTextures[index] = nil;
    }

    for (index = 0; index < MAX_STORAGE_TEXTURES_PER_STAGE; index += 1) {
        commandBuffer->vertexStorageTextures[index] = nil;
        commandBuffer->fragmentStorageTextures[index] = nil;
        commandBuffer->computeReadOnlyTextures[index] = nil;
    }

    for (index = 0; index < MAX_STORAGE_BUFFERS_PER_STAGE; index += 1) {
        commandBuffer->vertexStorageBuffers[index] = nil;
        commandBuffer->fragmentStorageBuffers[index] = nil;
        commandBuffer->computeReadOnlyBuffers[index] = nil;
    }

    for (index = 0; index < MAX_COMPUTE_WRITE_TEXTURES; index += 1) {
        commandBuffer->computeWriteOnlyTextures[index] = nil;
    }

    for (index = 0; index < MAX_COMPUTE_WRITE_BUFFERS; index += 1) {
        commandBuffer->computeWriteOnlyBuffers[index] = nil;
    }

    // The fence goes back to the pool too, unless SubmitAndAcquireFence
    // handed ownership of it to the caller
    if (commandBuffer->autoReleaseFence) {
        METAL_ReleaseFence(
            (SDL_GPURenderer *)renderer,
            (SDL_GPUFence *)commandBuffer->fence);
    }

    // Make the command buffer available for reuse
    SDL_LockMutex(renderer->acquireCommandBufferLock);

    // FIXME: Should this use EXPAND_IF_NEEDED?
    if (renderer->availableCommandBufferCapacity == renderer->availableCommandBufferCount) {
        // The list grows by a single entry at a time
        renderer->availableCommandBufferCapacity += 1;
        renderer->availableCommandBuffers = SDL_realloc(
            renderer->availableCommandBuffers,
            renderer->availableCommandBufferCapacity * sizeof(MetalCommandBuffer *));
    }
    renderer->availableCommandBuffers[renderer->availableCommandBufferCount] = commandBuffer;
    renderer->availableCommandBufferCount += 1;

    SDL_UnlockMutex(renderer->acquireCommandBufferLock);

    // Swap-remove this command buffer from the submitted list: the last
    // entry is moved into the vacated position and the list shrinks by one
    for (index = 0; index < renderer->submittedCommandBufferCount; index += 1) {
        if (renderer->submittedCommandBuffers[index] != commandBuffer) {
            continue;
        }

        renderer->submittedCommandBuffers[index] =
            renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1];
        renderer->submittedCommandBufferCount -= 1;
    }
}
3217

3218
// This function assumes that it's called from within an autorelease pool
//
// Walks the renderer's deferred-destroy lists and destroys every buffer or
// texture container whose member resources have a combined reference count
// of zero. Both lists are walked from the back so the swap-with-last removal
// never skips an entry that has not been examined yet.
static void METAL_INTERNAL_PerformPendingDestroys(
    MetalRenderer *renderer)
{
    Sint32 pending;
    Sint32 refs;
    Uint32 member;

    // Buffer containers
    for (pending = renderer->bufferContainersToDestroyCount - 1; pending >= 0; pending -= 1) {
        MetalBufferContainer *container = renderer->bufferContainersToDestroy[pending];

        refs = 0;
        for (member = 0; member < container->bufferCount; member += 1) {
            refs += SDL_AtomicGet(&container->buffers[member]->referenceCount);
        }

        if (refs != 0) {
            // Still referenced; try again on a later sweep
            continue;
        }

        METAL_INTERNAL_DestroyBufferContainer(container);

        renderer->bufferContainersToDestroy[pending] =
            renderer->bufferContainersToDestroy[renderer->bufferContainersToDestroyCount - 1];
        renderer->bufferContainersToDestroyCount -= 1;
    }

    // Texture containers
    for (pending = renderer->textureContainersToDestroyCount - 1; pending >= 0; pending -= 1) {
        MetalTextureContainer *container = renderer->textureContainersToDestroy[pending];

        refs = 0;
        for (member = 0; member < container->textureCount; member += 1) {
            refs += SDL_AtomicGet(&container->textures[member]->referenceCount);
        }

        if (refs != 0) {
            // Still referenced; try again on a later sweep
            continue;
        }

        METAL_INTERNAL_DestroyTextureContainer(container);

        renderer->textureContainersToDestroy[pending] =
            renderer->textureContainersToDestroy[renderer->textureContainersToDestroyCount - 1];
        renderer->textureContainersToDestroyCount -= 1;
    }
}
3256

3257
// Fences
3258

3259
// Blocks the calling thread until the requested fences have completed, then
// runs the deferred-destroy sweep.
//
// driverData: the MetalRenderer.
// waitAll:    true to wait for every fence, false to return as soon as any
//             one of them has completed.
// pFences:    array of `fenceCount` fences (MetalFence handles).
// fenceCount: number of entries in pFences. With zero fences there is
//             nothing to wait for and the call proceeds straight to the
//             deferred-destroy sweep.
//
// Waiting is a busy-wait on each fence's atomic `complete` counter.
static void METAL_WaitForFences(
    SDL_GPURenderer *driverData,
    bool waitAll,
    SDL_GPUFence **pFences,
    Uint32 fenceCount)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;

        // An empty fence list can never report a completed fence, so the
        // wait-any loop below must not be entered in that case or it would
        // spin forever.
        bool waiting = (fenceCount > 0);

        if (waitAll) {
            for (Uint32 i = 0; i < fenceCount; i += 1) {
                while (!SDL_AtomicGet(&((MetalFence *)pFences[i])->complete)) {
                    // Spin!
                }
            }
        } else {
            while (waiting) {
                for (Uint32 i = 0; i < fenceCount; i += 1) {
                    if (SDL_AtomicGet(&((MetalFence *)pFences[i])->complete) > 0) {
                        waiting = false;
                        break;
                    }
                }
            }
        }

        METAL_INTERNAL_PerformPendingDestroys(renderer);
    }
}
3290

3291
// Non-blocking fence query: returns true when the fence's atomic `complete`
// counter currently reads exactly 1. `driverData` is not used.
static bool METAL_QueryFence(
    SDL_GPURenderer *driverData,
    SDL_GPUFence *fence)
{
    const int completeValue = SDL_AtomicGet(&((MetalFence *)fence)->complete);

    return completeValue == 1;
}
3298

3299
// Window and Swapchain Management
3300

3301
// Looks up the MetalWindowData stored on `window` under the
// WINDOW_PROPERTY_DATA property. Returns NULL (the lookup's default value)
// when the property is not set.
static MetalWindowData *METAL_INTERNAL_FetchWindowData(SDL_Window *window)
{
    return (MetalWindowData *)SDL_GetPointerProperty(
        SDL_GetWindowProperties(window),
        WINDOW_PROPERTY_DATA,
        NULL);
}
3306

3307
// Reports whether `swapchainComposition` can be used. The HDR10 composition
// is rejected on every non-macOS platform and when the macOS 11.0
// availability check fails; every other composition is accepted.
// `driverData` and `window` are not consulted.
static bool METAL_SupportsSwapchainComposition(
    SDL_GPURenderer *driverData,
    SDL_Window *window,
    SDL_GPUSwapchainComposition swapchainComposition)
{
    const bool wantsHDR10 =
        (swapchainComposition == SDL_GPU_SWAPCHAINCOMPOSITION_HDR10_ST2048);

#ifndef SDL_PLATFORM_MACOS
    if (wantsHDR10) {
        return false;
    }
#endif

    if (@available(macOS 11.0, *)) {
        return true;
    }

    return !wantsHDR10;
}
3324

3325
// This function assumes that it's called from within an autorelease pool
//
// Sets up presentation for a window: creates the Metal view, configures its
// CAMetalLayer (device, vsync on macOS, pixel format, EDR, colorspace) for
// the requested composition and present mode, precaches the blit pipelines
// for the swapchain format, and initializes the window's swapchain texture
// container. Always returns 1.
static Uint8 METAL_INTERNAL_CreateSwapchain(
    MetalRenderer *renderer,
    MetalWindowData *windowData,
    SDL_GPUSwapchainComposition swapchainComposition,
    SDL_GPUPresentMode presentMode)
{
    CGColorSpaceRef layerColorspace;
    CGSize size;
    Uint32 textureType;

    // View and layer
    windowData->view = SDL_Metal_CreateView(windowData->window);
    windowData->drawable = nil;
    windowData->layer = (__bridge CAMetalLayer *)(SDL_Metal_GetLayer(windowData->view));

    windowData->layer.device = renderer->device;
#ifdef SDL_PLATFORM_MACOS
    // Anything other than immediate mode syncs to the display
    windowData->layer.displaySyncEnabled = (presentMode != SDL_GPU_PRESENTMODE_IMMEDIATE);
#endif
    windowData->layer.pixelFormat =
        SDLToMetal_SurfaceFormat[SwapchainCompositionToFormat[swapchainComposition]];
#ifndef SDL_PLATFORM_TVOS
    // Every non-SDR composition asks for extended dynamic range
    windowData->layer.wantsExtendedDynamicRangeContent =
        (swapchainComposition != SDL_GPU_SWAPCHAINCOMPOSITION_SDR);
#endif

    // The layer is handed the colorspace, then our own reference is dropped
    layerColorspace = CGColorSpaceCreateWithName(
        SwapchainCompositionToColorSpace[swapchainComposition]);
    windowData->layer.colorspace = layerColorspace;
    CGColorSpaceRelease(layerColorspace);

    // This will be set in AcquireSwapchainTexture.
    windowData->texture.handle = nil;

    // Precache blit pipelines for the swapchain format, one per texture type
    for (textureType = 0; textureType < 4; textureType += 1) {
        SDL_GPU_FetchBlitPipeline(
            renderer->sdlGPUDevice,
            (SDL_GPUTextureType)textureType,
            SwapchainCompositionToFormat[swapchainComposition],
            renderer->blitVertexShader,
            renderer->blitFrom2DShader,
            renderer->blitFrom2DArrayShader,
            renderer->blitFrom3DShader,
            renderer->blitFromCubeShader,
            &renderer->blitPipelines,
            &renderer->blitPipelineCount,
            &renderer->blitPipelineCapacity);
    }

    // Texture container: a single, non-cyclable 2D color target that wraps
    // the window's swapchain texture
    SDL_zero(windowData->textureContainer);
    windowData->textureContainer.canBeCycled = 0;
    windowData->textureContainer.activeTexture = &windowData->texture;
    windowData->textureContainer.textureCapacity = 1;
    windowData->textureContainer.textureCount = 1;
    windowData->textureContainer.header.info.format =
        SwapchainCompositionToFormat[swapchainComposition];
    windowData->textureContainer.header.info.levelCount = 1;
    windowData->textureContainer.header.info.layerCountOrDepth = 1;
    windowData->textureContainer.header.info.type = SDL_GPU_TEXTURETYPE_2D;
    windowData->textureContainer.header.info.usageFlags = SDL_GPU_TEXTUREUSAGE_COLOR_TARGET;

    // The container's dimensions mirror the layer's drawable size
    size = windowData->layer.drawableSize;
    windowData->textureContainer.header.info.width = (Uint32)size.width;
    windowData->textureContainer.header.info.height = (Uint32)size.height;

    return 1;
}
3388

3389
// Reports whether `presentMode` is supported: VSYNC everywhere, IMMEDIATE
// only when built for macOS, nothing else. `driverData` and `window` are
// not consulted.
static bool METAL_SupportsPresentMode(
    SDL_GPURenderer *driverData,
    SDL_Window *window,
    SDL_GPUPresentMode presentMode)
{
#ifdef SDL_PLATFORM_MACOS
    if (presentMode == SDL_GPU_PRESENTMODE_IMMEDIATE) {
        return true;
    }
#endif

    if (presentMode == SDL_GPU_PRESENTMODE_VSYNC) {
        return true;
    }

    return false;
}
3404

3405
// Claims `window` for this renderer: allocates its MetalWindowData, creates
// an SDR / vsync swapchain for it, stores the data on the window under
// WINDOW_PROPERTY_DATA and appends it to the renderer's claimed-window list
// (under windowLock).
//
// Returns true on success. Returns false when the window has already been
// claimed, when the window data cannot be allocated, or when swapchain
// creation fails.
static bool METAL_ClaimWindow(
    SDL_GPURenderer *driverData,
    SDL_Window *window)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        MetalWindowData *windowData = METAL_INTERNAL_FetchWindowData(window);

        if (windowData != NULL) {
            SDL_LogWarn(SDL_LOG_CATEGORY_GPU, "Window already claimed!");
            return false;
        }

        windowData = (MetalWindowData *)SDL_calloc(1, sizeof(MetalWindowData));
        if (windowData == NULL) {
            // Without this check the assignment below would dereference NULL.
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Could not allocate window data, failed to claim window!");
            return false;
        }
        windowData->window = window;

        if (!METAL_INTERNAL_CreateSwapchain(renderer, windowData, SDL_GPU_SWAPCHAINCOMPOSITION_SDR, SDL_GPU_PRESENTMODE_VSYNC)) {
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Could not create swapchain, failed to claim window!");
            SDL_free(windowData);
            return false;
        }

        SDL_SetPointerProperty(SDL_GetWindowProperties(window), WINDOW_PROPERTY_DATA, windowData);

        SDL_LockMutex(renderer->windowLock);

        // Double the claimed-window list when it is full
        if (renderer->claimedWindowCount >= renderer->claimedWindowCapacity) {
            renderer->claimedWindowCapacity *= 2;
            renderer->claimedWindows = SDL_realloc(
                renderer->claimedWindows,
                renderer->claimedWindowCapacity * sizeof(MetalWindowData *));
        }
        renderer->claimedWindows[renderer->claimedWindowCount] = windowData;
        renderer->claimedWindowCount += 1;

        SDL_UnlockMutex(renderer->windowLock);

        return true;
    }
}
3445

3446
// Releases a previously claimed window. Does nothing when the window has no
// MetalWindowData attached. Otherwise it waits for all submitted work,
// destroys the Metal view, removes the window from the renderer's
// claimed-window list (under windowLock), frees the window data and clears
// the WINDOW_PROPERTY_DATA property.
static void METAL_ReleaseWindow(
    SDL_GPURenderer *driverData,
    SDL_Window *window)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        MetalWindowData *data = METAL_INTERNAL_FetchWindowData(window);
        Uint32 slot;

        if (data == NULL) {
            return;
        }

        // Wait for all submitted work before tearing the view down
        METAL_Wait(driverData);
        SDL_Metal_DestroyView(data->view);

        // Swap-remove the first entry that refers to this window
        SDL_LockMutex(renderer->windowLock);
        for (slot = 0; slot < renderer->claimedWindowCount; slot += 1) {
            if (renderer->claimedWindows[slot]->window != window) {
                continue;
            }

            renderer->claimedWindows[slot] =
                renderer->claimedWindows[renderer->claimedWindowCount - 1];
            renderer->claimedWindowCount -= 1;
            break;
        }
        SDL_UnlockMutex(renderer->windowLock);

        SDL_free(data);

        SDL_ClearProperty(SDL_GetWindowProperties(window), WINDOW_PROPERTY_DATA);
    }
}
3476

3477
// Acquires the next drawable of `window`'s layer and exposes its texture as
// the window's swapchain texture.
//
// commandBuffer: command buffer that will present the drawable on submit.
// window:        a window previously claimed with METAL_ClaimWindow.
// pWidth/pHeight: receive the layer's current drawable size.
//
// Returns the window's swapchain texture container, or NULL when the window
// has not been claimed (pWidth/pHeight are left untouched) or when the layer
// did not provide a drawable (pWidth/pHeight are still written). In the NULL
// cases nothing is queued for presentation.
static SDL_GPUTexture *METAL_AcquireSwapchainTexture(
    SDL_GPUCommandBuffer *commandBuffer,
    SDL_Window *window,
    Uint32 *pWidth,
    Uint32 *pHeight)
{
    @autoreleasepool {
        MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
        MetalWindowData *windowData;
        CGSize drawableSize;

        windowData = METAL_INTERNAL_FetchWindowData(window);
        if (windowData == NULL) {
            return NULL;
        }

        // Get the drawable and its underlying texture
        windowData->drawable = [windowData->layer nextDrawable];
        windowData->texture.handle = [windowData->drawable texture];

        // Update the window size
        drawableSize = windowData->layer.drawableSize;
        windowData->textureContainer.header.info.width = (Uint32)drawableSize.width;
        windowData->textureContainer.header.info.height = (Uint32)drawableSize.height;

        // Send the dimensions to the out parameters.
        *pWidth = (Uint32)drawableSize.width;
        *pHeight = (Uint32)drawableSize.height;

        // -[CAMetalLayer nextDrawable] may return nil (see Apple's
        // documentation). In that case there is no texture to render to and
        // nothing to present, so do not queue the window for presentation
        // and report the failure to the caller.
        if (windowData->drawable == nil) {
            return NULL;
        }

        // Set up presentation
        if (metalCommandBuffer->windowDataCount == metalCommandBuffer->windowDataCapacity) {
            metalCommandBuffer->windowDataCapacity += 1;
            metalCommandBuffer->windowDatas = SDL_realloc(
                metalCommandBuffer->windowDatas,
                metalCommandBuffer->windowDataCapacity * sizeof(MetalWindowData *));
        }
        metalCommandBuffer->windowDatas[metalCommandBuffer->windowDataCount] = windowData;
        metalCommandBuffer->windowDataCount += 1;

        // Return the swapchain texture
        return (SDL_GPUTexture *)&windowData->textureContainer;
    }
}
3520

3521
// Returns the texture format recorded in the window's swapchain texture
// container. Logs an error and returns 0 when the window has not been
// claimed. `driverData` is not used.
static SDL_GPUTextureFormat METAL_GetSwapchainTextureFormat(
    SDL_GPURenderer *driverData,
    SDL_Window *window)
{
    MetalWindowData *data = METAL_INTERNAL_FetchWindowData(window);

    if (data != NULL) {
        return data->textureContainer.header.info.format;
    }

    SDL_LogError(SDL_LOG_CATEGORY_GPU, "Cannot get swapchain format, window has not been claimed!");
    return 0;
}
3534

3535
// Reconfigures a claimed window's swapchain for a new composition and
// present mode. Returns false (after logging an error) when the window has
// not been claimed or when the composition or present mode is unsupported.
// Otherwise it waits for all submitted work, updates the layer's vsync
// (macOS only), pixel format, EDR flag (not on tvOS) and colorspace, records
// the new format on the swapchain texture container, and returns true.
static bool METAL_SetSwapchainParameters(
    SDL_GPURenderer *driverData,
    SDL_Window *window,
    SDL_GPUSwapchainComposition swapchainComposition,
    SDL_GPUPresentMode presentMode)
{
    @autoreleasepool {
        MetalWindowData *data = METAL_INTERNAL_FetchWindowData(window);
        CGColorSpaceRef layerColorspace;

        // Validation
        if (data == NULL) {
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Cannot set swapchain parameters, window has not been claimed!");
            return false;
        }
        if (!METAL_SupportsSwapchainComposition(driverData, window, swapchainComposition)) {
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Swapchain composition not supported!");
            return false;
        }
        if (!METAL_SupportsPresentMode(driverData, window, presentMode)) {
            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Present mode not supported!");
            return false;
        }

        // Wait for all submitted work before touching the layer
        METAL_Wait(driverData);

#ifdef SDL_PLATFORM_MACOS
        data->layer.displaySyncEnabled = (presentMode != SDL_GPU_PRESENTMODE_IMMEDIATE);
#endif
        data->layer.pixelFormat =
            SDLToMetal_SurfaceFormat[SwapchainCompositionToFormat[swapchainComposition]];
#ifndef SDL_PLATFORM_TVOS
        data->layer.wantsExtendedDynamicRangeContent =
            (swapchainComposition != SDL_GPU_SWAPCHAINCOMPOSITION_SDR);
#endif

        // The layer is handed the colorspace, then our own reference is dropped
        layerColorspace = CGColorSpaceCreateWithName(
            SwapchainCompositionToColorSpace[swapchainComposition]);
        data->layer.colorspace = layerColorspace;
        CGColorSpaceRelease(layerColorspace);

        data->textureContainer.header.info.format =
            SwapchainCompositionToFormat[swapchainComposition];

        return true;
    }
}
3579

3580
// Submission
3581

3582
// Submits a command buffer. Under submitLock this:
//   1. schedules presentation of every drawable acquired on the buffer,
//   2. registers a completion handler that increments the fence's
//      `complete` counter,
//   3. commits the Metal command buffer and drops the handle,
//   4. appends the buffer to the renderer's submitted list,
//   5. cleans every submitted buffer whose fence reads complete, and
//   6. runs the deferred-destroy sweep.
static void METAL_Submit(
    SDL_GPUCommandBuffer *commandBuffer)
{
    @autoreleasepool {
        MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
        MetalRenderer *renderer = cmdbuf->renderer;
        Uint32 w;
        Sint32 s;

        SDL_LockMutex(renderer->submitLock);

        // Enqueue present requests, if applicable
        for (w = 0; w < cmdbuf->windowDataCount; w += 1) {
            MetalWindowData *windowData = cmdbuf->windowDatas[w];

            [cmdbuf->handle presentDrawable:windowData->drawable];
            windowData->drawable = nil;
        }

        // Notify the fence when the command buffer has completed
        [cmdbuf->handle addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
          SDL_AtomicIncRef(&cmdbuf->fence->complete);
        }];

        // Hand the work to the GPU
        [cmdbuf->handle commit];
        cmdbuf->handle = nil;

        // Record the command buffer as submitted, growing the list to
        // exactly one more than the current count when it is full
        if (renderer->submittedCommandBufferCount >= renderer->submittedCommandBufferCapacity) {
            renderer->submittedCommandBufferCapacity = renderer->submittedCommandBufferCount + 1;

            renderer->submittedCommandBuffers = SDL_realloc(
                renderer->submittedCommandBuffers,
                sizeof(MetalCommandBuffer *) * renderer->submittedCommandBufferCapacity);
        }
        renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount] = cmdbuf;
        renderer->submittedCommandBufferCount += 1;

        // Clean up whatever has finished. Iterating from the back keeps the
        // walk valid while CleanCommandBuffer swap-removes entries.
        for (s = renderer->submittedCommandBufferCount - 1; s >= 0; s -= 1) {
            MetalCommandBuffer *submitted = renderer->submittedCommandBuffers[s];

            if (SDL_AtomicGet(&submitted->fence->complete)) {
                METAL_INTERNAL_CleanCommandBuffer(renderer, submitted);
            }
        }

        METAL_INTERNAL_PerformPendingDestroys(renderer);

        SDL_UnlockMutex(renderer->submitLock);
    }
}
3631

3632
// Submits the command buffer and returns its fence. autoReleaseFence is
// cleared first so that command buffer cleanup does not return the fence to
// the pool. The fence pointer is read before submission.
static SDL_GPUFence *METAL_SubmitAndAcquireFence(
    SDL_GPUCommandBuffer *commandBuffer)
{
    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
    SDL_GPUFence *acquiredFence = (SDL_GPUFence *)cmdbuf->fence;

    cmdbuf->autoReleaseFence = 0;
    METAL_Submit(commandBuffer);

    return acquiredFence;
}
3643

3644
// Waits for all submitted work to finish: busy-waits until the fence of
// every submitted command buffer reads complete, then, under submitLock,
// cleans all submitted command buffers and runs the deferred-destroy sweep.
static void METAL_Wait(
    SDL_GPURenderer *driverData)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;
        Uint32 pending;
        Sint32 s;

        /*
         * Wait for all submitted command buffers to complete.
         * Sort of equivalent to vkDeviceWaitIdle.
         */
        for (pending = 0; pending < renderer->submittedCommandBufferCount; pending += 1) {
            while (!SDL_AtomicGet(&renderer->submittedCommandBuffers[pending]->fence->complete)) {
                // Spin!
            }
        }

        SDL_LockMutex(renderer->submitLock);

        // Iterating from the back keeps the walk valid while
        // CleanCommandBuffer swap-removes entries from the list
        for (s = renderer->submittedCommandBufferCount - 1; s >= 0; s -= 1) {
            METAL_INTERNAL_CleanCommandBuffer(
                renderer,
                renderer->submittedCommandBuffers[s]);
        }

        METAL_INTERNAL_PerformPendingDestroys(renderer);

        SDL_UnlockMutex(renderer->submitLock);
    }
}
3673

3674
// Format Info
3675

3676
// Reports whether `format` can be used with the given usage flags on this
// device. `type` is not consulted.
//
// - Depth-stencil target usage requires a depth format.
// - The packed 16-bit BGR(A) formats require the Apple1 GPU family.
// - BC formats: on macOS they require the macOS 11.0 availability check to
//   pass, device BC support, and no color-target usage; on other platforms
//   they are always rejected.
// - D24 formats: macOS only, and only if the device reports support.
// - Everything else is reported as supported.
static bool METAL_SupportsTextureFormat(
    SDL_GPURenderer *driverData,
    SDL_GPUTextureFormat format,
    SDL_GPUTextureType type,
    SDL_GPUTextureUsageFlags usage)
{
    @autoreleasepool {
        MetalRenderer *renderer = (MetalRenderer *)driverData;

        // Only depth textures can be used as... depth textures
        if ((usage & SDL_GPU_TEXTUREUSAGE_DEPTH_STENCIL_TARGET) && !IsDepthFormat(format)) {
            return false;
        }

        switch (format) {
        // Apple GPU exclusive
        case SDL_GPU_TEXTUREFORMAT_B5G6R5_UNORM:
        case SDL_GPU_TEXTUREFORMAT_B5G5R5A1_UNORM:
        case SDL_GPU_TEXTUREFORMAT_B4G4R4A4_UNORM:
            return [renderer->device supportsFamily:MTLGPUFamilyApple1];

        // Requires BC compression support
        case SDL_GPU_TEXTUREFORMAT_BC1_UNORM:
        case SDL_GPU_TEXTUREFORMAT_BC2_UNORM:
        case SDL_GPU_TEXTUREFORMAT_BC3_UNORM:
        case SDL_GPU_TEXTUREFORMAT_BC7_UNORM:
        case SDL_GPU_TEXTUREFORMAT_BC3_UNORM_SRGB:
        case SDL_GPU_TEXTUREFORMAT_BC7_UNORM_SRGB:
#ifdef SDL_PLATFORM_MACOS
            if (@available(macOS 11.0, *)) {
                // BC textures are never reported as usable color targets
                const bool isColorTarget = (usage & SDL_GPU_TEXTUREUSAGE_COLOR_TARGET) != 0;

                return [renderer->device supportsBCTextureCompression] && !isColorTarget;
            }
            return false;
#else
            // FIXME: iOS 16.4+ allows these formats!
            return false;
#endif

        // Requires D24S8 support
        case SDL_GPU_TEXTUREFORMAT_D24_UNORM:
        case SDL_GPU_TEXTUREFORMAT_D24_UNORM_S8_UINT:
#ifdef SDL_PLATFORM_MACOS
            return [renderer->device isDepth24Stencil8PixelFormatSupported];
#else
            return false;
#endif

        default:
            return true;
        }
    }
}
3733

3734
// Device Creation
3735

3736
// Reports whether the Metal GPU driver can be used with this video device:
// true exactly when the device provides a Metal_CreateView implementation.
static bool METAL_PrepareDriver(SDL_VideoDevice *_this)
{
    // FIXME: Add a macOS / iOS version check! Maybe support >= 10.14?
    if (_this->Metal_CreateView == NULL) {
        return false;
    }

    return true;
}
3741

3742
// Creates the resources the renderer keeps for blits: an empty blit pipeline
// cache, the fullscreen vertex shader, one fragment shader per source texture
// kind (2D, 2D array, 3D, cube), and a nearest and a linear sampler.
//
// Failures are logged but not propagated; the corresponding renderer field is
// left NULL in that case.
static void METAL_INTERNAL_InitBlitResources(
    MetalRenderer *renderer)
{
    SDL_GPUShaderCreateInfo shaderModuleCreateInfo;
    SDL_GPUSamplerCreateInfo samplerCreateInfo;

    // Allocate the dynamic blit pipeline list
    // NOTE(review): the allocation result is not checked here.
    renderer->blitPipelineCapacity = 2;
    renderer->blitPipelineCount = 0;
    renderer->blitPipelines = SDL_malloc(
        renderer->blitPipelineCapacity * sizeof(BlitPipelineCacheEntry));

    // Fullscreen vertex shader
    SDL_zero(shaderModuleCreateInfo);
    shaderModuleCreateInfo.code = FullscreenVert_metallib;
    shaderModuleCreateInfo.codeSize = FullscreenVert_metallib_len;
    shaderModuleCreateInfo.stage = SDL_GPU_SHADERSTAGE_VERTEX;
    shaderModuleCreateInfo.format = SDL_GPU_SHADERFORMAT_METALLIB;
    shaderModuleCreateInfo.entryPointName = "FullscreenVert";

    renderer->blitVertexShader = METAL_CreateShader(
        (SDL_GPURenderer *)renderer,
        &shaderModuleCreateInfo);

    if (renderer->blitVertexShader == NULL) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile vertex shader for blit!");
    }

    // BlitFrom2D fragment shader
    // The same create-info is reused for every fragment shader below, so the
    // stage, format, sampler count and uniform buffer count set here carry over.
    shaderModuleCreateInfo.code = BlitFrom2D_metallib;
    shaderModuleCreateInfo.codeSize = BlitFrom2D_metallib_len;
    shaderModuleCreateInfo.stage = SDL_GPU_SHADERSTAGE_FRAGMENT;
    shaderModuleCreateInfo.entryPointName = "BlitFrom2D";
    shaderModuleCreateInfo.samplerCount = 1;
    shaderModuleCreateInfo.uniformBufferCount = 1;

    renderer->blitFrom2DShader = METAL_CreateShader(
        (SDL_GPURenderer *)renderer,
        &shaderModuleCreateInfo);

    if (renderer->blitFrom2DShader == NULL) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile BlitFrom2D fragment shader!");
    }

    // BlitFrom2DArray fragment shader
    shaderModuleCreateInfo.code = BlitFrom2DArray_metallib;
    shaderModuleCreateInfo.codeSize = BlitFrom2DArray_metallib_len;
    shaderModuleCreateInfo.entryPointName = "BlitFrom2DArray";

    renderer->blitFrom2DArrayShader = METAL_CreateShader(
        (SDL_GPURenderer *)renderer,
        &shaderModuleCreateInfo);

    if (renderer->blitFrom2DArrayShader == NULL) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile BlitFrom2DArray fragment shader!");
    }

    // BlitFrom3D fragment shader
    shaderModuleCreateInfo.code = BlitFrom3D_metallib;
    shaderModuleCreateInfo.codeSize = BlitFrom3D_metallib_len;
    shaderModuleCreateInfo.entryPointName = "BlitFrom3D";

    renderer->blitFrom3DShader = METAL_CreateShader(
        (SDL_GPURenderer *)renderer,
        &shaderModuleCreateInfo);

    if (renderer->blitFrom3DShader == NULL) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile BlitFrom3D fragment shader!");
    }

    // BlitFromCube fragment shader
    shaderModuleCreateInfo.code = BlitFromCube_metallib;
    shaderModuleCreateInfo.codeSize = BlitFromCube_metallib_len;
    shaderModuleCreateInfo.entryPointName = "BlitFromCube";

    renderer->blitFromCubeShader = METAL_CreateShader(
        (SDL_GPURenderer *)renderer,
        &shaderModuleCreateInfo);

    if (renderer->blitFromCubeShader == NULL) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to compile BlitFromCube fragment shader!");
    }

    // Create samplers
    // Zero the struct first (as is done for the shader create-info) so that any
    // field not assigned below is zero rather than indeterminate.
    SDL_zero(samplerCreateInfo);
    samplerCreateInfo.addressModeU = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
    samplerCreateInfo.addressModeV = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
    samplerCreateInfo.addressModeW = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
    samplerCreateInfo.anisotropyEnable = 0;
    samplerCreateInfo.compareEnable = 0;
    samplerCreateInfo.magFilter = SDL_GPU_FILTER_NEAREST;
    samplerCreateInfo.minFilter = SDL_GPU_FILTER_NEAREST;
    samplerCreateInfo.mipmapMode = SDL_GPU_SAMPLERMIPMAPMODE_NEAREST;
    samplerCreateInfo.mipLodBias = 0.0f;
    samplerCreateInfo.minLod = 0;
    samplerCreateInfo.maxLod = 1000;
    samplerCreateInfo.maxAnisotropy = 1.0f;
    samplerCreateInfo.compareOp = SDL_GPU_COMPAREOP_ALWAYS;

    renderer->blitNearestSampler = METAL_CreateSampler(
        (SDL_GPURenderer *)renderer,
        &samplerCreateInfo);

    if (renderer->blitNearestSampler == NULL) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create blit nearest sampler!");
    }

    // The linear sampler differs from the nearest one only in its filters
    samplerCreateInfo.magFilter = SDL_GPU_FILTER_LINEAR;
    samplerCreateInfo.minFilter = SDL_GPU_FILTER_LINEAR;
    samplerCreateInfo.mipmapMode = SDL_GPU_SAMPLERMIPMAPMODE_LINEAR;

    renderer->blitLinearSampler = METAL_CreateSampler(
        (SDL_GPURenderer *)renderer,
        &samplerCreateInfo);

    if (renderer->blitLinearSampler == NULL) {
        SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create blit linear sampler!");
    }
}
3860

3861
// Releases everything created for blits: both samplers, the five blit
// shaders, every cached blit pipeline, and finally the pipeline cache array.
static void METAL_INTERNAL_DestroyBlitResources(
    SDL_GPURenderer *driverData)
{
    MetalRenderer *metalRenderer = (MetalRenderer *)driverData;
    Uint32 entryIndex;

    // Samplers
    METAL_ReleaseSampler(driverData, metalRenderer->blitLinearSampler);
    METAL_ReleaseSampler(driverData, metalRenderer->blitNearestSampler);

    // Shaders
    METAL_ReleaseShader(driverData, metalRenderer->blitVertexShader);
    METAL_ReleaseShader(driverData, metalRenderer->blitFrom2DShader);
    METAL_ReleaseShader(driverData, metalRenderer->blitFrom2DArrayShader);
    METAL_ReleaseShader(driverData, metalRenderer->blitFrom3DShader);
    METAL_ReleaseShader(driverData, metalRenderer->blitFromCubeShader);

    // Cached pipelines, then the cache storage itself
    entryIndex = 0;
    while (entryIndex < metalRenderer->blitPipelineCount) {
        BlitPipelineCacheEntry *entry = &metalRenderer->blitPipelines[entryIndex];
        METAL_ReleaseGraphicsPipeline(driverData, entry->pipeline);
        entryIndex += 1;
    }
    SDL_free(metalRenderer->blitPipelines);
}
3878

3879
// Creates the Metal-backed SDL_GPUDevice and its renderer state.
//
// debugMode is remembered on the renderer. On macOS, preferLowPower makes the
// first device reporting isLowPower win over the system default device.
// props is not read by this function.
//
// Returns the new device, or NULL if no Metal device could be obtained or the
// renderer / device structures could not be allocated.
static SDL_GPUDevice *METAL_CreateDevice(bool debugMode, bool preferLowPower, SDL_PropertiesID props)
{
    @autoreleasepool {
        MetalRenderer *renderer;
        SDL_GPUDevice *result;
        id<MTLDevice> device = nil;

        // Choose the Metal device before allocating anything, so that failing
        // to find one needs no cleanup
#ifdef SDL_PLATFORM_MACOS
        if (preferLowPower) {
            NSArray<id<MTLDevice>> *devices = MTLCopyAllDevices();
            for (id<MTLDevice> candidate in devices) {
                if (candidate.isLowPower) {
                    device = candidate;
                    break;
                }
            }
        }
#endif
        if (device == nil) {
            device = MTLCreateSystemDefaultDevice();
        }
        if (device == nil) {
            SDL_SetError("Failed to create Metal device");
            return NULL;
        }

        // Allocate and zero out the renderer
        renderer = (MetalRenderer *)SDL_calloc(1, sizeof(MetalRenderer));
        if (renderer == NULL) {
            return NULL;
        }

        // Allocate the device struct up front as well: at this point the
        // renderer owns nothing yet, so bailing out only needs one free
        result = SDL_malloc(sizeof(SDL_GPUDevice));
        if (result == NULL) {
            SDL_free(renderer);
            return NULL;
        }

        // Store the Metal device and create the command queue
        renderer->device = device;
        renderer->queue = [renderer->device newCommandQueue];

        // Print driver info
        SDL_LogInfo(SDL_LOG_CATEGORY_GPU, "SDL_GPU Driver: Metal");
        SDL_LogInfo(
            SDL_LOG_CATEGORY_GPU,
            "Metal Device: %s",
            [renderer->device.name UTF8String]);

        // Remember debug mode
        renderer->debugMode = debugMode;

        // Set up colorspace array
        SwapchainCompositionToColorSpace[0] = kCGColorSpaceSRGB;
        SwapchainCompositionToColorSpace[1] = kCGColorSpaceSRGB;
        SwapchainCompositionToColorSpace[2] = kCGColorSpaceExtendedLinearSRGB;
        if (@available(macOS 11.0, *)) {
            SwapchainCompositionToColorSpace[3] = kCGColorSpaceITUR_2100_PQ;
        } else {
            SwapchainCompositionToColorSpace[3] = NULL;
        }

        // Create mutexes
        renderer->submitLock = SDL_CreateMutex();
        renderer->acquireCommandBufferLock = SDL_CreateMutex();
        renderer->acquireUniformBufferLock = SDL_CreateMutex();
        renderer->disposeLock = SDL_CreateMutex();
        renderer->fenceLock = SDL_CreateMutex();
        renderer->windowLock = SDL_CreateMutex();

        // Create command buffer pool
        METAL_INTERNAL_AllocateCommandBuffers(renderer, 2);

        // Create fence pool
        renderer->availableFenceCapacity = 2;
        renderer->availableFences = SDL_malloc(
            sizeof(MetalFence *) * renderer->availableFenceCapacity);

        // Create uniform buffer pool (starts full: count == capacity)
        renderer->uniformBufferPoolCapacity = 32;
        renderer->uniformBufferPoolCount = 32;
        renderer->uniformBufferPool = SDL_malloc(
            renderer->uniformBufferPoolCapacity * sizeof(MetalUniformBuffer *));

        for (Uint32 i = 0; i < renderer->uniformBufferPoolCount; i += 1) {
            renderer->uniformBufferPool[i] = METAL_INTERNAL_CreateUniformBuffer(
                renderer,
                UNIFORM_BUFFER_SIZE);
        }

        // Create deferred destroy arrays
        renderer->bufferContainersToDestroyCapacity = 2;
        renderer->bufferContainersToDestroyCount = 0;
        renderer->bufferContainersToDestroy = SDL_malloc(
            renderer->bufferContainersToDestroyCapacity * sizeof(MetalBufferContainer *));

        renderer->textureContainersToDestroyCapacity = 2;
        renderer->textureContainersToDestroyCount = 0;
        renderer->textureContainersToDestroy = SDL_malloc(
            renderer->textureContainersToDestroyCapacity * sizeof(MetalTextureContainer *));

        // Create claimed window list
        renderer->claimedWindowCapacity = 1;
        renderer->claimedWindows = SDL_malloc(
            sizeof(MetalWindowData *) * renderer->claimedWindowCapacity);

        // Initialize blit resources
        METAL_INTERNAL_InitBlitResources(renderer);

        // Fill in the driver entry points and link device and renderer
        ASSIGN_DRIVER(METAL)
        result->driverData = (SDL_GPURenderer *)renderer;
        renderer->sdlGPUDevice = result;

        return result;
    }
}
3979

3980
// Bootstrap record for the Metal backend: its name string, driver identifier,
// the shader format flags it lists, and the prepare / create entry points
// defined above.
SDL_GPUBootstrap MetalDriver = {
    "metal",                                                  // name
    SDL_GPU_DRIVER_METAL,                                     // driver identifier
    SDL_GPU_SHADERFORMAT_METALLIB | SDL_GPU_SHADERFORMAT_MSL, // shader format flags
    METAL_PrepareDriver,                                      // prepare callback
    METAL_CreateDevice                                        // device creation callback
};
3987

3988
#endif // SDL_GPU_METAL
3989

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.