// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
import { WebGpuBackend } from '../backend-webgpu';
5
import { LOG_DEBUG } from '../log';
7
import { GpuData, GpuDataId, GpuDataType } from './types';
/**
 * manages GpuDataId -> GpuBuffer
 */
export interface GpuDataManager {
14
* copy data from CPU to GPU.
16
upload(id: GpuDataId, data: Uint8Array): void;
18
* copy data from GPU to GPU.
20
memcpy(sourceId: GpuDataId, destinationId: GpuDataId): void;
22
* create new data on GPU.
24
create(size: number, usage?: number): GpuData;
28
get(id: GpuDataId): GpuData | undefined;
30
* release the data on GPU by ID.
32
* @return size of the data released
34
release(id: GpuDataId): number;
36
* copy data from GPU to CPU.
38
download(id: GpuDataId, getTargetBuffer: () => Uint8Array): Promise<void>;
41
* refresh the buffers that marked for release.
43
* when release() is called, the buffer is not released immediately. this is because we need to wait for the commands
44
* to be submitted to the GPU. this function is called after the commands are submitted so that the buffers can be
47
refreshPendingBuffers(): void;
50
* register an external buffer for IO Binding. If the buffer is already registered, return the existing GPU data ID.
52
* GPU data manager only manages a mapping between the buffer and the GPU data ID. It will not manage the lifecycle of
53
* the external buffer.
55
registerExternalBuffer(buffer: GPUBuffer, originalSize: number, previousBuffer?: GPUBuffer): number;
58
* unregister an external buffer for IO Binding.
60
unregisterExternalBuffer(buffer: GPUBuffer): void;
63
* destroy all gpu buffers.
68
* release session related data.
69
* @param sessionId - specify the session ID.
71
onReleaseSession(sessionId: number): void;
74
interface StorageCacheValue {
79
const bucketFreelist: Map<number, number> = new Map([
103
// we don't want to cache the bucket sizes below but not caching them
104
// results in some major performance hits for models like sd-turbo.
110
const bucketArr: number[] = [];
113
* normalize the buffer size so that it fits the 128-bits (16 bytes) alignment.
115
const calcNormalizedBufferSize = (size: number) => Math.ceil(size / 16) * 16;
118
* calculate the buffer size so that it fits into buckets.
120
const calcBucketBufferSize = (size: number) => {
121
for (let idx = 0; idx < bucketArr.length; idx++) {
122
const sizeForBucket = bucketArr[idx];
123
if (size <= sizeForBucket) {
124
return sizeForBucket;
127
// not in bucket list -> caller will not cache, round up to 16.
128
return Math.ceil(size / 16) * 16;
// Monotonically increasing counter used to mint GpuDataId values for buffers created by this manager.
// NOTE(review): the declaration line was lost in extraction; `guid++` requires a mutable module-level
// number — initial value restored from upstream (starts at 1). Confirm.
let guid = 1;
const createNewGpuDataId = () => guid++;
135
* exported standard download function. This function is used by the session to download the data from GPU, and also by
136
* factory to create GPU tensors with the capacity of downloading data from GPU.
138
* @param backend - the WebGPU backend
139
* @param gpuBuffer - the GPU buffer to download
140
* @param originalSize - the original size of the data
141
* @param getTargetBuffer - optional. If provided, the data will be copied to the target buffer. Otherwise, a new buffer
142
* will be created and returned.
144
export const downloadGpuData = async (
145
backend: WebGpuBackend,
146
gpuBuffer: GPUBuffer,
147
originalSize: number,
148
getTargetBuffer?: () => Uint8Array,
149
): Promise<Uint8Array> => {
150
const bufferSize = calcNormalizedBufferSize(originalSize);
151
const gpuReadBuffer = backend.device.createBuffer(
152
// eslint-disable-next-line no-bitwise
153
{ size: bufferSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ },
156
const commandEncoder = backend.getCommandEncoder();
157
backend.endComputePass();
158
commandEncoder.copyBufferToBuffer(
159
gpuBuffer /* source buffer */,
160
0 /* source offset */,
161
gpuReadBuffer /* destination buffer */,
162
0 /* destination offset */,
163
bufferSize /* size */,
167
await gpuReadBuffer.mapAsync(GPUMapMode.READ);
169
const arrayBuffer = gpuReadBuffer.getMappedRange();
170
if (getTargetBuffer) {
171
// if we already have a CPU buffer to accept the data, no need to clone the ArrayBuffer.
172
const targetBuffer = getTargetBuffer();
173
targetBuffer.set(new Uint8Array(arrayBuffer, 0, originalSize));
176
// the mapped ArrayBuffer will be released when the GPU buffer is destroyed. Need to clone the
178
return new Uint8Array(arrayBuffer.slice(0, originalSize));
181
gpuReadBuffer.destroy();
185
class GpuDataManagerImpl implements GpuDataManager {
186
// GPU Data ID => GPU Data ( storage buffer )
187
private storageCache: Map<GpuDataId, StorageCacheValue>;
189
// pending buffers for uploading ( data is unmapped )
190
private buffersForUploadingPending: GPUBuffer[];
191
// pending buffers for computing
192
private buffersPending: GPUBuffer[];
194
// The reusable storage buffers for computing.
195
private freeBuffers: Map<number, GPUBuffer[]>;
196
// The reusable uniform buffers
197
private freeUniformBuffers: Map<number, GPUBuffer[]>;
199
// The external buffers registered users for IO Binding.
200
private externalBuffers: Map<GPUBuffer, GpuDataId>;
202
// The pendingBuffers for capture graph.
203
// a SessionID -> GPUBuffer[] mapping.
204
private capturedPendingBuffers: Map<number, GPUBuffer[]>;
206
constructor(private backend: WebGpuBackend) {
207
this.storageCache = new Map();
208
this.freeBuffers = new Map();
209
this.freeUniformBuffers = new Map();
210
this.buffersForUploadingPending = [];
211
this.buffersPending = [];
212
this.externalBuffers = new Map();
213
this.capturedPendingBuffers = new Map();
215
for (const [key] of bucketFreelist) {
217
this.freeBuffers.set(key, []);
218
this.freeUniformBuffers.set(key, []);
222
upload(id: GpuDataId, data: Uint8Array): void {
223
const srcArrayBuffer = data.buffer;
224
const srcOffset = data.byteOffset;
225
const srcLength = data.byteLength;
226
const size = calcNormalizedBufferSize(srcLength);
228
// get destination gpu buffer
229
const gpuDataCache = this.storageCache.get(id);
231
throw new Error('gpu data for uploading does not exist');
233
if (gpuDataCache.originalSize !== srcLength) {
234
throw new Error(`inconsistent data size. gpu data size=${gpuDataCache.originalSize}, data size=${srcLength}`);
238
const gpuBufferForUploading = this.backend.device.createBuffer(
239
// eslint-disable-next-line no-bitwise
240
{ mappedAtCreation: true, size, usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC },
243
// copy (upload) data
244
const arrayBuffer = gpuBufferForUploading.getMappedRange();
245
new Uint8Array(arrayBuffer).set(new Uint8Array(srcArrayBuffer, srcOffset, srcLength));
246
gpuBufferForUploading.unmap();
249
const commandEncoder = this.backend.getCommandEncoder();
250
this.backend.endComputePass();
251
commandEncoder.copyBufferToBuffer(gpuBufferForUploading, 0, gpuDataCache.gpuData.buffer, 0, size);
253
LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.upload(id=${id})`);
255
this.buffersForUploadingPending.push(gpuBufferForUploading);
258
memcpy(sourceId: GpuDataId, destinationId: GpuDataId): void {
259
// get source gpu buffer
260
const sourceGpuDataCache = this.storageCache.get(sourceId);
261
if (!sourceGpuDataCache) {
262
throw new Error('source gpu data for memcpy does not exist');
264
// get destination gpu buffer
265
const destinationGpuDataCache = this.storageCache.get(destinationId);
266
if (!destinationGpuDataCache) {
267
throw new Error('destination gpu data for memcpy does not exist');
269
if (sourceGpuDataCache.originalSize !== destinationGpuDataCache.originalSize) {
270
throw new Error('inconsistent source and destination gpu data size');
273
const size = calcNormalizedBufferSize(sourceGpuDataCache.originalSize);
276
const commandEncoder = this.backend.getCommandEncoder();
277
this.backend.endComputePass();
278
commandEncoder.copyBufferToBuffer(
279
sourceGpuDataCache.gpuData.buffer,
281
destinationGpuDataCache.gpuData.buffer,
287
registerExternalBuffer(buffer: GPUBuffer, originalSize: number, previousBuffer?: GPUBuffer): number {
288
let id: number | undefined;
289
if (previousBuffer) {
290
id = this.externalBuffers.get(previousBuffer);
291
if (id === undefined) {
292
throw new Error('previous buffer is not registered');
294
if (buffer === previousBuffer) {
298
`[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${
300
}, buffer is the same, skip.`,
303
} else if (this.backend.capturedCommandList.has(this.backend.currentSessionId!)) {
304
throw new Error(`Registering a different external buffer under graph capture mode is not supported yet.
305
Please use the previous external buffer!`);
307
this.externalBuffers.delete(previousBuffer);
309
id = createNewGpuDataId();
312
this.storageCache.set(id, { gpuData: { id, type: GpuDataType.default, buffer }, originalSize });
313
this.externalBuffers.set(buffer, id);
316
() => `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${id}, registered.`,
321
unregisterExternalBuffer(buffer: GPUBuffer): void {
322
const id = this.externalBuffers.get(buffer);
323
if (id !== undefined) {
324
this.storageCache.delete(id);
325
this.externalBuffers.delete(buffer);
326
LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${id}`);
330
// eslint-disable-next-line no-bitwise
331
create(size: number, usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST): GpuData {
332
const bufferSize = calcBucketBufferSize(size);
335
// Currently, only storage buffers are reused.
336
// eslint-disable-next-line no-bitwise
337
const isStorage = (usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE;
338
// eslint-disable-next-line no-bitwise
339
const isUniform = (usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM;
340
if (isStorage || isUniform) {
341
const freeBuffers = isStorage ? this.freeBuffers : this.freeUniformBuffers;
342
const buffers = freeBuffers.get(bufferSize);
344
// no such bucket/freelist - create gpu buffer
345
gpuBuffer = this.backend.device.createBuffer({ size: bufferSize, usage });
347
if (buffers.length > 0) {
348
// in freelist, use it
349
gpuBuffer = buffers.pop() as GPUBuffer;
351
// bucket empty, create gpu buffer
352
gpuBuffer = this.backend.device.createBuffer({ size: bufferSize, usage });
357
gpuBuffer = this.backend.device.createBuffer({ size: bufferSize, usage });
360
const gpuData = { id: createNewGpuDataId(), type: GpuDataType.default, buffer: gpuBuffer };
361
this.storageCache.set(gpuData.id, { gpuData, originalSize: size });
363
LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.create(size=${size}) => id=${gpuData.id}`);
367
get(id: GpuDataId): GpuData | undefined {
368
return this.storageCache.get(id)?.gpuData;
371
release(id: GpuDataId): number {
372
const cachedData = this.storageCache.get(id);
374
throw new Error('releasing data does not exist');
377
LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.release(id=${id}), gpuDataId=${cachedData.gpuData.id}`);
379
this.storageCache.delete(id);
380
this.buffersPending.push(cachedData.gpuData.buffer);
381
// cachedData.gpuData.buffer.destroy();
383
return cachedData.originalSize;
386
async download(id: GpuDataId, getTargetBuffer: () => Uint8Array): Promise<void> {
387
const cachedData = this.storageCache.get(id);
389
throw new Error('data does not exist');
391
await downloadGpuData(this.backend, cachedData.gpuData.buffer, cachedData.originalSize, getTargetBuffer);
394
refreshPendingBuffers(): void {
395
for (const buffer of this.buffersForUploadingPending) {
396
// upload buffer is only useful in the session creation time. So we don't need to reuse them in session running.
399
this.buffersForUploadingPending = [];
401
if (this.buffersPending.length === 0) {
405
if (this.backend.sessionStatus === 'default') {
406
for (const buffer of this.buffersPending) {
407
const maxInFreeList = bucketFreelist.get(buffer.size);
409
// eslint-disable-next-line no-bitwise
410
if ((buffer.usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE) {
411
// Put the pending buffer to freeBuffers list instead of really destroying it for buffer reusing.
412
const freelist = this.freeBuffers.get(buffer.size) || [];
413
if (maxInFreeList === undefined || freelist.length >= maxInFreeList) {
416
freelist.push(buffer);
418
// eslint-disable-next-line no-bitwise
419
} else if ((buffer.usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM) {
420
// Put the pending buffer to freeUniformBuffers list instead of really destroying it for buffer reusing.
421
const freelist = this.freeUniformBuffers.get(buffer.size) || [];
422
if (maxInFreeList === undefined || freelist.length >= maxInFreeList) {
425
freelist.push(buffer);
431
this.buffersPending = [];
433
// Don't release intermediate tensors in non-default mode.
434
// TODO: reuse the storage buffers in non-default mode.
435
let capturedBuffers = this.capturedPendingBuffers.get(this.backend.currentSessionId!);
436
if (!capturedBuffers) {
437
capturedBuffers = [];
438
this.capturedPendingBuffers.set(this.backend.currentSessionId!, capturedBuffers);
440
for (const buffer of this.buffersPending) {
441
capturedBuffers.push(buffer);
443
this.buffersPending = [];
448
this.freeBuffers.forEach((buffers) => {
449
buffers.forEach((buffer) => {
453
this.freeUniformBuffers.forEach((buffers) => {
454
buffers.forEach((buffer) => {
459
this.storageCache.forEach((storage) => {
460
storage.gpuData.buffer.destroy();
463
this.capturedPendingBuffers.forEach((buffers) => {
464
buffers.forEach((buffer) => {
468
this.storageCache = new Map();
469
this.freeBuffers = new Map();
470
this.freeUniformBuffers = new Map();
471
this.capturedPendingBuffers = new Map();
474
onReleaseSession(sessionId: number) {
475
// release the captured pending buffers.
476
const pendingBuffers = this.capturedPendingBuffers.get(sessionId);
477
if (pendingBuffers) {
478
pendingBuffers.forEach((buffer) => {
481
this.capturedPendingBuffers.delete(sessionId);
/**
 * Factory for the GpuDataManager implementation; forwards its arguments to the
 * GpuDataManagerImpl constructor and returns the instance behind the public interface.
 */
export const createGpuDataManager = (...args: ConstructorParameters<typeof GpuDataManagerImpl>): GpuDataManager => {
  return new GpuDataManagerImpl(...args);
};