onnxruntime

Форк
0
/
gpu-data-manager.ts 
487 строк · 15.9 Кб
1
// Copyright (c) Microsoft Corporation. All rights reserved.
2
// Licensed under the MIT License.
3

4
import { WebGpuBackend } from '../backend-webgpu';
5
import { LOG_DEBUG } from '../log';
6

7
import { GpuData, GpuDataId, GpuDataType } from './types';
8

9
/**
 * Contract of the component that maps a GpuDataId to the GPU buffer backing it.
 */
export interface GpuDataManager {
  /**
   * Copies `data` from CPU memory into the GPU data identified by `id`.
   */
  upload(id: GpuDataId, data: Uint8Array): void;
  /**
   * Copies the content of one GPU data into another GPU data (GPU to GPU).
   */
  memcpy(sourceId: GpuDataId, destinationId: GpuDataId): void;
  /**
   * Creates a new GPU data of the requested byte size.
   *
   * @param size - requested size in bytes.
   * @param usage - optional GPU buffer usage flags.
   */
  create(size: number, usage?: number): GpuData;
  /**
   * Looks up a GPU data by its ID.
   *
   * @returns the GPU data, or `undefined` when the ID is not tracked.
   */
  get(id: GpuDataId): GpuData | undefined;
  /**
   * Releases the GPU data identified by `id`.
   *
   * @return size of the data released
   */
  release(id: GpuDataId): number;
  /**
   * Copies the GPU data identified by `id` back to the CPU.
   *
   * @param getTargetBuffer - callback supplying the CPU buffer that receives the bytes.
   */
  download(id: GpuDataId, getTargetBuffer: () => Uint8Array): Promise<void>;

  /**
   * Processes the buffers that were marked for release.
   *
   * release() does not free a buffer right away because the commands using it may not have been submitted to the GPU
   * yet. This function is called once the commands are submitted, so that those buffers can actually be released.
   */
  refreshPendingBuffers(): void;

  /**
   * Registers an external buffer for IO Binding and returns its GPU data ID. When `previousBuffer` is supplied, the
   * GPU data ID it was registered under is reused.
   *
   * The manager only keeps a mapping between the buffer and the GPU data ID. It does not manage the lifecycle of the
   * external buffer.
   */
  registerExternalBuffer(buffer: GPUBuffer, originalSize: number, previousBuffer?: GPUBuffer): number;

  /**
   * Removes an external buffer that was registered for IO Binding.
   */
  unregisterExternalBuffer(buffer: GPUBuffer): void;

  /**
   * Destroys all GPU buffers.
   */
  dispose(): void;

  /**
   * Releases the data that belongs to one session.
   * @param sessionId - specify the session ID.
   */
  onReleaseSession(sessionId: number): void;
}
73

74
/**
 * Value stored in the storage cache for each tracked GPU data ID.
 */
interface StorageCacheValue {
  // the GPU data record handed out to callers
  gpuData: GpuData;
  // the byte size the caller asked for, before any rounding applied to the underlying buffer
  originalSize: number;
}
78

79
/**
 * Bucket table: bucket size in bytes => maximum number of released buffers kept for reuse in that bucket.
 *
 * Entries are listed in ascending size order; the bucket lookup relies on the iteration order of this map.
 */
const bucketFreelist = new Map<number, number>([
  [64, 250],
  [128, 200],
  [256, 200],
  [512, 200],
  [2048, 230],
  [4096, 200],
  [8192, 50],
  [16384, 50],
  [32768, 50],
  [65536, 50],
  [131072, 50],
  [262144, 50],
  [524288, 50],
  [1048576, 50],
  [2097152, 30],
  [4194304, 20],
  [8388608, 10],
  [12582912, 10],
  [16777216, 10],
  [26214400, 15],
  [33554432, 22],
  [44236800, 2],
  [58982400, 6],
  // ideally the bucket sizes below would not be cached, but leaving them out
  // causes major performance hits for models like sd-turbo.
  [67108864, 6],
  [134217728, 6],
  [167772160, 6],
]);
109

110
// Bucket sizes scanned by calcBucketBufferSize(). Starts empty; the GpuDataManagerImpl constructor appends the keys
// of bucketFreelist to it.
const bucketArr: Array<number> = [];
111

112
/**
 * Rounds a byte size up to the next multiple of 16 bytes (128-bit alignment).
 */
const calcNormalizedBufferSize = (size: number) => {
  const alignment = 16;
  return Math.ceil(size / alignment) * alignment;
};
116

117
/**
 * Picks the buffer size to allocate for a request of `size` bytes.
 *
 * Returns the first entry of bucketArr that is large enough. When no entry fits, the size is rounded up to a multiple
 * of 16 bytes instead.
 */
const calcBucketBufferSize = (size: number) => {
  for (const bucketSize of bucketArr) {
    if (bucketSize >= size) {
      return bucketSize;
    }
  }
  // larger than every bucket -> caller will not cache, round up to 16.
  return Math.ceil(size / 16) * 16;
};
130

131
// Next GPU data ID to hand out. IDs start at 1 and increase by one per call.
let guid = 1;

/**
 * Returns a fresh GPU data ID and advances the counter.
 */
const createNewGpuDataId = () => {
  const id = guid;
  guid += 1;
  return id;
};
133

134
/**
 * Standard GPU -> CPU download routine, exported so that it can be used outside of the data manager as well.
 *
 * The content of `gpuBuffer` is copied into a temporary mappable buffer, the backend is flushed, and the mapped bytes
 * are then read back. The temporary buffer is destroyed before the function settles, whether it succeeds or fails.
 *
 * @param backend - the WebGPU backend
 * @param gpuBuffer - the GPU buffer to download
 * @param originalSize - the original size of the data, in bytes
 * @param getTargetBuffer - optional. If provided, the data is copied into the buffer it returns and that buffer is the
 * result. Otherwise a newly allocated Uint8Array is returned.
 * @returns the CPU buffer holding the first `originalSize` bytes of the GPU buffer.
 */
export const downloadGpuData = async (
  backend: WebGpuBackend,
  gpuBuffer: GPUBuffer,
  originalSize: number,
  getTargetBuffer?: () => Uint8Array,
): Promise<Uint8Array> => {
  const alignedSize = calcNormalizedBufferSize(originalSize);
  const stagingBuffer = backend.device.createBuffer(
    // eslint-disable-next-line no-bitwise
    { size: alignedSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ },
  );
  try {
    const encoder = backend.getCommandEncoder();
    backend.endComputePass();
    // source buffer, source offset, destination buffer, destination offset, byte count
    encoder.copyBufferToBuffer(gpuBuffer, 0, stagingBuffer, 0, alignedSize);
    backend.flush();

    await stagingBuffer.mapAsync(GPUMapMode.READ);
    const mappedRange = stagingBuffer.getMappedRange();

    if (!getTargetBuffer) {
      // the mapped ArrayBuffer goes away when the GPU buffer is destroyed, so hand back a copy of the bytes.
      return new Uint8Array(mappedRange.slice(0, originalSize));
    }

    // a CPU buffer is supplied by the caller: write into it directly, no intermediate clone needed.
    const destination = getTargetBuffer();
    destination.set(new Uint8Array(mappedRange, 0, originalSize));
    return destination;
  } finally {
    stagingBuffer.destroy();
  }
};
184

185
/**
 * GpuDataManager implementation on top of a WebGpuBackend.
 *
 * Buffers handed back through release() are not destroyed immediately. They are parked until
 * refreshPendingBuffers() runs, which either recycles them into per-size free lists (bounded by bucketFreelist) or
 * destroys them.
 */
class GpuDataManagerImpl implements GpuDataManager {
  // GPU Data ID => GPU Data ( storage buffer ) together with the size originally requested for it
  private storageCache: Map<GpuDataId, StorageCacheValue>;

  // staging buffers created by upload() ( data is unmapped ); destroyed by refreshPendingBuffers()
  private buffersForUploadingPending: GPUBuffer[];
  // buffers handed back through release(), waiting for refreshPendingBuffers()
  private buffersPending: GPUBuffer[];

  // The reusable storage buffers for computing, keyed by buffer size.
  private freeBuffers: Map<number, GPUBuffer[]>;
  // The reusable uniform buffers, keyed by buffer size.
  private freeUniformBuffers: Map<number, GPUBuffer[]>;

  // The external buffers registered by users for IO Binding => their GPU data ID.
  private externalBuffers: Map<GPUBuffer, GpuDataId>;

  // The pendingBuffers for capture graph.
  // a SessionID -> GPUBuffer[] mapping.
  private capturedPendingBuffers: Map<number, GPUBuffer[]>;

  /**
   * @param backend - the WebGPU backend providing the device and the command encoder.
   */
  constructor(private backend: WebGpuBackend) {
    this.storageCache = new Map();
    this.freeBuffers = new Map();
    this.freeUniformBuffers = new Map();
    this.buffersForUploadingPending = [];
    this.buffersPending = [];
    this.externalBuffers = new Map();
    this.capturedPendingBuffers = new Map();

    // bucketArr is module-level state shared by every manager instance. Fill it only while it is still empty,
    // otherwise each new instance would append a duplicate copy of all bucket sizes.
    const seedBucketArr = bucketArr.length === 0;
    for (const [key] of bucketFreelist) {
      if (seedBucketArr) {
        bucketArr.push(key);
      }
      this.freeBuffers.set(key, []);
      this.freeUniformBuffers.set(key, []);
    }
  }

  /**
   * Copies `data` into the GPU data identified by `id` through a temporary mapped buffer.
   *
   * The copy is only recorded on the backend's command encoder here. The temporary buffer is kept until
   * refreshPendingBuffers() destroys it.
   *
   * @throws Error when `id` is unknown or when the byte length of `data` differs from the size of the GPU data.
   */
  upload(id: GpuDataId, data: Uint8Array): void {
    const srcArrayBuffer = data.buffer;
    const srcOffset = data.byteOffset;
    const srcLength = data.byteLength;
    const size = calcNormalizedBufferSize(srcLength);

    // get destination gpu buffer
    const gpuDataCache = this.storageCache.get(id);
    if (!gpuDataCache) {
      throw new Error('gpu data for uploading does not exist');
    }
    if (gpuDataCache.originalSize !== srcLength) {
      throw new Error(`inconsistent data size. gpu data size=${gpuDataCache.originalSize}, data size=${srcLength}`);
    }

    // create gpu buffer
    const gpuBufferForUploading = this.backend.device.createBuffer(
      // eslint-disable-next-line no-bitwise
      { mappedAtCreation: true, size, usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC },
    );

    // copy (upload) data
    const arrayBuffer = gpuBufferForUploading.getMappedRange();
    new Uint8Array(arrayBuffer).set(new Uint8Array(srcArrayBuffer, srcOffset, srcLength));
    gpuBufferForUploading.unmap();

    // GPU copy
    const commandEncoder = this.backend.getCommandEncoder();
    this.backend.endComputePass();
    commandEncoder.copyBufferToBuffer(gpuBufferForUploading, 0, gpuDataCache.gpuData.buffer, 0, size);

    LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.upload(id=${id})`);

    this.buffersForUploadingPending.push(gpuBufferForUploading);
  }

  /**
   * Records a GPU to GPU copy from the data `sourceId` to the data `destinationId`.
   *
   * @throws Error when either ID is unknown or when the two GPU data have different original sizes.
   */
  memcpy(sourceId: GpuDataId, destinationId: GpuDataId): void {
    // get source gpu buffer
    const sourceGpuDataCache = this.storageCache.get(sourceId);
    if (!sourceGpuDataCache) {
      throw new Error('source gpu data for memcpy does not exist');
    }
    // get destination gpu buffer
    const destinationGpuDataCache = this.storageCache.get(destinationId);
    if (!destinationGpuDataCache) {
      throw new Error('destination gpu data for memcpy does not exist');
    }
    if (sourceGpuDataCache.originalSize !== destinationGpuDataCache.originalSize) {
      throw new Error('inconsistent source and destination gpu data size');
    }

    const size = calcNormalizedBufferSize(sourceGpuDataCache.originalSize);

    // GPU copy
    const commandEncoder = this.backend.getCommandEncoder();
    this.backend.endComputePass();
    commandEncoder.copyBufferToBuffer(
      sourceGpuDataCache.gpuData.buffer,
      0,
      destinationGpuDataCache.gpuData.buffer,
      0,
      size,
    );
  }

  /**
   * Registers `buffer` as an external buffer and returns the GPU data ID it is tracked under.
   *
   * Without `previousBuffer` a new ID is allocated. With `previousBuffer`, the ID registered for it is reused: if
   * `buffer` is that same buffer nothing changes, otherwise the mapping is moved over to `buffer`.
   *
   * @throws Error when `previousBuffer` is not registered, or when a different buffer is supplied while the current
   * session has a captured command list.
   */
  registerExternalBuffer(buffer: GPUBuffer, originalSize: number, previousBuffer?: GPUBuffer): number {
    let id: number | undefined;
    if (previousBuffer) {
      id = this.externalBuffers.get(previousBuffer);
      if (id === undefined) {
        throw new Error('previous buffer is not registered');
      }
      if (buffer === previousBuffer) {
        LOG_DEBUG(
          'verbose',
          () =>
            `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${
              id
            }, buffer is the same, skip.`,
        );
        return id;
      } else if (this.backend.capturedCommandList.has(this.backend.currentSessionId!)) {
        throw new Error(`Registering a different external buffer under graph capture mode is not supported yet.
             Please use the previous external buffer!`);
      }
      this.externalBuffers.delete(previousBuffer);
    } else {
      id = createNewGpuDataId();
    }

    this.storageCache.set(id, { gpuData: { id, type: GpuDataType.default, buffer }, originalSize });
    this.externalBuffers.set(buffer, id);
    LOG_DEBUG(
      'verbose',
      () => `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${id}, registered.`,
    );
    return id;
  }

  /**
   * Forgets an external buffer and its GPU data ID. Does nothing when the buffer is not registered. The buffer itself
   * is not destroyed.
   */
  unregisterExternalBuffer(buffer: GPUBuffer): void {
    const id = this.externalBuffers.get(buffer);
    if (id !== undefined) {
      this.storageCache.delete(id);
      this.externalBuffers.delete(buffer);
      LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${id}`);
    }
  }

  /**
   * Creates a GPU data of `size` bytes and starts tracking it under a new ID.
   *
   * The underlying buffer size is rounded by calcBucketBufferSize(). For storage and uniform usages a buffer of that
   * size is taken from the matching free list when one is available; otherwise a new buffer is created on the device.
   *
   * @param size - requested size in bytes; remembered as the original size of the data.
   * @param usage - GPU buffer usage flags. Defaults to STORAGE | COPY_SRC | COPY_DST.
   */
  // eslint-disable-next-line no-bitwise
  create(size: number, usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST): GpuData {
    const bufferSize = calcBucketBufferSize(size);

    // eslint-disable-next-line no-bitwise
    const isStorage = (usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE;
    // eslint-disable-next-line no-bitwise
    const isUniform = (usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM;

    let gpuBuffer: GPUBuffer | undefined;
    if (isStorage || isUniform) {
      // only storage and uniform buffers are reused; stays undefined when there is no such bucket or it is empty.
      const freeBuffers = isStorage ? this.freeBuffers : this.freeUniformBuffers;
      gpuBuffer = freeBuffers.get(bufferSize)?.pop();
    }
    if (!gpuBuffer) {
      // nothing to reuse - create gpu buffer
      gpuBuffer = this.backend.device.createBuffer({ size: bufferSize, usage });
    }

    const gpuData = { id: createNewGpuDataId(), type: GpuDataType.default, buffer: gpuBuffer };
    this.storageCache.set(gpuData.id, { gpuData, originalSize: size });

    LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.create(size=${size}) => id=${gpuData.id}`);
    return gpuData;
  }

  /**
   * Returns the GPU data tracked under `id`, or `undefined` when there is none.
   */
  get(id: GpuDataId): GpuData | undefined {
    return this.storageCache.get(id)?.gpuData;
  }

  /**
   * Stops tracking the GPU data `id` and queues its buffer for refreshPendingBuffers().
   *
   * @returns the original size of the released data.
   * @throws Error when `id` is unknown.
   */
  release(id: GpuDataId): number {
    const cachedData = this.storageCache.get(id);
    if (!cachedData) {
      throw new Error('releasing data does not exist');
    }

    LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.release(id=${id}), gpuDataId=${cachedData.gpuData.id}`);

    this.storageCache.delete(id);
    // the buffer is deliberately not destroyed here; refreshPendingBuffers() decides what happens to it.
    this.buffersPending.push(cachedData.gpuData.buffer);

    return cachedData.originalSize;
  }

  /**
   * Downloads the GPU data `id` into the buffer returned by `getTargetBuffer`.
   *
   * @throws Error when `id` is unknown.
   */
  async download(id: GpuDataId, getTargetBuffer: () => Uint8Array): Promise<void> {
    const cachedData = this.storageCache.get(id);
    if (!cachedData) {
      throw new Error('data does not exist');
    }
    await downloadGpuData(this.backend, cachedData.gpuData.buffer, cachedData.originalSize, getTargetBuffer);
  }

  /**
   * Destroys the pending upload buffers and processes the buffers queued by release().
   *
   * When the backend session status is 'default', storage and uniform buffers are recycled into their free lists (or
   * destroyed when they cannot be kept) and every other buffer is destroyed. In any other status the buffers are kept
   * alive and attached to the current session until onReleaseSession() or dispose() destroys them.
   */
  refreshPendingBuffers(): void {
    for (const buffer of this.buffersForUploadingPending) {
      // upload buffer is only useful in the session creation time. So we don't need to reuse them in session running.
      buffer.destroy();
    }
    this.buffersForUploadingPending = [];

    if (this.buffersPending.length === 0) {
      return;
    }

    if (this.backend.sessionStatus === 'default') {
      for (const buffer of this.buffersPending) {
        // eslint-disable-next-line no-bitwise
        if ((buffer.usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE) {
          this.recycleOrDestroy(buffer, this.freeBuffers);
          // eslint-disable-next-line no-bitwise
        } else if ((buffer.usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM) {
          this.recycleOrDestroy(buffer, this.freeUniformBuffers);
        } else {
          buffer.destroy();
        }
      }
      this.buffersPending = [];
    } else {
      // Don't release intermediate tensors in non-default mode.
      // TODO: reuse the storage buffers in non-default mode.
      let capturedBuffers = this.capturedPendingBuffers.get(this.backend.currentSessionId!);
      if (!capturedBuffers) {
        capturedBuffers = [];
        this.capturedPendingBuffers.set(this.backend.currentSessionId!, capturedBuffers);
      }
      for (const buffer of this.buffersPending) {
        capturedBuffers.push(buffer);
      }
      this.buffersPending = [];
    }
  }

  /**
   * Puts a released buffer on the free list for its size instead of destroying it, so that create() can reuse it.
   *
   * The buffer is destroyed when its size is not a bucket size, when the bucket already holds its maximum number of
   * buffers, or when no free list exists for that size. The last case matters because a buffer pushed onto a
   * temporary array would be neither reused nor destroyed.
   */
  private recycleOrDestroy(buffer: GPUBuffer, freeLists: Map<number, GPUBuffer[]>): void {
    const maxInFreeList = bucketFreelist.get(buffer.size);
    const freelist = freeLists.get(buffer.size);
    if (maxInFreeList === undefined || freelist === undefined || freelist.length >= maxInFreeList) {
      buffer.destroy();
    } else {
      freelist.push(buffer);
    }
  }

  /**
   * Destroys every buffer owned by the manager and resets all bookkeeping.
   *
   * Buffers registered through registerExternalBuffer() are only forgotten, not destroyed: their lifecycle is not
   * managed here.
   */
  dispose() {
    const destroyAll = (buffers: GPUBuffer[]) => {
      for (const buffer of buffers) {
        buffer.destroy();
      }
    };

    this.freeBuffers.forEach((buffers) => destroyAll(buffers));
    this.freeUniformBuffers.forEach((buffers) => destroyAll(buffers));

    this.storageCache.forEach((storage) => {
      // skip only the entry created by the external registration itself; an entry that tracks the same buffer
      // under a different ID is still destroyed.
      const isExternalEntry = this.externalBuffers.get(storage.gpuData.buffer) === storage.gpuData.id;
      if (!isExternalEntry) {
        storage.gpuData.buffer.destroy();
      }
    });

    this.capturedPendingBuffers.forEach((buffers) => destroyAll(buffers));

    // buffers still waiting for refreshPendingBuffers() would otherwise never be destroyed.
    destroyAll(this.buffersForUploadingPending);
    destroyAll(this.buffersPending);

    this.storageCache = new Map();
    this.freeBuffers = new Map();
    this.freeUniformBuffers = new Map();
    this.capturedPendingBuffers = new Map();
    this.externalBuffers = new Map();
    this.buffersForUploadingPending = [];
    this.buffersPending = [];
  }

  /**
   * Destroys the pending buffers that were kept alive for the session `sessionId`, if any.
   */
  onReleaseSession(sessionId: number) {
    // release the captured pending buffers.
    const pendingBuffers = this.capturedPendingBuffers.get(sessionId);
    if (pendingBuffers) {
      pendingBuffers.forEach((buffer) => {
        buffer.destroy();
      });
      this.capturedPendingBuffers.delete(sessionId);
    }
  }
}
485

486
/**
 * Factory for the GPU data manager. Accepts the same arguments as the GpuDataManagerImpl constructor and exposes the
 * new instance through the GpuDataManager interface only.
 */
export const createGpuDataManager = (...args: ConstructorParameters<typeof GpuDataManagerImpl>): GpuDataManager => {
  const manager: GpuDataManager = new GpuDataManagerImpl(...args);
  return manager;
};
488

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.