3
import * as d3 from "https://cdn.skypack.dev/d3@5";
4
import {axisLeft} from "https://cdn.skypack.dev/d3-axis@1";
5
import {scaleLinear} from "https://cdn.skypack.dev/d3-scale@1";
6
import {zoom, zoomIdentity} from "https://cdn.skypack.dev/d3-zoom@1";
7
import {brushX} from "https://cdn.skypack.dev/d3-brush@1";
9
const schemeTableau10 = [
22
// Creates a version-lookup closure keyed by address. Elsewhere in this file one
// instance is stored as snapshot.segment_version and another as snapshot.block_version.
// NOTE(review): several interior lines of this definition are not visible in this view.
function version_space() {
24
// The closure takes an address and an `increment` flag (callers pass true or false).
return (addr, increment) => {
25
// Branch taken when the address has no entry in `version` yet.
if (!(addr in version)) {
28
// Value currently stored for this address.
const r = version[addr];
36
/**
 * Builds a plain record describing one memory segment.
 *
 * @param addr - address of the segment
 * @param size - size of the segment
 * @param stream - stream the segment is associated with
 * @param frames - stack frames recorded for the segment
 * @param version - version tag for this address
 * @returns {{addr, size, stream, version, frames}} a new object holding the
 *   arguments under same-named keys (note `version` precedes `frames`).
 */
function Segment(addr, size, stream, frames, version) {
  return {addr, size, stream, version, frames};
}
40
/**
 * Builds a plain record describing one allocated block.
 *
 * @param addr - address of the block
 * @param size - size of the block
 * @param requested_size - size that was requested for the block
 * @param frames - stack frames recorded for the block
 * @param free_requested - boolean flag; the viewer labels flagged blocks as
 *   "freed but waiting due to record_stream"
 * @param version - version tag for this address
 * @returns {{addr, size, requested_size, frames, free_requested, version}}
 *   a new object holding the arguments under same-named keys.
 */
function Block(addr, size, requested_size, frames, free_requested, version) {
  return {addr, size, requested_size, frames, free_requested, version};
}
44
function EventSelector(outer, events, stack_info, memory_view) {
45
const events_div = outer
49
'grid-column: 1; grid-row: 1; overflow: auto; font-family: monospace',
52
const events_selection = events_div
57
.text(e => formatEvent(e))
60
let selected_event_idx = null;
64
if (selected_event_idx !== null) {
65
const selected_event = d3.select(
66
events_div.node().children[selected_event_idx],
68
selected_event.attr('style', '');
71
const div = d3.select(events_div.node().children[idx]);
72
div.attr('style', `background-color: ${schemeTableau10[5]}`);
73
const [reserved, allocated] = memory_view.draw(idx);
74
const enter = () => eventStack(div.datum(), allocated, reserved);
75
stack_info.highlight(enter);
76
div.node().scrollIntoViewIfNeeded(false);
80
selected_event_idx = idx;
83
d3.select('body').on('keydown', _e => {
84
const key = d3.event.key;
85
const actions = {ArrowDown: 1, ArrowUp: -1};
86
if (selected_event_idx !== null && key in actions) {
87
const new_idx = selected_event_idx + actions[key];
88
es.select(Math.max(0, Math.min(new_idx, events.length - 1)));
89
d3.event.preventDefault();
95
t => eventStack(t.datum()),
97
d => es.select(d.datum().idx),
103
/**
 * Formats a byte count with binary (1024-based) unit prefixes.
 *
 * Values whose magnitude fits one of the listed units are rendered as
 * `<value to 1 decimal><unit>B (<original> bytes)`; anything larger than the
 * last listed unit falls through to `<value>YiB` without the byte count.
 *
 * @param {number} num - size in bytes (may be negative)
 * @returns {string} human-readable size
 */
function formatSize(num) {
  const orig = num;
  const units = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'];
  // Work on a local copy so the parameter itself is never reassigned.
  let scaled = num;
  for (const unit of units) {
    if (Math.abs(scaled) < 1024.0) {
      return `${scaled.toFixed(1)}${unit}B (${orig} bytes)`;
    }
    scaled /= 1024.0;
  }
  return `${scaled.toFixed(1)}YiB`;
}
115
/**
 * Builds the short label used for an event's address.
 *
 * The label is `<prefix><hex address>_<version>`, where the prefix is `s` when
 * the event's action starts with "segment" and `b` otherwise.
 *
 * @param event - object with `action` (string), `addr` and `version`
 * @returns {string} the formatted label
 */
function formatAddr(event) {
  const prefix = event.action.startsWith('segment') ? 's' : 'b';
  return `${prefix}${event.addr.toString(16)}_${event.version}`;
}
119
function formatEvent(event) {
121
event.stream === null ? '' : `\n (stream ${event.stream})`;
122
switch (event.action) {
124
return `OOM (requested ${formatSize(event.size)}, CUDA has ${formatSize(
126
)} memory free)${stream}`;
130
return `${event.action.padEnd(14)} ${formatAddr(event).padEnd(
132
)} ${formatSize(event.size)}${stream}`;
136
/**
 * Produces the text shown for an event: its formatted description followed by
 * its stack frames, optionally preceded by an allocated/reserved summary.
 *
 * @param e - the event; `e.frames` is passed to `format_frames`
 * @param allocated - allocated total, only used when `reserved` is supplied
 * @param reserved - reserved total; when `undefined` the summary line is omitted
 * @returns {string} multi-line text
 */
function eventStack(e, allocated, reserved) {
  let event = formatEvent(e);
  // Callers that only hover over an event pass a single argument, so the
  // memory summary is added only when totals were actually provided.
  if (reserved !== undefined) {
    event = `(${formatSize(allocated)} allocated / ${formatSize(
      reserved,
    )} reserved)\n${event}`;
  }
  return event + '\n' + format_frames(e.frames);
}
146
// Derives an integer hash from the string form of `num`. In this file the
// result is used to pick a palette colour for a block address.
// NOTE(review): the accumulator's declaration and the return statement are on
// lines not visible in this view.
function hashCode(num) {
147
const numStr = num.toString();
149
// Walk the string one UTF-16 code unit at a time.
for (let i = 0; i < numStr.length; i++) {
150
const charCode = numStr.charCodeAt(i);
151
// `(hash << 5) - hash` multiplies the running hash by 31; the shift itself is
// evaluated in 32-bit integer arithmetic.
hash = (hash << 5) - hash + charCode;
157
/**
 * Outlines a selection in red to mark it as highlighted.
 *
 * @param d - object with a chainable `attr(name, value)` method (a d3 selection
 *   at the call sites in this file)
 * @returns {void}
 */
function addStroke(d) {
  d.attr('stroke', 'red')
    .attr('stroke-width', '2')
    // Keep the outline width constant regardless of zoom/scale transforms.
    .attr('vector-effect', 'non-scaling-stroke');
}
163
/**
 * Clears the highlight outline by setting the `stroke` attribute to an empty
 * string. Other stroke-related attributes are left as they are.
 *
 * @param d - object with an `attr(name, value)` method (a d3 selection at the
 *   call sites in this file)
 * @returns {void}
 */
function removeStroke(d) {
  d.attr('stroke', '');
}
167
// Logs a fragmentation figure: the sum of the squared sizes of the free gaps
// found inside the segments, divided by the squared total segment size.
// NOTE(review): the declarations of `total_size`, `block_i` and the `addr`
// cursor, plus some control-flow lines, are not visible in this view.
function calculate_fragmentation(blocks, sorted_segments) {
168
// Order the blocks by ascending address so they can be walked alongside the
// segments (presumably `sorted_segments` is already address-ordered — confirm).
const sorted_blocks = Object.values(blocks).sort((a, b) => a.addr - b.addr);
171
let sum_squared_free = 0;
172
for (const seg of sorted_segments) {
174
total_size += seg.size;
176
block_i < sorted_blocks.length &&
177
sorted_blocks[block_i].addr < seg.addr + seg.size
179
const block = sorted_blocks[block_i];
180
// A gap between the cursor and the start of this block counts as free space.
if (block.addr > addr) {
181
sum_squared_free += (block.addr - addr) ** 2;
183
// Move the cursor to the end of this block.
addr = block.addr + block.size;
186
// Free space between the cursor and the end of the segment.
if (addr < seg.addr + seg.size) {
187
sum_squared_free += (seg.addr + seg.size - addr) ** 2;
190
// The figure is written to the console on this line.
console.log(sum_squared_free / total_size ** 2);
193
function MemoryView(outer, stack_info, snapshot, device) {
196
.attr('style', 'grid-column: 2; grid-row: 1; width: 100%; height: 100%;')
197
.attr('viewBox', '0 0 200 100')
198
.attr('preserveAspectRatio', 'xMinYMin meet');
199
const g = svg.append('g');
200
const seg_zoom = zoom();
201
seg_zoom.on('zoom', () => {
202
g.attr('transform', d3.event.transform);
206
const sorted_segments = [];
207
const block_map = {};
208
for (const seg of snapshot.segments) {
209
if (seg.device !== device) {
212
sorted_segments.push(
221
for (const b of seg.blocks) {
222
if (b.state !== 'active_pending_free' && b.state !== 'active_allocated') {
225
block_map[b.addr] = Block(
230
b.state === 'active_pending_free',
235
sorted_segments.sort((x, y) => x.addr - y.addr);
237
function simulate_memory(idx) {
239
const l_segments = sorted_segments.map(x => {
242
const l_block_map = {...block_map};
244
function map_segment(merge, seg) {
245
let idx = l_segments.findIndex(e => e.addr > seg.addr);
247
l_segments.splice(idx, 0, seg);
251
idx = l_segments.length;
253
l_segments.splice(idx, 0, seg);
254
if (idx + 1 < l_segments.length) {
255
const next = l_segments[idx + 1];
256
if (seg.addr + seg.size === next.addr && seg.stream === next.stream) {
257
seg.size += next.size;
258
l_segments.splice(idx + 1, 1);
262
const prev = l_segments[idx - 1];
263
if (prev.addr + prev.size === seg.addr && prev.stream === seg.stream) {
264
prev.size += seg.size;
265
l_segments.splice(idx, 1);
269
function unmap_segment(merge, seg) {
272
l_segments.findIndex(x => x.addr === seg.addr),
277
const seg_end = seg.addr + seg.size;
278
const idx = l_segments.findIndex(
279
e => e.addr <= seg.addr && seg_end <= e.addr + e.size,
281
const existing = l_segments[idx];
282
const existing_end = existing.addr + existing.size;
283
if (existing.addr === seg.addr) {
284
existing.addr += seg.size;
285
existing.size -= seg.size;
286
if (existing.size === 0) {
287
l_segments.splice(idx, 1);
289
} else if (existing_end === seg_end) {
290
existing.size -= seg.size;
292
existing.size = seg.addr - existing.addr;
294
seg.size = existing_end - seg_end;
295
l_segments.splice(idx + 1, 0, seg);
298
const events = snapshot.device_traces[device];
299
for (let i = events.length - 1; i > idx; i--) {
300
const event = events[i];
301
switch (event.action) {
303
l_block_map[event.addr] = Block(
312
case 'free_requested':
313
l_block_map[event.addr].free_requested = false;
315
case 'free_completed':
316
l_block_map[event.addr] = Block(
326
delete l_block_map[event.addr];
329
case 'segment_unmap':
331
event.action === 'segment_unmap',
341
case 'segment_alloc':
344
event.action === 'segment_map',
360
const new_blocks = Object.values(l_block_map);
361
return [l_segments, new_blocks];
366
const [segments_unsorted, blocks] = simulate_memory(idx);
367
g.selectAll('g').remove();
369
const segment_d = g.append('g');
370
const block_g = g.append('g');
371
const block_r = g.append('g');
373
segment_d.selectAll('rect').remove();
374
block_g.selectAll('rect').remove();
375
block_r.selectAll('rect').remove();
376
const segments = [...segments_unsorted].sort((x, y) =>
377
x.size === y.size ? x.addr - y.addr : x.size - y.size,
380
const segments_by_addr = [...segments].sort((x, y) => x.addr - y.addr);
382
const max_size = segments.length === 0 ? 0 : segments.at(-1).size;
384
const xScale = scaleLinear().domain([0, max_size]).range([0, 200]);
385
const padding = xScale.invert(1);
388
let cur_row_size = 0;
389
for (const seg of segments) {
391
seg.internal_free = 0;
392
if (cur_row_size + seg.size > max_size) {
396
seg.offset = cur_row_size;
398
cur_row_size += seg.size + padding;
401
const num_rows = cur_row + 1;
403
const yScale = scaleLinear().domain([0, num_rows]).range([0, 100]);
405
const segments_selection = segment_d
410
.attr('x', x => xScale(x.offset))
411
.attr('y', x => yScale(x.row))
412
.attr('width', x => xScale(x.size))
413
.attr('height', yScale(4 / 5))
414
.attr('stroke', 'black')
415
.attr('stroke-width', '1')
416
.attr('vector-effect', 'non-scaling-stroke')
417
.attr('fill', 'white');
424
const free = t.size - t.occupied;
426
if (t.internal_free > 0) {
427
internal = ` (${(t.internal_free / free) * 100}% internal)`;
430
`s${t.addr.toString(16)}_${t.version}: segment ${formatSize(
433
`${formatSize(free)} free${internal} (stream ${
435
})\n${format_frames(t.frames)}`
439
d.attr('stroke', 'black')
440
.attr('stroke-width', '1')
441
.attr('vector-effect', 'non-scaling-stroke');
445
function find_segment(addr) {
447
let right = segments_by_addr.length - 1;
448
while (left <= right) {
449
const mid = Math.floor((left + right) / 2);
450
if (addr < segments_by_addr[mid].addr) {
454
segments_by_addr[mid].addr + segments_by_addr[mid].size
458
return segments_by_addr[mid];
464
for (const b of blocks) {
465
b.segment = find_segment(b.addr);
466
b.segment.occupied += b.requested_size;
467
b.segment.internal_free += b.size - b.requested_size;
470
const block_selection = block_g
475
.attr('x', x => xScale(x.segment.offset + (x.addr - x.segment.addr)))
476
.attr('y', x => yScale(x.segment.row))
477
.attr('width', x => xScale(x.requested_size))
478
.attr('height', yScale(4 / 5))
479
.attr('fill', (x, _i) =>
483
Math.abs(hashCode(x.addr)) % schemeTableau10.length
493
if (t.free_requested) {
494
requested = ' (block freed but waiting due to record_stream)';
497
`b${t.addr.toString(16)}_${t.version} ` +
498
`${formatSize(t.requested_size)} allocation${requested} (stream ${
501
format_frames(t.frames)
507
const free_selection = block_r
514
x.segment.offset + (x.addr - x.segment.addr) + x.requested_size,
517
.attr('y', x => yScale(x.segment.row))
518
.attr('width', x => xScale(x.size - x.requested_size))
519
.attr('height', yScale(4 / 5))
520
.attr('fill', (_x, _i) => 'red');
528
`Free space lost due to rounding ${formatSize(
529
t.size - t.requested_size,
531
` (stream ${t.segment.stream})\n` +
532
format_frames(t.frames)
538
const reserved = segments.reduce((x, y) => x + y.size, 0);
539
const allocated = blocks.reduce((x, y) => x + y.requested_size, 0);
540
return [reserved, allocated];
545
function StackInfo(outer) {
546
const stack_trace = outer
548
.attr('style', 'grid-column: 1 / 3; grid-row: 2; overflow: auto');
551
stack_trace.text('');
556
register(dom, enter, leave = _e => {}, select = _e => {}) {
558
.on('mouseover', _e => {
560
stack_trace.text(enter(d3.select(d3.event.target)));
562
.on('mousedown', _e => {
563
const obj = d3.select(d3.event.target);
565
enter: () => stack_trace.text(enter(obj)),
566
leave: () => leave(obj),
570
.on('mouseleave', _e => {
571
leave(d3.select(d3.event.target));
575
highlight(enter, leave = () => {}) {
576
selected = {enter: () => stack_trace.text(enter()), leave};
582
// Builds the view registered in this file as 'Allocator State History' for one
// device: a StackInfo pane, a MemoryView and an EventSelector sharing the same
// `outer` container.
// NOTE(review): the creation of `outer` is on lines not visible in this view.
function create_segment_view(dst, snapshot, device) {
587
'display: grid; grid-template-columns: 1fr 2fr; grid-template-rows: 2fr 1fr; height: 100%; gap: 10px',
590
// Events recorded for the chosen device.
const events = snapshot.device_traces[device];
591
const stack_info = StackInfo(outer);
592
const memory_view = MemoryView(outer, stack_info, snapshot, device);
593
const event_selector = EventSelector(outer, events, stack_info, memory_view);
595
// The initial selection is deferred to the next animation frame.
window.requestAnimationFrame(function () {
596
// Select the last event, or pass null when the trace is empty.
event_selector.select(events.length > 0 ? events.length - 1 : null);
600
function annotate_snapshot(snapshot) {
601
snapshot.segment_version = version_space();
602
snapshot.block_version = version_space();
603
snapshot.categories = [];
604
const empty_list = [];
606
const stream_names = {0: 0};
607
function stream_name(s) {
608
if (!(s in stream_names)) {
609
stream_names[s] = next_stream++;
611
return stream_names[s];
613
const new_traces = [];
614
for (const device_trace of snapshot.device_traces) {
615
const new_trace = [];
616
new_traces.push(new_trace);
617
for (const t of device_trace) {
618
if (!('frames' in t)) {
619
t.frames = empty_list;
624
t.stream = stream_name(t.stream);
626
case 'free_completed':
627
t.version = snapshot.block_version(t.addr, true);
628
if (new_trace.length > 0) {
630
const prev = new_trace.at(-1);
631
if (prev.action === 'free_requested' && prev.addr === t.addr) {
632
prev.action = 'free';
637
case 'free_requested':
639
t.version = snapshot.block_version(t.addr, false);
642
case 'segment_unmap':
643
t.version = snapshot.segment_version(t.addr, true);
645
case 'segment_alloc':
647
t.version = snapshot.segment_version(t.addr, false);
652
if ('category' in t && !snapshot.categories.includes(t.category)) {
653
snapshot.categories.push(t.category);
655
t.idx = new_trace.length;
659
snapshot.device_traces = new_traces;
661
if (next_stream == 1) {
662
for (const device_trace of snapshot.device_traces) {
663
for (const t of device_trace) {
669
for (const seg of snapshot.segments) {
670
seg.stream = stream_name(seg.stream);
671
seg.version = snapshot.segment_version(seg.address, false);
672
let addr = seg.address;
673
for (const b of seg.blocks) {
675
if (!('frames' in b)) {
678
if ('history' in b) {
679
b.frames = b.history[0].frames || empty_list;
680
b.requested_size = b.requested_size || b.history[0].real_size;
682
b.frames = empty_list;
683
b.requested_size = b.requested_size || b.size;
686
b.version = snapshot.block_version(b.addr, false);
692
snapshot.categories.length > 0 &&
693
!snapshot.categories.includes('unknown')
695
// 'unknown' is the fallback category name looked up for elements that carry no category.
snapshot.categories.push('unknown');
699
function elideRepeats(frames) {
701
const length = frames.length;
702
for (let i = 0; i < length; ) {
705
while (j < length && f === frames[j]) {
716
result.push(f, `<repeats ${j - i - 1} times>`);
723
function frameFilter({name, filename}) {
724
const omitFunctions = [
726
'CapturedTraceback::gather',
735
const omitFilenames = [
742
'Objects/methodobject.c',
745
'cpython/abstract.h',
748
for (const of of omitFunctions) {
749
if (name.includes(of)) {
754
for (const of of omitFilenames) {
755
if (filename.includes(of)) {
763
function format_frames(frames) {
764
if (frames.length === 0) {
766
`This block has no frames. Potential causes:\n` +
767
`1) This block was allocated before _record_memory_history was enabled.\n` +
768
`2) The context or stacks passed to _record_memory_history does not include this block. Consider changing context to 'state', 'alloc', or 'all', or changing stacks to 'all'.\n` +
769
`3) This event occurred during backward, which has no python frames, and memory history did not include C++ frames. Use stacks='all' to record both C++ and python frames.`
772
const frame_strings = frames
774
.map(f => `${f.filename}:${f.line}:${f.name}`);
775
return elideRepeats(frame_strings).join('\n');
778
function process_alloc_data(snapshot, device, plot_segments, max_entries) {
780
const initially_allocated = [];
782
const addr_to_alloc = {};
784
const alloc = plot_segments ? 'segment_alloc' : 'alloc';
785
const [free, free_completed] = plot_segments
786
? ['segment_free', 'segment_free']
787
: ['free', 'free_completed'];
788
for (const e of snapshot.device_traces[device]) {
792
addr_to_alloc[e.addr] = elements.length - 1;
793
actions.push(elements.length - 1);
797
if (e.addr in addr_to_alloc) {
798
actions.push(addr_to_alloc[e.addr]);
799
delete addr_to_alloc[e.addr];
802
initially_allocated.push(elements.length - 1);
803
actions.push(elements.length - 1);
810
for (const seg of snapshot.segments) {
811
if (seg.device !== device) {
815
if (!(seg.address in addr_to_alloc)) {
819
size: seg.total_size,
822
version: seg.version,
824
elements.push(element);
825
initially_allocated.push(elements.length - 1);
828
for (const b of seg.blocks) {
829
if (b.state === 'active_allocated' && !(b.addr in addr_to_alloc)) {
833
size: b.requested_size,
838
elements.push(element);
839
initially_allocated.push(elements.length - 1);
844
initially_allocated.reverse();
848
if (actions.length === 0 && initially_allocated.length > 0) {
849
actions.push(initially_allocated.pop());
853
const current_data = [];
858
let total_summarized_mem = 0;
861
const max_at_time = [];
863
const summarized_mem = {
866
offsets: [total_mem],
870
const summarized_elems = {};
872
function advance(n) {
873
summarized_mem.timesteps.push(timestep);
874
summarized_mem.offsets.push(total_mem);
875
summarized_mem.size.push(total_summarized_mem);
877
for (let i = 0; i < n; i++) {
878
max_at_time.push(total_mem + total_summarized_mem);
882
const sizes = elements
883
.map((x, i) => [x.size, i])
884
.sort(([x, _xi], [y, _yi]) => y - x);
886
const draw_elem = {};
887
for (const [_s, e] of sizes.slice(0, max_entries)) {
891
function add_allocation(elem) {
892
const element_obj = elements[elem];
893
const size = element_obj.size;
896
if (snapshot.categories.length > 0) {
897
color = snapshot.categories.indexOf(element_obj.category || 'unknown');
901
timesteps: [timestep],
902
offsets: [total_mem],
906
current_data.push(e);
909
element_obj.max_allocated_mem = total_mem + total_summarized_mem;
912
for (const elem of initially_allocated) {
913
if (elem in draw_elem) {
914
add_allocation(elem);
916
total_summarized_mem += elements[elem].size;
917
summarized_elems[elem] = true;
921
for (const elem of actions) {
922
const size = elements[elem].size;
923
if (!(elem in draw_elem)) {
924
if (elem in summarized_elems) {
926
total_summarized_mem -= size;
927
summarized_elems[elem] = null;
929
total_summarized_mem += size;
930
summarized_elems[elem] = true;
935
const idx = current.findLastIndex(x => x === elem);
939
add_allocation(elem);
943
const removed = current_data[idx];
944
removed.timesteps.push(timestep);
945
removed.offsets.push(removed.offsets.at(-1));
946
current.splice(idx, 1);
947
current_data.splice(idx, 1);
949
if (idx < current.length) {
950
for (let j = idx; j < current.length; j++) {
951
const e = current_data[j];
952
e.timesteps.push(timestep);
953
e.offsets.push(e.offsets.at(-1));
954
e.timesteps.push(timestep + 3);
955
e.offsets.push(e.offsets.at(-1) - size);
961
max_size = Math.max(total_mem + total_summarized_mem, max_size);
964
for (const elem of current_data) {
965
elem.timesteps.push(timestep);
966
elem.offsets.push(elem.offsets.at(-1));
968
data.push(summarized_mem);
972
allocations_over_time: data,
975
elements_length: elements.length,
976
context_for_id: id => {
977
const elem = elements[id];
978
let text = `Addr: ${formatAddr(elem)}`;
979
text = `${text}, Size: ${formatSize(elem.size)} allocation`;
980
text = `${text}, Total memory used after allocation: ${formatSize(
981
elem.max_allocated_mem,
983
if (elem.stream !== null) {
984
text = `${text}, stream ${elem.stream}`;
986
if (elem.timestamp !== null) {
987
var d = new Date(elem.time_us / 1000);
988
text = `${text}, timestamp ${d}`;
990
if (!elem.action.includes('alloc')) {
991
text = `${text}\nalloc not recorded, stack trace for free:`;
993
text = `${text}\n${format_frames(elem.frames)}`;
1005
colors = schemeTableau10,
1007
function format_points(d) {
1008
const size = d.size;
1009
const xs = d.timesteps.map(t => xscale(t));
1010
const bottom = d.offsets.map(t => yscale(t));
1011
const m = Array.isArray(size)
1012
? (t, i) => yscale(t + size[i])
1013
: t => yscale(t + size);
1014
const top = d.offsets.map(m);
1015
const p0 = xs.map((x, i) => `${x},${bottom[i]}`);
1016
const p1 = xs.map((x, i) => `${x},${top[i]}`).reverse();
1017
return `${p0.join(' ')} ${p1.join(' ')}`;
1020
const max_timestep = data.max_at_time.length;
1021
const max_size = data.max_size;
1023
const plot_width = width - left_pad;
1024
const plot_height = height;
1026
const yscale = scaleLinear().domain([0, max_size]).range([plot_height, 0]);
1027
const yaxis = axisLeft(yscale).tickFormat(d3.format('.3s'));
1028
const xscale = scaleLinear().domain([0, max_timestep]).range([0, plot_width]);
1029
const plot_coordinate_space = svg
1031
.attr('transform', `translate(${left_pad}, ${0})`);
1032
const plot_outer = plot_coordinate_space.append('g');
1034
function view_rect(a) {
1039
.attr('width', plot_width)
1040
.attr('height', plot_height)
1041
.attr('fill', 'white');
1044
view_rect(plot_outer);
1046
const cp = svg.append('clipPath').attr('id', 'clip');
1048
plot_outer.attr('clip-path', 'url(#clip)');
1050
const zoom_group = plot_outer.append('g');
1051
const scrub_group = zoom_group.append('g');
1053
const plot = scrub_group
1054
.selectAll('polygon')
1055
.data(data.allocations_over_time)
1058
.attr('points', format_points)
1059
.attr('fill', d => colors[d.color % colors.length]);
1061
const axis = plot_coordinate_space.append('g').call(yaxis);
1063
function handleZoom() {
1064
const t = d3.event.transform;
1065
zoom_group.attr('transform', t);
1066
axis.call(yaxis.scale(d3.event.transform.rescaleY(yscale)));
1069
const thezoom = zoom().on('zoom', handleZoom);
1070
plot_outer.call(thezoom);
1073
select_window: (stepbegin, stepend, max) => {
1074
const begin = xscale(stepbegin);
1075
const size = xscale(stepend) - xscale(stepbegin);
1076
const scale = plot_width / size;
1077
const translate = -begin;
1078
const yscale = max_size / max;
1081
`scale(${scale / yscale}, 1) translate(${translate}, 0)`,
1087
.translate(0, -(plot_height - plot_height / yscale)),
1090
set_delegate: delegate => {
1092
.on('mouseover', function (_e, _d) {
1093
delegate.set_selected(d3.select(this));
1095
.on('mousedown', function (_e, _d) {
1096
delegate.default_selected = d3.select(this);
1098
.on('mouseleave', function (_e, _d) {
1099
delegate.set_selected(delegate.default_selected);
1105
function ContextViewer(text, data) {
1106
let current_selected = null;
1109
default_selected: null,
1110
set_selected: d => {
1111
if (current_selected !== null) {
1112
current_selected.attr('stroke', null).attr('stroke-width', null);
1117
const dd = d.datum();
1118
if (dd.elem === 'summarized') {
1120
'Small tensors that were not plotted to cutdown on render time.\n' +
1121
'Use detail slider to see smaller allocations.',
1124
text.text(`${dd.elem} ${data.context_for_id(dd.elem)}`);
1126
d.attr('stroke', 'black')
1127
.attr('stroke-width', 1)
1128
.attr('vector-effect', 'non-scaling-stroke');
1130
current_selected = d;
1135
function MiniMap(mini_svg, plot, data, left_pad, width, height = 70) {
1136
const max_at_time = data.max_at_time;
1137
const plot_width = width - left_pad;
1138
const yscale = scaleLinear().domain([0, data.max_size]).range([height, 0]);
1139
const minixscale = scaleLinear()
1140
.domain([0, max_at_time.length])
1141
.range([left_pad, width]);
1143
const mini_points = [
1144
[max_at_time.length, 0],
1148
for (const [i, m] of max_at_time.entries()) {
1149
const [_lastx, lasty] = mini_points[mini_points.length - 1];
1151
mini_points.push([i, lasty]);
1152
mini_points.push([i, m]);
1153
} else if (i === max_at_time.length - 1) {
1154
mini_points.push([i, m]);
1158
let points = mini_points.map(([t, o]) => `${minixscale(t)}, ${yscale(o)}`);
1159
points = points.join(' ');
1162
.attr('points', points)
1163
.attr('fill', schemeTableau10[0]);
1165
const xscale = scaleLinear()
1166
.domain([0, max_at_time.length])
1167
.range([0, plot_width]);
1169
const brush = brushX();
1174
brush.on('brush', function () {
1175
const [begin, end] = d3.event.selection.map(x => x - left_pad);
1177
const stepbegin = Math.floor(xscale.invert(begin));
1178
const stepend = Math.floor(xscale.invert(end));
1180
for (let i = stepbegin; i < stepend; i++) {
1181
max = Math.max(max, max_at_time[i]);
1183
plot.select_window(stepbegin, stepend, max);
1185
mini_svg.call(brush);
1189
function Legend(plot_svg, categories) {
1198
.attr('x', (c, i) => xstart)
1199
.attr('y', (c, i) => ystart + i * 15)
1202
.attr('fill', (c, i) => schemeTableau10[i % schemeTableau10.length]);
1209
.attr('x', (c, i) => xstart + 20)
1210
.attr('y', (c, i) => ystart + i * 15 + 8)
1211
.attr('font-family', 'helvetica')
1212
.attr('font-size', 10)
1217
function create_trace_view(
1221
plot_segments = false,
1222
max_entries = 15000,
1224
const left_pad = 70;
1225
const data = process_alloc_data(snapshot, device, plot_segments, max_entries);
1226
dst.selectAll('svg').remove();
1227
dst.selectAll('div').remove();
1229
const d = dst.append('div');
1231
.attr('type', 'range')
1233
.attr('max', data.elements_length)
1234
.attr('value', max_entries)
1235
.on('change', function () {
1236
create_trace_view(dst, snapshot, device, plot_segments, this.value);
1238
d.append('label').text('Detail');
1240
const grid_container = dst
1244
'display: grid; grid-template-columns: 1fr; grid-template-rows: 10fr 1fr 8fr; height: 100%; gap: 10px',
1247
const plot_svg = grid_container
1249
.attr('display', 'block')
1250
.attr('viewBox', '0 0 1024 576')
1251
.attr('preserveAspectRatio', 'none')
1252
.attr('style', 'grid-column: 1; grid-row: 1; width: 100%; height: 100%;');
1254
const plot = MemoryPlot(plot_svg, data, left_pad, 1024, 576);
1256
if (snapshot.categories.length !== 0) {
1257
Legend(plot_svg.append('g'), snapshot.categories);
1260
const mini_svg = grid_container
1262
.attr('display', 'block')
1263
.attr('viewBox', '0 0 1024 60')
1264
.attr('preserveAspectRatio', 'none')
1265
.attr('style', 'grid-column: 1; grid-row: 2; width: 100%; height: 100%;');
1267
MiniMap(mini_svg, plot, data, left_pad, 1024);
1268
const context_div = grid_container
1272
'grid-column: 1; grid-row: 3; width: 100%; height: 100%; overflow: auto;',
1274
const delegate = ContextViewer(context_div.append('pre').text('none'), data);
1275
plot.set_delegate(delegate);
1278
// Builds the view registered in this file as 'Allocator Settings': a heading
// followed by either the snapshot's allocator settings or a "not found" note.
// NOTE(review): `device` is not read in the lines visible here; some interior
// lines of this definition are not visible in this view.
function create_settings_view(dst, snapshot, device) {
1279
// Clear whatever was rendered into `dst` previously.
dst.selectAll('svg').remove();
1280
dst.selectAll('div').remove();
1281
const settings_div = dst.append('div');
1282
settings_div.append('p').text('CUDA Caching Allocator Settings:');
1285
if ('allocator_settings' in snapshot) {
1288
// Settings are shown as JSON indented by two spaces.
.text(JSON.stringify(snapshot.allocator_settings, null, 2));
1290
settings_div.append('p').text('No allocator settings found.');
1294
function unpickle(buffer) {
1295
const bytebuffer = new Uint8Array(buffer);
1296
const decoder = new TextDecoder();
1304
const APPENDS = 'e'.charCodeAt(0);
1305
const BINGET = 'h'.charCodeAt(0);
1306
const BININT = 'J'.charCodeAt(0);
1307
const BININT1 = 'K'.charCodeAt(0);
1308
const BININT2 = 'M'.charCodeAt(0);
1309
const EMPTY_DICT = '}'.charCodeAt(0);
1310
const EMPTY_LIST = ']'.charCodeAt(0);
1313
const LONG_BINGET = 'j'.charCodeAt(0);
1314
const MARK = '('.charCodeAt(0);
1315
const MEMOIZE = 0x94;
1317
const SETITEMS = 'u'.charCodeAt(0);
1318
const SHORT_BINUNICODE = 0x8c;
1319
const STOP = '.'.charCodeAt(0);
1320
const TUPLE2 = 0x86;
1321
const APPEND = 'a'.charCodeAt(0);
1322
const NEWFALSE = 0x89;
1323
const BINPUT = 'q'.charCodeAt(0);
1324
const BINUNICODE = 'X'.charCodeAt(0);
1325
const EMPTY_TUPLE = ')'.charCodeAt(0);
1326
const NEWTRUE = 0x88;
1327
const NONE = 'N'.charCodeAt(0);
1328
const BINFLOAT = 'G'.charCodeAt(0);
1329
const TUPLE = 't'.charCodeAt(0);
1330
const TUPLE1 = 0x85;
1331
const TUPLE3 = 0x87;
1333
const LONG_BINPUT = 'r'.charCodeAt(0);
1334
const LIST = 'l'.charCodeAt(0);
1335
const DICT = 'd'.charCodeAt(0);
1336
const SETITEM = 's'.charCodeAt(0);
1338
const scratch_buffer = new ArrayBuffer(8);
1339
const scratch_bytes = new Uint8Array(scratch_buffer);
1340
const big = new BigInt64Array(scratch_buffer);
1341
const float64 = new Float64Array(scratch_buffer);
1343
function read_uint4() {
1345
bytebuffer[offset] +
1346
bytebuffer[offset + 1] * 256 +
1347
bytebuffer[offset + 2] * 65536 +
1348
bytebuffer[offset + 3] * 16777216;
1352
function setitems(d, mark) {
1353
for (let i = mark; i < stack.length; i += 2) {
1354
d[stack[i]] = stack[i + 1];
1356
stack.splice(mark, Infinity);
1360
const opcode = bytebuffer[offset++];
1364
const version = bytebuffer[offset++];
1365
if (version < 2 || version > 4) {
1366
throw new Error(`Unhandled version ${version}`);
1372
const v = stack.pop();
1373
stack.at(-1).push(v);
1378
const mark = marks.pop();
1379
const arr = stack[mark - 1];
1380
arr.push(...stack.splice(mark, Infinity));
1386
const mark = marks.pop();
1387
stack.push([...stack.splice(mark, Infinity)]);
1400
stack.push(memo[bytebuffer[offset++]]);
1404
let i32 = read_uint4();
1405
if (i32 > 0x7fffffff) {
1412
stack.push(bytebuffer[offset++]);
1416
const v = bytebuffer[offset] + bytebuffer[offset + 1] * 256;
1432
const s = bytebuffer[offset++];
1434
for (let i = 0; i < s; i++) {
1435
scratch_bytes[i] = bytebuffer[offset++];
1437
const fill = scratch_bytes[s - 1] >= 128 ? 0xff : 0x0;
1438
for (let i = s; i < 8; i++) {
1439
scratch_bytes[i] = fill;
1441
stack.push(Number(big[0]));
1443
let scratch_bytes_unbounded = [];
1444
for (let i = 0; i < s; i++) {
1445
scratch_bytes_unbounded.push(bytebuffer[offset++]);
1450
const negative = scratch_bytes_unbounded[s - 1] >= 128;
1455
for (let i = 0; i < s; i++) {
1456
const twos_complement = (0xff ^ scratch_bytes_unbounded[i]) + carry;
1457
carry = twos_complement > 0xff ? 1 : 0;
1458
scratch_bytes_unbounded[i] = 0xff & twos_complement;
1462
const hex_str = Array.from(scratch_bytes_unbounded.reverse(), byte => {
1463
return byte.toString(16).padStart(2, '0');
1466
const big_int = negative ? -BigInt(`0x${hex_str}`) : BigInt(`0x${hex_str}`);
1467
stack.push(big_int);
1473
const idx = read_uint4();
1474
stack.push(memo[idx]);
1478
marks.push(stack.length);
1481
memo[memo_id++] = stack.at(-1);
1484
memo[bytebuffer[offset++]] = stack.at(-1);
1487
memo[read_uint4()] = stack.at(-1);
1491
const mark = marks.pop();
1492
const d = stack[mark - 1];
1497
const v = stack.pop();
1498
const k = stack.pop();
1499
stack.at(-1)[k] = v;
1504
const mark = marks.pop();
1510
case SHORT_BINUNICODE:
1512
const n = bytebuffer[offset++];
1513
stack.push(decoder.decode(new Uint8Array(buffer, offset, n)));
1519
const n = read_uint4();
1520
stack.push(decoder.decode(new Uint8Array(buffer, offset, n)));
1530
stack.push([stack.pop()]);
1533
stack.push(stack.splice(-2, Infinity));
1536
stack.push(stack.splice(-3, Infinity));
1539
for (let i = 7; i >= 0; i--) {
1541
scratch_bytes[i] = bytebuffer[offset++];
1543
stack.push(float64[0]);
1546
throw new Error(`UNKNOWN OPCODE: ${opcode}`);
1551
// Decodes a base64 string into bytes and returns the underlying ArrayBuffer.
// Input is consumed four characters at a time, each group producing three bytes.
// NOTE(review): parts of `decode_char` and of the loop body are not visible in
// this view.
function decode_base64(input) {
1552
// Helper taking a character index and a bit shift (18, 12 or 6 at the visible
// call sites).
function decode_char(i, shift) {
1553
// Code-unit ranges tested below: 65-90 is 'A'-'Z', 97-122 is 'a'-'z',
// 48-57 is '0'-'9'.
const nChr = input.charCodeAt(i);
1555
nChr > 64 && nChr < 91
1557
: nChr > 96 && nChr < 123
1559
: nChr > 47 && nChr < 58
1568
// Three output bytes are reserved per four input characters.
// NOTE(review): the visible lines do not shrink the buffer for '=' padding —
// confirm that trailing bytes are acceptable to the consumer.
const output = new Uint8Array((input.length / 4) * 3);
1569
for (let i = 0, j = 0; i < input.length; i += 4, j += 3) {
1571
decode_char(i, 18) +
1572
decode_char(i + 1, 12) +
1573
decode_char(i + 2, 6) +
1575
// Split the 24-bit group into three bytes, most significant first.
output[j] = u24 >> 16;
1576
output[j + 1] = (u24 >> 8) & 0xff;
1577
output[j + 2] = u24 & 0xff;
1579
return output.buffer;
1583
'Active Memory Timeline': create_trace_view,
1584
'Allocator State History': create_segment_view,
1585
'Active Cached Segment Timeline': (dst, snapshot, device) =>
1586
create_trace_view(dst, snapshot, device, true),
1587
'Allocator Settings': create_settings_view,
1590
const snapshot_cache = {};
1591
const snapshot_to_loader = {};
1592
const snapshot_to_url = {};
1593
const selection_to_div = {};
1604
const head = d3.select('head');
1605
head.append('style').text(style);
1606
const body = d3.select('body');
1607
const snapshot_select = body.append('select');
1608
const view = body.append('select');
1609
for (const x in kinds) {
1610
view.append('option').text(x);
1612
const gpu = body.append('select');
1614
// Decodes a raw snapshot with `unpickle` and then passes the decoded object to
// `annotate_snapshot`. The caller in this file stores the result in the
// snapshot cache.
// NOTE(review): the remaining lines of this definition are not visible in this view.
function unpickle_and_annotate(data) {
1615
data = unpickle(data);
1617
annotate_snapshot(data);
1621
function snapshot_change(f) {
1622
const view_value = view.node().value;
1623
let device = Number(gpu.node().value);
1624
const snapshot = snapshot_cache[f];
1625
gpu.selectAll('option').remove();
1626
const has_segments = {};
1627
for (const s of snapshot.segments) {
1628
has_segments[s.device] = true;
1630
let device_valid = false;
1631
for (const [i, trace] of snapshot.device_traces.entries()) {
1632
if (trace.length > 0 || i in has_segments) {
1633
gpu.append('option').text(i);
1635
device_valid = true;
1636
gpu.node().selectedIndex = gpu.node().children.length - 1;
1640
if (!device_valid) {
1641
device = Number(gpu.node().value);
1643
const key = [f, view_value, device];
1644
if (!(key in selection_to_div)) {
1645
selection_to_div[key] = d3.select('body').append('div');
1646
kinds[view_value](selection_to_div[key], snapshot, device);
1648
const selected_div = selection_to_div[key];
1650
selected_div.attr('style', 'display: float; height: 100%');
1653
// Change handler shared by the snapshot, view and device <select> elements.
// NOTE(review): some interior lines of this definition are not visible in this view.
function selected_change() {
1654
// Hide every view container that has been created so far.
for (const d of Object.values(selection_to_div)) {
1655
d.attr('style', 'display: none; height: 100%');
1657
// Name of the snapshot currently chosen in the drop-down.
const f = snapshot_select.node().value;
1661
// Not cached yet: invoke the loader registered for this snapshot name.
if (!(f in snapshot_cache)) {
1662
snapshot_to_loader[f](f);
1668
snapshot_select.on('change', selected_change);
1669
view.on('change', selected_change);
1670
gpu.on('change', selected_change);
1672
// Cancel the default dragover action so the page accepts dropped files.
// The current DOM event is read from `d3.event`, as in the keydown handler in
// this file, rather than from the implicit global `event`.
body.on('dragover', () => {
  d3.event.preventDefault();
});
1676
body.on('drop', () => {
1677
console.log(event.dataTransfer.files);
1678
Array.from(event.dataTransfer.files).forEach(file => {
1679
add_snapshot(file.name, unique_name => {
1680
const reader = new FileReader();
1681
reader.onload = e => {
1682
finished_loading(unique_name, e.target.result);
1684
reader.readAsArrayBuffer(file);
1687
event.preventDefault();
1688
snapshot_select.node().selectedIndex =
1689
snapshot_select.node().options.length - 1;
1693
selection_to_div[''] = body
1696
'Drag and drop a file to load a local snapshot. No data from the snapshot is uploaded.',
1699
let next_unique_n = 1;
1700
/**
 * Registers a snapshot under a unique display name: adds an <option> for it to
 * the snapshot drop-down and remembers its loader.
 *
 * @param {string} name - preferred display name
 * @param {Function} loader - stored under the final name; this file later
 *   calls it with that name to load the snapshot
 * @returns {void}
 */
function add_snapshot(name, loader) {
  let unique_name = name;
  // Keep generating suffixed candidates until one is unused, so a generated
  // name can never overwrite an entry that already carries that exact name.
  while (unique_name in snapshot_to_loader) {
    unique_name = `${name} (${next_unique_n++})`;
  }
  snapshot_select.append('option').text(unique_name);
  snapshot_to_loader[unique_name] = loader;
}
1708
/**
 * Completes loading of a snapshot: decodes and annotates the raw data, stores
 * the result in the snapshot cache under `name`, then calls `snapshot_change`
 * for that name.
 *
 * @param {string} name - name the snapshot was registered under
 * @param data - raw snapshot bytes handed to `unpickle_and_annotate`
 * @returns {void}
 */
function finished_loading(name, data) {
  snapshot_cache[name] = unpickle_and_annotate(data);
  snapshot_change(name);
}
1713
export function add_remote_files(files) {
1715
add_snapshot(f.name, unique_name => {
1716
console.log('fetching', f.url);
1718
.then(x => x.arrayBuffer())
1719
.then(data => finished_loading(unique_name, data));
1722
if (files.length > 0) {
1727
export function add_local_files(files, view_value) {
1728
view.node().value = view_value;
1730
add_snapshot(f.name, unique_name => {
1731
finished_loading(unique_name, decode_base64(f.base64));
1734
if (files.length > 0) {