llm-course

Форк
0
/
Fine_tune_Llama_2_in_Google_Colab.ipynb 
3349 строк · 117.8 Кб
1
{
2
  "nbformat": 4,
3
  "nbformat_minor": 0,
4
  "metadata": {
5
    "colab": {
6
      "provenance": [],
7
      "gpuType": "T4",
8
      "authorship_tag": "ABX9TyN5b5wF0ITT+T1IRzUm6Zjj",
9
      "include_colab_link": true
10
    },
11
    "kernelspec": {
12
      "name": "python3",
13
      "display_name": "Python 3"
14
    },
15
    "language_info": {
16
      "name": "python"
17
    },
18
    "accelerator": "GPU",
19
    "widgets": {
20
      "application/vnd.jupyter.widget-state+json": {
21
        "52c4bf7418f74bc79a8c12fe35901974": {
22
          "model_module": "@jupyter-widgets/controls",
23
          "model_name": "HBoxModel",
24
          "model_module_version": "1.5.0",
25
          "state": {
26
            "_dom_classes": [],
27
            "_model_module": "@jupyter-widgets/controls",
28
            "_model_module_version": "1.5.0",
29
            "_model_name": "HBoxModel",
30
            "_view_count": null,
31
            "_view_module": "@jupyter-widgets/controls",
32
            "_view_module_version": "1.5.0",
33
            "_view_name": "HBoxView",
34
            "box_style": "",
35
            "children": [
36
              "IPY_MODEL_c5e609d111b34d408a53a4cd71bb43d5",
37
              "IPY_MODEL_0e0a20b5ed7a44e9834022e7eba2194d",
38
              "IPY_MODEL_b5627331e78e4eb28765ed20f32cf403"
39
            ],
40
            "layout": "IPY_MODEL_8084d4cb267f4a52b3d80ec34d291190"
41
          }
42
        },
43
        "c5e609d111b34d408a53a4cd71bb43d5": {
44
          "model_module": "@jupyter-widgets/controls",
45
          "model_name": "HTMLModel",
46
          "model_module_version": "1.5.0",
47
          "state": {
48
            "_dom_classes": [],
49
            "_model_module": "@jupyter-widgets/controls",
50
            "_model_module_version": "1.5.0",
51
            "_model_name": "HTMLModel",
52
            "_view_count": null,
53
            "_view_module": "@jupyter-widgets/controls",
54
            "_view_module_version": "1.5.0",
55
            "_view_name": "HTMLView",
56
            "description": "",
57
            "description_tooltip": null,
58
            "layout": "IPY_MODEL_a8dcdf1f7ab64242acb057e8b54ebf79",
59
            "placeholder": "​",
60
            "style": "IPY_MODEL_1ca492fddbaa4ea7a3226649154e01fd",
61
            "value": "Loading checkpoint shards: 100%"
62
          }
63
        },
64
        "0e0a20b5ed7a44e9834022e7eba2194d": {
65
          "model_module": "@jupyter-widgets/controls",
66
          "model_name": "FloatProgressModel",
67
          "model_module_version": "1.5.0",
68
          "state": {
69
            "_dom_classes": [],
70
            "_model_module": "@jupyter-widgets/controls",
71
            "_model_module_version": "1.5.0",
72
            "_model_name": "FloatProgressModel",
73
            "_view_count": null,
74
            "_view_module": "@jupyter-widgets/controls",
75
            "_view_module_version": "1.5.0",
76
            "_view_name": "ProgressView",
77
            "bar_style": "success",
78
            "description": "",
79
            "description_tooltip": null,
80
            "layout": "IPY_MODEL_a8eda8bfe08e4152a80c63830138c96d",
81
            "max": 2,
82
            "min": 0,
83
            "orientation": "horizontal",
84
            "style": "IPY_MODEL_1f258eacd6d0472385d41523b65dea8b",
85
            "value": 2
86
          }
87
        },
88
        "b5627331e78e4eb28765ed20f32cf403": {
89
          "model_module": "@jupyter-widgets/controls",
90
          "model_name": "HTMLModel",
91
          "model_module_version": "1.5.0",
92
          "state": {
93
            "_dom_classes": [],
94
            "_model_module": "@jupyter-widgets/controls",
95
            "_model_module_version": "1.5.0",
96
            "_model_name": "HTMLModel",
97
            "_view_count": null,
98
            "_view_module": "@jupyter-widgets/controls",
99
            "_view_module_version": "1.5.0",
100
            "_view_name": "HTMLView",
101
            "description": "",
102
            "description_tooltip": null,
103
            "layout": "IPY_MODEL_228b1bcf604f454f8060a250b58008a1",
104
            "placeholder": "​",
105
            "style": "IPY_MODEL_90b281e9c5ed4e77ab93e5879d0b15a3",
106
            "value": " 2/2 [01:13<00:00, 33.04s/it]"
107
          }
108
        },
109
        "8084d4cb267f4a52b3d80ec34d291190": {
110
          "model_module": "@jupyter-widgets/base",
111
          "model_name": "LayoutModel",
112
          "model_module_version": "1.2.0",
113
          "state": {
114
            "_model_module": "@jupyter-widgets/base",
115
            "_model_module_version": "1.2.0",
116
            "_model_name": "LayoutModel",
117
            "_view_count": null,
118
            "_view_module": "@jupyter-widgets/base",
119
            "_view_module_version": "1.2.0",
120
            "_view_name": "LayoutView",
121
            "align_content": null,
122
            "align_items": null,
123
            "align_self": null,
124
            "border": null,
125
            "bottom": null,
126
            "display": null,
127
            "flex": null,
128
            "flex_flow": null,
129
            "grid_area": null,
130
            "grid_auto_columns": null,
131
            "grid_auto_flow": null,
132
            "grid_auto_rows": null,
133
            "grid_column": null,
134
            "grid_gap": null,
135
            "grid_row": null,
136
            "grid_template_areas": null,
137
            "grid_template_columns": null,
138
            "grid_template_rows": null,
139
            "height": null,
140
            "justify_content": null,
141
            "justify_items": null,
142
            "left": null,
143
            "margin": null,
144
            "max_height": null,
145
            "max_width": null,
146
            "min_height": null,
147
            "min_width": null,
148
            "object_fit": null,
149
            "object_position": null,
150
            "order": null,
151
            "overflow": null,
152
            "overflow_x": null,
153
            "overflow_y": null,
154
            "padding": null,
155
            "right": null,
156
            "top": null,
157
            "visibility": null,
158
            "width": null
159
          }
160
        },
161
        "a8dcdf1f7ab64242acb057e8b54ebf79": {
162
          "model_module": "@jupyter-widgets/base",
163
          "model_name": "LayoutModel",
164
          "model_module_version": "1.2.0",
165
          "state": {
166
            "_model_module": "@jupyter-widgets/base",
167
            "_model_module_version": "1.2.0",
168
            "_model_name": "LayoutModel",
169
            "_view_count": null,
170
            "_view_module": "@jupyter-widgets/base",
171
            "_view_module_version": "1.2.0",
172
            "_view_name": "LayoutView",
173
            "align_content": null,
174
            "align_items": null,
175
            "align_self": null,
176
            "border": null,
177
            "bottom": null,
178
            "display": null,
179
            "flex": null,
180
            "flex_flow": null,
181
            "grid_area": null,
182
            "grid_auto_columns": null,
183
            "grid_auto_flow": null,
184
            "grid_auto_rows": null,
185
            "grid_column": null,
186
            "grid_gap": null,
187
            "grid_row": null,
188
            "grid_template_areas": null,
189
            "grid_template_columns": null,
190
            "grid_template_rows": null,
191
            "height": null,
192
            "justify_content": null,
193
            "justify_items": null,
194
            "left": null,
195
            "margin": null,
196
            "max_height": null,
197
            "max_width": null,
198
            "min_height": null,
199
            "min_width": null,
200
            "object_fit": null,
201
            "object_position": null,
202
            "order": null,
203
            "overflow": null,
204
            "overflow_x": null,
205
            "overflow_y": null,
206
            "padding": null,
207
            "right": null,
208
            "top": null,
209
            "visibility": null,
210
            "width": null
211
          }
212
        },
213
        "1ca492fddbaa4ea7a3226649154e01fd": {
214
          "model_module": "@jupyter-widgets/controls",
215
          "model_name": "DescriptionStyleModel",
216
          "model_module_version": "1.5.0",
217
          "state": {
218
            "_model_module": "@jupyter-widgets/controls",
219
            "_model_module_version": "1.5.0",
220
            "_model_name": "DescriptionStyleModel",
221
            "_view_count": null,
222
            "_view_module": "@jupyter-widgets/base",
223
            "_view_module_version": "1.2.0",
224
            "_view_name": "StyleView",
225
            "description_width": ""
226
          }
227
        },
228
        "a8eda8bfe08e4152a80c63830138c96d": {
229
          "model_module": "@jupyter-widgets/base",
230
          "model_name": "LayoutModel",
231
          "model_module_version": "1.2.0",
232
          "state": {
233
            "_model_module": "@jupyter-widgets/base",
234
            "_model_module_version": "1.2.0",
235
            "_model_name": "LayoutModel",
236
            "_view_count": null,
237
            "_view_module": "@jupyter-widgets/base",
238
            "_view_module_version": "1.2.0",
239
            "_view_name": "LayoutView",
240
            "align_content": null,
241
            "align_items": null,
242
            "align_self": null,
243
            "border": null,
244
            "bottom": null,
245
            "display": null,
246
            "flex": null,
247
            "flex_flow": null,
248
            "grid_area": null,
249
            "grid_auto_columns": null,
250
            "grid_auto_flow": null,
251
            "grid_auto_rows": null,
252
            "grid_column": null,
253
            "grid_gap": null,
254
            "grid_row": null,
255
            "grid_template_areas": null,
256
            "grid_template_columns": null,
257
            "grid_template_rows": null,
258
            "height": null,
259
            "justify_content": null,
260
            "justify_items": null,
261
            "left": null,
262
            "margin": null,
263
            "max_height": null,
264
            "max_width": null,
265
            "min_height": null,
266
            "min_width": null,
267
            "object_fit": null,
268
            "object_position": null,
269
            "order": null,
270
            "overflow": null,
271
            "overflow_x": null,
272
            "overflow_y": null,
273
            "padding": null,
274
            "right": null,
275
            "top": null,
276
            "visibility": null,
277
            "width": null
278
          }
279
        },
280
        "1f258eacd6d0472385d41523b65dea8b": {
281
          "model_module": "@jupyter-widgets/controls",
282
          "model_name": "ProgressStyleModel",
283
          "model_module_version": "1.5.0",
284
          "state": {
285
            "_model_module": "@jupyter-widgets/controls",
286
            "_model_module_version": "1.5.0",
287
            "_model_name": "ProgressStyleModel",
288
            "_view_count": null,
289
            "_view_module": "@jupyter-widgets/base",
290
            "_view_module_version": "1.2.0",
291
            "_view_name": "StyleView",
292
            "bar_color": null,
293
            "description_width": ""
294
          }
295
        },
296
        "228b1bcf604f454f8060a250b58008a1": {
297
          "model_module": "@jupyter-widgets/base",
298
          "model_name": "LayoutModel",
299
          "model_module_version": "1.2.0",
300
          "state": {
301
            "_model_module": "@jupyter-widgets/base",
302
            "_model_module_version": "1.2.0",
303
            "_model_name": "LayoutModel",
304
            "_view_count": null,
305
            "_view_module": "@jupyter-widgets/base",
306
            "_view_module_version": "1.2.0",
307
            "_view_name": "LayoutView",
308
            "align_content": null,
309
            "align_items": null,
310
            "align_self": null,
311
            "border": null,
312
            "bottom": null,
313
            "display": null,
314
            "flex": null,
315
            "flex_flow": null,
316
            "grid_area": null,
317
            "grid_auto_columns": null,
318
            "grid_auto_flow": null,
319
            "grid_auto_rows": null,
320
            "grid_column": null,
321
            "grid_gap": null,
322
            "grid_row": null,
323
            "grid_template_areas": null,
324
            "grid_template_columns": null,
325
            "grid_template_rows": null,
326
            "height": null,
327
            "justify_content": null,
328
            "justify_items": null,
329
            "left": null,
330
            "margin": null,
331
            "max_height": null,
332
            "max_width": null,
333
            "min_height": null,
334
            "min_width": null,
335
            "object_fit": null,
336
            "object_position": null,
337
            "order": null,
338
            "overflow": null,
339
            "overflow_x": null,
340
            "overflow_y": null,
341
            "padding": null,
342
            "right": null,
343
            "top": null,
344
            "visibility": null,
345
            "width": null
346
          }
347
        },
348
        "90b281e9c5ed4e77ab93e5879d0b15a3": {
349
          "model_module": "@jupyter-widgets/controls",
350
          "model_name": "DescriptionStyleModel",
351
          "model_module_version": "1.5.0",
352
          "state": {
353
            "_model_module": "@jupyter-widgets/controls",
354
            "_model_module_version": "1.5.0",
355
            "_model_name": "DescriptionStyleModel",
356
            "_view_count": null,
357
            "_view_module": "@jupyter-widgets/base",
358
            "_view_module_version": "1.2.0",
359
            "_view_name": "StyleView",
360
            "description_width": ""
361
          }
362
        },
363
        "051d193cd87f47c1971fb87544e1e615": {
364
          "model_module": "@jupyter-widgets/controls",
365
          "model_name": "HBoxModel",
366
          "model_module_version": "1.5.0",
367
          "state": {
368
            "_dom_classes": [],
369
            "_model_module": "@jupyter-widgets/controls",
370
            "_model_module_version": "1.5.0",
371
            "_model_name": "HBoxModel",
372
            "_view_count": null,
373
            "_view_module": "@jupyter-widgets/controls",
374
            "_view_module_version": "1.5.0",
375
            "_view_name": "HBoxView",
376
            "box_style": "",
377
            "children": [
378
              "IPY_MODEL_9d7247c119e642c5894f15ca6974ef3e",
379
              "IPY_MODEL_a79c22bb34ec4f698a00752b47a6f631",
380
              "IPY_MODEL_d95f3a3f26c6470d984542cdfd68bec1"
381
            ],
382
            "layout": "IPY_MODEL_343e11c62a59448eb43bbc0c31bf5f11"
383
          }
384
        },
385
        "9d7247c119e642c5894f15ca6974ef3e": {
386
          "model_module": "@jupyter-widgets/controls",
387
          "model_name": "HTMLModel",
388
          "model_module_version": "1.5.0",
389
          "state": {
390
            "_dom_classes": [],
391
            "_model_module": "@jupyter-widgets/controls",
392
            "_model_module_version": "1.5.0",
393
            "_model_name": "HTMLModel",
394
            "_view_count": null,
395
            "_view_module": "@jupyter-widgets/controls",
396
            "_view_module_version": "1.5.0",
397
            "_view_name": "HTMLView",
398
            "description": "",
399
            "description_tooltip": null,
400
            "layout": "IPY_MODEL_a153c96bd1fe4c48a41e9b9c7c00dd6e",
401
            "placeholder": "​",
402
            "style": "IPY_MODEL_84da055d24694320843e13ad37438792",
403
            "value": "Loading checkpoint shards: 100%"
404
          }
405
        },
406
        "a79c22bb34ec4f698a00752b47a6f631": {
407
          "model_module": "@jupyter-widgets/controls",
408
          "model_name": "FloatProgressModel",
409
          "model_module_version": "1.5.0",
410
          "state": {
411
            "_dom_classes": [],
412
            "_model_module": "@jupyter-widgets/controls",
413
            "_model_module_version": "1.5.0",
414
            "_model_name": "FloatProgressModel",
415
            "_view_count": null,
416
            "_view_module": "@jupyter-widgets/controls",
417
            "_view_module_version": "1.5.0",
418
            "_view_name": "ProgressView",
419
            "bar_style": "success",
420
            "description": "",
421
            "description_tooltip": null,
422
            "layout": "IPY_MODEL_e375632975904402baea46163e2eeca1",
423
            "max": 2,
424
            "min": 0,
425
            "orientation": "horizontal",
426
            "style": "IPY_MODEL_95501d0b5a22407288f008bf8cc69726",
427
            "value": 2
428
          }
429
        },
430
        "d95f3a3f26c6470d984542cdfd68bec1": {
431
          "model_module": "@jupyter-widgets/controls",
432
          "model_name": "HTMLModel",
433
          "model_module_version": "1.5.0",
434
          "state": {
435
            "_dom_classes": [],
436
            "_model_module": "@jupyter-widgets/controls",
437
            "_model_module_version": "1.5.0",
438
            "_model_name": "HTMLModel",
439
            "_view_count": null,
440
            "_view_module": "@jupyter-widgets/controls",
441
            "_view_module_version": "1.5.0",
442
            "_view_name": "HTMLView",
443
            "description": "",
444
            "description_tooltip": null,
445
            "layout": "IPY_MODEL_6aef866a6c474dfabb2140ded933c5aa",
446
            "placeholder": "​",
447
            "style": "IPY_MODEL_d66fa096d442423c9447cbfbdc1aad8d",
448
            "value": " 2/2 [00:59<00:00, 27.43s/it]"
449
          }
450
        },
451
        "343e11c62a59448eb43bbc0c31bf5f11": {
452
          "model_module": "@jupyter-widgets/base",
453
          "model_name": "LayoutModel",
454
          "model_module_version": "1.2.0",
455
          "state": {
456
            "_model_module": "@jupyter-widgets/base",
457
            "_model_module_version": "1.2.0",
458
            "_model_name": "LayoutModel",
459
            "_view_count": null,
460
            "_view_module": "@jupyter-widgets/base",
461
            "_view_module_version": "1.2.0",
462
            "_view_name": "LayoutView",
463
            "align_content": null,
464
            "align_items": null,
465
            "align_self": null,
466
            "border": null,
467
            "bottom": null,
468
            "display": null,
469
            "flex": null,
470
            "flex_flow": null,
471
            "grid_area": null,
472
            "grid_auto_columns": null,
473
            "grid_auto_flow": null,
474
            "grid_auto_rows": null,
475
            "grid_column": null,
476
            "grid_gap": null,
477
            "grid_row": null,
478
            "grid_template_areas": null,
479
            "grid_template_columns": null,
480
            "grid_template_rows": null,
481
            "height": null,
482
            "justify_content": null,
483
            "justify_items": null,
484
            "left": null,
485
            "margin": null,
486
            "max_height": null,
487
            "max_width": null,
488
            "min_height": null,
489
            "min_width": null,
490
            "object_fit": null,
491
            "object_position": null,
492
            "order": null,
493
            "overflow": null,
494
            "overflow_x": null,
495
            "overflow_y": null,
496
            "padding": null,
497
            "right": null,
498
            "top": null,
499
            "visibility": null,
500
            "width": null
501
          }
502
        },
503
        "a153c96bd1fe4c48a41e9b9c7c00dd6e": {
504
          "model_module": "@jupyter-widgets/base",
505
          "model_name": "LayoutModel",
506
          "model_module_version": "1.2.0",
507
          "state": {
508
            "_model_module": "@jupyter-widgets/base",
509
            "_model_module_version": "1.2.0",
510
            "_model_name": "LayoutModel",
511
            "_view_count": null,
512
            "_view_module": "@jupyter-widgets/base",
513
            "_view_module_version": "1.2.0",
514
            "_view_name": "LayoutView",
515
            "align_content": null,
516
            "align_items": null,
517
            "align_self": null,
518
            "border": null,
519
            "bottom": null,
520
            "display": null,
521
            "flex": null,
522
            "flex_flow": null,
523
            "grid_area": null,
524
            "grid_auto_columns": null,
525
            "grid_auto_flow": null,
526
            "grid_auto_rows": null,
527
            "grid_column": null,
528
            "grid_gap": null,
529
            "grid_row": null,
530
            "grid_template_areas": null,
531
            "grid_template_columns": null,
532
            "grid_template_rows": null,
533
            "height": null,
534
            "justify_content": null,
535
            "justify_items": null,
536
            "left": null,
537
            "margin": null,
538
            "max_height": null,
539
            "max_width": null,
540
            "min_height": null,
541
            "min_width": null,
542
            "object_fit": null,
543
            "object_position": null,
544
            "order": null,
545
            "overflow": null,
546
            "overflow_x": null,
547
            "overflow_y": null,
548
            "padding": null,
549
            "right": null,
550
            "top": null,
551
            "visibility": null,
552
            "width": null
553
          }
554
        },
555
        "84da055d24694320843e13ad37438792": {
556
          "model_module": "@jupyter-widgets/controls",
557
          "model_name": "DescriptionStyleModel",
558
          "model_module_version": "1.5.0",
559
          "state": {
560
            "_model_module": "@jupyter-widgets/controls",
561
            "_model_module_version": "1.5.0",
562
            "_model_name": "DescriptionStyleModel",
563
            "_view_count": null,
564
            "_view_module": "@jupyter-widgets/base",
565
            "_view_module_version": "1.2.0",
566
            "_view_name": "StyleView",
567
            "description_width": ""
568
          }
569
        },
570
        "e375632975904402baea46163e2eeca1": {
571
          "model_module": "@jupyter-widgets/base",
572
          "model_name": "LayoutModel",
573
          "model_module_version": "1.2.0",
574
          "state": {
575
            "_model_module": "@jupyter-widgets/base",
576
            "_model_module_version": "1.2.0",
577
            "_model_name": "LayoutModel",
578
            "_view_count": null,
579
            "_view_module": "@jupyter-widgets/base",
580
            "_view_module_version": "1.2.0",
581
            "_view_name": "LayoutView",
582
            "align_content": null,
583
            "align_items": null,
584
            "align_self": null,
585
            "border": null,
586
            "bottom": null,
587
            "display": null,
588
            "flex": null,
589
            "flex_flow": null,
590
            "grid_area": null,
591
            "grid_auto_columns": null,
592
            "grid_auto_flow": null,
593
            "grid_auto_rows": null,
594
            "grid_column": null,
595
            "grid_gap": null,
596
            "grid_row": null,
597
            "grid_template_areas": null,
598
            "grid_template_columns": null,
599
            "grid_template_rows": null,
600
            "height": null,
601
            "justify_content": null,
602
            "justify_items": null,
603
            "left": null,
604
            "margin": null,
605
            "max_height": null,
606
            "max_width": null,
607
            "min_height": null,
608
            "min_width": null,
609
            "object_fit": null,
610
            "object_position": null,
611
            "order": null,
612
            "overflow": null,
613
            "overflow_x": null,
614
            "overflow_y": null,
615
            "padding": null,
616
            "right": null,
617
            "top": null,
618
            "visibility": null,
619
            "width": null
620
          }
621
        },
622
        "95501d0b5a22407288f008bf8cc69726": {
623
          "model_module": "@jupyter-widgets/controls",
624
          "model_name": "ProgressStyleModel",
625
          "model_module_version": "1.5.0",
626
          "state": {
627
            "_model_module": "@jupyter-widgets/controls",
628
            "_model_module_version": "1.5.0",
629
            "_model_name": "ProgressStyleModel",
630
            "_view_count": null,
631
            "_view_module": "@jupyter-widgets/base",
632
            "_view_module_version": "1.2.0",
633
            "_view_name": "StyleView",
634
            "bar_color": null,
635
            "description_width": ""
636
          }
637
        },
638
        "6aef866a6c474dfabb2140ded933c5aa": {
639
          "model_module": "@jupyter-widgets/base",
640
          "model_name": "LayoutModel",
641
          "model_module_version": "1.2.0",
642
          "state": {
643
            "_model_module": "@jupyter-widgets/base",
644
            "_model_module_version": "1.2.0",
645
            "_model_name": "LayoutModel",
646
            "_view_count": null,
647
            "_view_module": "@jupyter-widgets/base",
648
            "_view_module_version": "1.2.0",
649
            "_view_name": "LayoutView",
650
            "align_content": null,
651
            "align_items": null,
652
            "align_self": null,
653
            "border": null,
654
            "bottom": null,
655
            "display": null,
656
            "flex": null,
657
            "flex_flow": null,
658
            "grid_area": null,
659
            "grid_auto_columns": null,
660
            "grid_auto_flow": null,
661
            "grid_auto_rows": null,
662
            "grid_column": null,
663
            "grid_gap": null,
664
            "grid_row": null,
665
            "grid_template_areas": null,
666
            "grid_template_columns": null,
667
            "grid_template_rows": null,
668
            "height": null,
669
            "justify_content": null,
670
            "justify_items": null,
671
            "left": null,
672
            "margin": null,
673
            "max_height": null,
674
            "max_width": null,
675
            "min_height": null,
676
            "min_width": null,
677
            "object_fit": null,
678
            "object_position": null,
679
            "order": null,
680
            "overflow": null,
681
            "overflow_x": null,
682
            "overflow_y": null,
683
            "padding": null,
684
            "right": null,
685
            "top": null,
686
            "visibility": null,
687
            "width": null
688
          }
689
        },
690
        "d66fa096d442423c9447cbfbdc1aad8d": {
691
          "model_module": "@jupyter-widgets/controls",
692
          "model_name": "DescriptionStyleModel",
693
          "model_module_version": "1.5.0",
694
          "state": {
695
            "_model_module": "@jupyter-widgets/controls",
696
            "_model_module_version": "1.5.0",
697
            "_model_name": "DescriptionStyleModel",
698
            "_view_count": null,
699
            "_view_module": "@jupyter-widgets/base",
700
            "_view_module_version": "1.2.0",
701
            "_view_name": "StyleView",
702
            "description_width": ""
703
          }
704
        },
705
        "c99aff4cfd664ae8a165a27bea0566c8": {
706
          "model_module": "@jupyter-widgets/controls",
707
          "model_name": "HBoxModel",
708
          "model_module_version": "1.5.0",
709
          "state": {
710
            "_dom_classes": [],
711
            "_model_module": "@jupyter-widgets/controls",
712
            "_model_module_version": "1.5.0",
713
            "_model_name": "HBoxModel",
714
            "_view_count": null,
715
            "_view_module": "@jupyter-widgets/controls",
716
            "_view_module_version": "1.5.0",
717
            "_view_name": "HBoxView",
718
            "box_style": "",
719
            "children": [
720
              "IPY_MODEL_e4b64cab6b7b418c8a2575ee26839039",
721
              "IPY_MODEL_c3a4fedc73b3480089ef9d13381471ed",
722
              "IPY_MODEL_bf722f71c61b4285bcbbf32fd619b3a6"
723
            ],
724
            "layout": "IPY_MODEL_fd11a6148b704c5b9142c5e8de2d3b25"
725
          }
726
        },
727
        "e4b64cab6b7b418c8a2575ee26839039": {
728
          "model_module": "@jupyter-widgets/controls",
729
          "model_name": "HTMLModel",
730
          "model_module_version": "1.5.0",
731
          "state": {
732
            "_dom_classes": [],
733
            "_model_module": "@jupyter-widgets/controls",
734
            "_model_module_version": "1.5.0",
735
            "_model_name": "HTMLModel",
736
            "_view_count": null,
737
            "_view_module": "@jupyter-widgets/controls",
738
            "_view_module_version": "1.5.0",
739
            "_view_name": "HTMLView",
740
            "description": "",
741
            "description_tooltip": null,
742
            "layout": "IPY_MODEL_f0bcdaf940d14ad796fc7ac46c8e1e64",
743
            "placeholder": "​",
744
            "style": "IPY_MODEL_b6e821c974674f2290c354238d6c919c",
745
            "value": "Upload 2 LFS files: 100%"
746
          }
747
        },
748
        "c3a4fedc73b3480089ef9d13381471ed": {
749
          "model_module": "@jupyter-widgets/controls",
750
          "model_name": "FloatProgressModel",
751
          "model_module_version": "1.5.0",
752
          "state": {
753
            "_dom_classes": [],
754
            "_model_module": "@jupyter-widgets/controls",
755
            "_model_module_version": "1.5.0",
756
            "_model_name": "FloatProgressModel",
757
            "_view_count": null,
758
            "_view_module": "@jupyter-widgets/controls",
759
            "_view_module_version": "1.5.0",
760
            "_view_name": "ProgressView",
761
            "bar_style": "success",
762
            "description": "",
763
            "description_tooltip": null,
764
            "layout": "IPY_MODEL_eeba50e8242c4753bfc0ea48e03f9078",
765
            "max": 2,
766
            "min": 0,
767
            "orientation": "horizontal",
768
            "style": "IPY_MODEL_7a1f3340688d408092adade75f4baac4",
769
            "value": 2
770
          }
771
        },
772
        "bf722f71c61b4285bcbbf32fd619b3a6": {
773
          "model_module": "@jupyter-widgets/controls",
774
          "model_name": "HTMLModel",
775
          "model_module_version": "1.5.0",
776
          "state": {
777
            "_dom_classes": [],
778
            "_model_module": "@jupyter-widgets/controls",
779
            "_model_module_version": "1.5.0",
780
            "_model_name": "HTMLModel",
781
            "_view_count": null,
782
            "_view_module": "@jupyter-widgets/controls",
783
            "_view_module_version": "1.5.0",
784
            "_view_name": "HTMLView",
785
            "description": "",
786
            "description_tooltip": null,
787
            "layout": "IPY_MODEL_8c887ca9b0eb44fdb8608bf36b5db5c5",
788
            "placeholder": "​",
789
            "style": "IPY_MODEL_e4698337e6b843afac706ab657ca6af9",
790
            "value": " 2/2 [06:36<00:00, 396.47s/it]"
791
          }
792
        },
793
        "fd11a6148b704c5b9142c5e8de2d3b25": {
794
          "model_module": "@jupyter-widgets/base",
795
          "model_name": "LayoutModel",
796
          "model_module_version": "1.2.0",
797
          "state": {
798
            "_model_module": "@jupyter-widgets/base",
799
            "_model_module_version": "1.2.0",
800
            "_model_name": "LayoutModel",
801
            "_view_count": null,
802
            "_view_module": "@jupyter-widgets/base",
803
            "_view_module_version": "1.2.0",
804
            "_view_name": "LayoutView",
805
            "align_content": null,
806
            "align_items": null,
807
            "align_self": null,
808
            "border": null,
809
            "bottom": null,
810
            "display": null,
811
            "flex": null,
812
            "flex_flow": null,
813
            "grid_area": null,
814
            "grid_auto_columns": null,
815
            "grid_auto_flow": null,
816
            "grid_auto_rows": null,
817
            "grid_column": null,
818
            "grid_gap": null,
819
            "grid_row": null,
820
            "grid_template_areas": null,
821
            "grid_template_columns": null,
822
            "grid_template_rows": null,
823
            "height": null,
824
            "justify_content": null,
825
            "justify_items": null,
826
            "left": null,
827
            "margin": null,
828
            "max_height": null,
829
            "max_width": null,
830
            "min_height": null,
831
            "min_width": null,
832
            "object_fit": null,
833
            "object_position": null,
834
            "order": null,
835
            "overflow": null,
836
            "overflow_x": null,
837
            "overflow_y": null,
838
            "padding": null,
839
            "right": null,
840
            "top": null,
841
            "visibility": null,
842
            "width": null
843
          }
844
        },
845
        "f0bcdaf940d14ad796fc7ac46c8e1e64": {
846
          "model_module": "@jupyter-widgets/base",
847
          "model_name": "LayoutModel",
848
          "model_module_version": "1.2.0",
849
          "state": {
850
            "_model_module": "@jupyter-widgets/base",
851
            "_model_module_version": "1.2.0",
852
            "_model_name": "LayoutModel",
853
            "_view_count": null,
854
            "_view_module": "@jupyter-widgets/base",
855
            "_view_module_version": "1.2.0",
856
            "_view_name": "LayoutView",
857
            "align_content": null,
858
            "align_items": null,
859
            "align_self": null,
860
            "border": null,
861
            "bottom": null,
862
            "display": null,
863
            "flex": null,
864
            "flex_flow": null,
865
            "grid_area": null,
866
            "grid_auto_columns": null,
867
            "grid_auto_flow": null,
868
            "grid_auto_rows": null,
869
            "grid_column": null,
870
            "grid_gap": null,
871
            "grid_row": null,
872
            "grid_template_areas": null,
873
            "grid_template_columns": null,
874
            "grid_template_rows": null,
875
            "height": null,
876
            "justify_content": null,
877
            "justify_items": null,
878
            "left": null,
879
            "margin": null,
880
            "max_height": null,
881
            "max_width": null,
882
            "min_height": null,
883
            "min_width": null,
884
            "object_fit": null,
885
            "object_position": null,
886
            "order": null,
887
            "overflow": null,
888
            "overflow_x": null,
889
            "overflow_y": null,
890
            "padding": null,
891
            "right": null,
892
            "top": null,
893
            "visibility": null,
894
            "width": null
895
          }
896
        },
897
        "b6e821c974674f2290c354238d6c919c": {
898
          "model_module": "@jupyter-widgets/controls",
899
          "model_name": "DescriptionStyleModel",
900
          "model_module_version": "1.5.0",
901
          "state": {
902
            "_model_module": "@jupyter-widgets/controls",
903
            "_model_module_version": "1.5.0",
904
            "_model_name": "DescriptionStyleModel",
905
            "_view_count": null,
906
            "_view_module": "@jupyter-widgets/base",
907
            "_view_module_version": "1.2.0",
908
            "_view_name": "StyleView",
909
            "description_width": ""
910
          }
911
        },
912
        "eeba50e8242c4753bfc0ea48e03f9078": {
913
          "model_module": "@jupyter-widgets/base",
914
          "model_name": "LayoutModel",
915
          "model_module_version": "1.2.0",
916
          "state": {
917
            "_model_module": "@jupyter-widgets/base",
918
            "_model_module_version": "1.2.0",
919
            "_model_name": "LayoutModel",
920
            "_view_count": null,
921
            "_view_module": "@jupyter-widgets/base",
922
            "_view_module_version": "1.2.0",
923
            "_view_name": "LayoutView",
924
            "align_content": null,
925
            "align_items": null,
926
            "align_self": null,
927
            "border": null,
928
            "bottom": null,
929
            "display": null,
930
            "flex": null,
931
            "flex_flow": null,
932
            "grid_area": null,
933
            "grid_auto_columns": null,
934
            "grid_auto_flow": null,
935
            "grid_auto_rows": null,
936
            "grid_column": null,
937
            "grid_gap": null,
938
            "grid_row": null,
939
            "grid_template_areas": null,
940
            "grid_template_columns": null,
941
            "grid_template_rows": null,
942
            "height": null,
943
            "justify_content": null,
944
            "justify_items": null,
945
            "left": null,
946
            "margin": null,
947
            "max_height": null,
948
            "max_width": null,
949
            "min_height": null,
950
            "min_width": null,
951
            "object_fit": null,
952
            "object_position": null,
953
            "order": null,
954
            "overflow": null,
955
            "overflow_x": null,
956
            "overflow_y": null,
957
            "padding": null,
958
            "right": null,
959
            "top": null,
960
            "visibility": null,
961
            "width": null
962
          }
963
        },
964
        "7a1f3340688d408092adade75f4baac4": {
965
          "model_module": "@jupyter-widgets/controls",
966
          "model_name": "ProgressStyleModel",
967
          "model_module_version": "1.5.0",
968
          "state": {
969
            "_model_module": "@jupyter-widgets/controls",
970
            "_model_module_version": "1.5.0",
971
            "_model_name": "ProgressStyleModel",
972
            "_view_count": null,
973
            "_view_module": "@jupyter-widgets/base",
974
            "_view_module_version": "1.2.0",
975
            "_view_name": "StyleView",
976
            "bar_color": null,
977
            "description_width": ""
978
          }
979
        },
980
        "8c887ca9b0eb44fdb8608bf36b5db5c5": {
981
          "model_module": "@jupyter-widgets/base",
982
          "model_name": "LayoutModel",
983
          "model_module_version": "1.2.0",
984
          "state": {
985
            "_model_module": "@jupyter-widgets/base",
986
            "_model_module_version": "1.2.0",
987
            "_model_name": "LayoutModel",
988
            "_view_count": null,
989
            "_view_module": "@jupyter-widgets/base",
990
            "_view_module_version": "1.2.0",
991
            "_view_name": "LayoutView",
992
            "align_content": null,
993
            "align_items": null,
994
            "align_self": null,
995
            "border": null,
996
            "bottom": null,
997
            "display": null,
998
            "flex": null,
999
            "flex_flow": null,
1000
            "grid_area": null,
1001
            "grid_auto_columns": null,
1002
            "grid_auto_flow": null,
1003
            "grid_auto_rows": null,
1004
            "grid_column": null,
1005
            "grid_gap": null,
1006
            "grid_row": null,
1007
            "grid_template_areas": null,
1008
            "grid_template_columns": null,
1009
            "grid_template_rows": null,
1010
            "height": null,
1011
            "justify_content": null,
1012
            "justify_items": null,
1013
            "left": null,
1014
            "margin": null,
1015
            "max_height": null,
1016
            "max_width": null,
1017
            "min_height": null,
1018
            "min_width": null,
1019
            "object_fit": null,
1020
            "object_position": null,
1021
            "order": null,
1022
            "overflow": null,
1023
            "overflow_x": null,
1024
            "overflow_y": null,
1025
            "padding": null,
1026
            "right": null,
1027
            "top": null,
1028
            "visibility": null,
1029
            "width": null
1030
          }
1031
        },
1032
        "e4698337e6b843afac706ab657ca6af9": {
1033
          "model_module": "@jupyter-widgets/controls",
1034
          "model_name": "DescriptionStyleModel",
1035
          "model_module_version": "1.5.0",
1036
          "state": {
1037
            "_model_module": "@jupyter-widgets/controls",
1038
            "_model_module_version": "1.5.0",
1039
            "_model_name": "DescriptionStyleModel",
1040
            "_view_count": null,
1041
            "_view_module": "@jupyter-widgets/base",
1042
            "_view_module_version": "1.2.0",
1043
            "_view_name": "StyleView",
1044
            "description_width": ""
1045
          }
1046
        },
1047
        "1af01f1f1aac42b8bff46fe4df8a59ad": {
1048
          "model_module": "@jupyter-widgets/controls",
1049
          "model_name": "HBoxModel",
1050
          "model_module_version": "1.5.0",
1051
          "state": {
1052
            "_dom_classes": [],
1053
            "_model_module": "@jupyter-widgets/controls",
1054
            "_model_module_version": "1.5.0",
1055
            "_model_name": "HBoxModel",
1056
            "_view_count": null,
1057
            "_view_module": "@jupyter-widgets/controls",
1058
            "_view_module_version": "1.5.0",
1059
            "_view_name": "HBoxView",
1060
            "box_style": "",
1061
            "children": [
1062
              "IPY_MODEL_eee8731f316244eda5ff0765fd12bf85",
1063
              "IPY_MODEL_f135278e410f4b708435bb80fb630bcf",
1064
              "IPY_MODEL_2e6fc79bf5c149d6b0bc5c52e18debc7"
1065
            ],
1066
            "layout": "IPY_MODEL_a4b0debc025444a59abd6953b3512c0d"
1067
          }
1068
        },
1069
        "eee8731f316244eda5ff0765fd12bf85": {
1070
          "model_module": "@jupyter-widgets/controls",
1071
          "model_name": "HTMLModel",
1072
          "model_module_version": "1.5.0",
1073
          "state": {
1074
            "_dom_classes": [],
1075
            "_model_module": "@jupyter-widgets/controls",
1076
            "_model_module_version": "1.5.0",
1077
            "_model_name": "HTMLModel",
1078
            "_view_count": null,
1079
            "_view_module": "@jupyter-widgets/controls",
1080
            "_view_module_version": "1.5.0",
1081
            "_view_name": "HTMLView",
1082
            "description": "",
1083
            "description_tooltip": null,
1084
            "layout": "IPY_MODEL_130120644beb48acbc038651459af43c",
1085
            "placeholder": "​",
1086
            "style": "IPY_MODEL_bf77e97593a349718bdb5fd9bfd28fe3",
1087
            "value": "pytorch_model-00001-of-00002.bin: 100%"
1088
          }
1089
        },
1090
        "f135278e410f4b708435bb80fb630bcf": {
1091
          "model_module": "@jupyter-widgets/controls",
1092
          "model_name": "FloatProgressModel",
1093
          "model_module_version": "1.5.0",
1094
          "state": {
1095
            "_dom_classes": [],
1096
            "_model_module": "@jupyter-widgets/controls",
1097
            "_model_module_version": "1.5.0",
1098
            "_model_name": "FloatProgressModel",
1099
            "_view_count": null,
1100
            "_view_module": "@jupyter-widgets/controls",
1101
            "_view_module_version": "1.5.0",
1102
            "_view_name": "ProgressView",
1103
            "bar_style": "success",
1104
            "description": "",
1105
            "description_tooltip": null,
1106
            "layout": "IPY_MODEL_f7292741953e47699540ef8712fc0d8d",
1107
            "max": 9976637886,
1108
            "min": 0,
1109
            "orientation": "horizontal",
1110
            "style": "IPY_MODEL_9434350b1b9c4060812feb9ecbf63278",
1111
            "value": 9976637886
1112
          }
1113
        },
1114
        "2e6fc79bf5c149d6b0bc5c52e18debc7": {
1115
          "model_module": "@jupyter-widgets/controls",
1116
          "model_name": "HTMLModel",
1117
          "model_module_version": "1.5.0",
1118
          "state": {
1119
            "_dom_classes": [],
1120
            "_model_module": "@jupyter-widgets/controls",
1121
            "_model_module_version": "1.5.0",
1122
            "_model_name": "HTMLModel",
1123
            "_view_count": null,
1124
            "_view_module": "@jupyter-widgets/controls",
1125
            "_view_module_version": "1.5.0",
1126
            "_view_name": "HTMLView",
1127
            "description": "",
1128
            "description_tooltip": null,
1129
            "layout": "IPY_MODEL_b29647e268414329be56047e522e28b9",
1130
            "placeholder": "​",
1131
            "style": "IPY_MODEL_27bb18a199ca47108c7a61e9c443de36",
1132
            "value": " 9.98G/9.98G [06:35<00:00, 25.8MB/s]"
1133
          }
1134
        },
1135
        "a4b0debc025444a59abd6953b3512c0d": {
1136
          "model_module": "@jupyter-widgets/base",
1137
          "model_name": "LayoutModel",
1138
          "model_module_version": "1.2.0",
1139
          "state": {
1140
            "_model_module": "@jupyter-widgets/base",
1141
            "_model_module_version": "1.2.0",
1142
            "_model_name": "LayoutModel",
1143
            "_view_count": null,
1144
            "_view_module": "@jupyter-widgets/base",
1145
            "_view_module_version": "1.2.0",
1146
            "_view_name": "LayoutView",
1147
            "align_content": null,
1148
            "align_items": null,
1149
            "align_self": null,
1150
            "border": null,
1151
            "bottom": null,
1152
            "display": null,
1153
            "flex": null,
1154
            "flex_flow": null,
1155
            "grid_area": null,
1156
            "grid_auto_columns": null,
1157
            "grid_auto_flow": null,
1158
            "grid_auto_rows": null,
1159
            "grid_column": null,
1160
            "grid_gap": null,
1161
            "grid_row": null,
1162
            "grid_template_areas": null,
1163
            "grid_template_columns": null,
1164
            "grid_template_rows": null,
1165
            "height": null,
1166
            "justify_content": null,
1167
            "justify_items": null,
1168
            "left": null,
1169
            "margin": null,
1170
            "max_height": null,
1171
            "max_width": null,
1172
            "min_height": null,
1173
            "min_width": null,
1174
            "object_fit": null,
1175
            "object_position": null,
1176
            "order": null,
1177
            "overflow": null,
1178
            "overflow_x": null,
1179
            "overflow_y": null,
1180
            "padding": null,
1181
            "right": null,
1182
            "top": null,
1183
            "visibility": null,
1184
            "width": null
1185
          }
1186
        },
1187
        "130120644beb48acbc038651459af43c": {
1188
          "model_module": "@jupyter-widgets/base",
1189
          "model_name": "LayoutModel",
1190
          "model_module_version": "1.2.0",
1191
          "state": {
1192
            "_model_module": "@jupyter-widgets/base",
1193
            "_model_module_version": "1.2.0",
1194
            "_model_name": "LayoutModel",
1195
            "_view_count": null,
1196
            "_view_module": "@jupyter-widgets/base",
1197
            "_view_module_version": "1.2.0",
1198
            "_view_name": "LayoutView",
1199
            "align_content": null,
1200
            "align_items": null,
1201
            "align_self": null,
1202
            "border": null,
1203
            "bottom": null,
1204
            "display": null,
1205
            "flex": null,
1206
            "flex_flow": null,
1207
            "grid_area": null,
1208
            "grid_auto_columns": null,
1209
            "grid_auto_flow": null,
1210
            "grid_auto_rows": null,
1211
            "grid_column": null,
1212
            "grid_gap": null,
1213
            "grid_row": null,
1214
            "grid_template_areas": null,
1215
            "grid_template_columns": null,
1216
            "grid_template_rows": null,
1217
            "height": null,
1218
            "justify_content": null,
1219
            "justify_items": null,
1220
            "left": null,
1221
            "margin": null,
1222
            "max_height": null,
1223
            "max_width": null,
1224
            "min_height": null,
1225
            "min_width": null,
1226
            "object_fit": null,
1227
            "object_position": null,
1228
            "order": null,
1229
            "overflow": null,
1230
            "overflow_x": null,
1231
            "overflow_y": null,
1232
            "padding": null,
1233
            "right": null,
1234
            "top": null,
1235
            "visibility": null,
1236
            "width": null
1237
          }
1238
        },
1239
        "bf77e97593a349718bdb5fd9bfd28fe3": {
1240
          "model_module": "@jupyter-widgets/controls",
1241
          "model_name": "DescriptionStyleModel",
1242
          "model_module_version": "1.5.0",
1243
          "state": {
1244
            "_model_module": "@jupyter-widgets/controls",
1245
            "_model_module_version": "1.5.0",
1246
            "_model_name": "DescriptionStyleModel",
1247
            "_view_count": null,
1248
            "_view_module": "@jupyter-widgets/base",
1249
            "_view_module_version": "1.2.0",
1250
            "_view_name": "StyleView",
1251
            "description_width": ""
1252
          }
1253
        },
1254
        "f7292741953e47699540ef8712fc0d8d": {
1255
          "model_module": "@jupyter-widgets/base",
1256
          "model_name": "LayoutModel",
1257
          "model_module_version": "1.2.0",
1258
          "state": {
1259
            "_model_module": "@jupyter-widgets/base",
1260
            "_model_module_version": "1.2.0",
1261
            "_model_name": "LayoutModel",
1262
            "_view_count": null,
1263
            "_view_module": "@jupyter-widgets/base",
1264
            "_view_module_version": "1.2.0",
1265
            "_view_name": "LayoutView",
1266
            "align_content": null,
1267
            "align_items": null,
1268
            "align_self": null,
1269
            "border": null,
1270
            "bottom": null,
1271
            "display": null,
1272
            "flex": null,
1273
            "flex_flow": null,
1274
            "grid_area": null,
1275
            "grid_auto_columns": null,
1276
            "grid_auto_flow": null,
1277
            "grid_auto_rows": null,
1278
            "grid_column": null,
1279
            "grid_gap": null,
1280
            "grid_row": null,
1281
            "grid_template_areas": null,
1282
            "grid_template_columns": null,
1283
            "grid_template_rows": null,
1284
            "height": null,
1285
            "justify_content": null,
1286
            "justify_items": null,
1287
            "left": null,
1288
            "margin": null,
1289
            "max_height": null,
1290
            "max_width": null,
1291
            "min_height": null,
1292
            "min_width": null,
1293
            "object_fit": null,
1294
            "object_position": null,
1295
            "order": null,
1296
            "overflow": null,
1297
            "overflow_x": null,
1298
            "overflow_y": null,
1299
            "padding": null,
1300
            "right": null,
1301
            "top": null,
1302
            "visibility": null,
1303
            "width": null
1304
          }
1305
        },
1306
        "9434350b1b9c4060812feb9ecbf63278": {
1307
          "model_module": "@jupyter-widgets/controls",
1308
          "model_name": "ProgressStyleModel",
1309
          "model_module_version": "1.5.0",
1310
          "state": {
1311
            "_model_module": "@jupyter-widgets/controls",
1312
            "_model_module_version": "1.5.0",
1313
            "_model_name": "ProgressStyleModel",
1314
            "_view_count": null,
1315
            "_view_module": "@jupyter-widgets/base",
1316
            "_view_module_version": "1.2.0",
1317
            "_view_name": "StyleView",
1318
            "bar_color": null,
1319
            "description_width": ""
1320
          }
1321
        },
1322
        "b29647e268414329be56047e522e28b9": {
1323
          "model_module": "@jupyter-widgets/base",
1324
          "model_name": "LayoutModel",
1325
          "model_module_version": "1.2.0",
1326
          "state": {
1327
            "_model_module": "@jupyter-widgets/base",
1328
            "_model_module_version": "1.2.0",
1329
            "_model_name": "LayoutModel",
1330
            "_view_count": null,
1331
            "_view_module": "@jupyter-widgets/base",
1332
            "_view_module_version": "1.2.0",
1333
            "_view_name": "LayoutView",
1334
            "align_content": null,
1335
            "align_items": null,
1336
            "align_self": null,
1337
            "border": null,
1338
            "bottom": null,
1339
            "display": null,
1340
            "flex": null,
1341
            "flex_flow": null,
1342
            "grid_area": null,
1343
            "grid_auto_columns": null,
1344
            "grid_auto_flow": null,
1345
            "grid_auto_rows": null,
1346
            "grid_column": null,
1347
            "grid_gap": null,
1348
            "grid_row": null,
1349
            "grid_template_areas": null,
1350
            "grid_template_columns": null,
1351
            "grid_template_rows": null,
1352
            "height": null,
1353
            "justify_content": null,
1354
            "justify_items": null,
1355
            "left": null,
1356
            "margin": null,
1357
            "max_height": null,
1358
            "max_width": null,
1359
            "min_height": null,
1360
            "min_width": null,
1361
            "object_fit": null,
1362
            "object_position": null,
1363
            "order": null,
1364
            "overflow": null,
1365
            "overflow_x": null,
1366
            "overflow_y": null,
1367
            "padding": null,
1368
            "right": null,
1369
            "top": null,
1370
            "visibility": null,
1371
            "width": null
1372
          }
1373
        },
1374
        "27bb18a199ca47108c7a61e9c443de36": {
1375
          "model_module": "@jupyter-widgets/controls",
1376
          "model_name": "DescriptionStyleModel",
1377
          "model_module_version": "1.5.0",
1378
          "state": {
1379
            "_model_module": "@jupyter-widgets/controls",
1380
            "_model_module_version": "1.5.0",
1381
            "_model_name": "DescriptionStyleModel",
1382
            "_view_count": null,
1383
            "_view_module": "@jupyter-widgets/base",
1384
            "_view_module_version": "1.2.0",
1385
            "_view_name": "StyleView",
1386
            "description_width": ""
1387
          }
1388
        },
1389
        "33ebb868f3e846f6af1a1a2a8ad6a3cb": {
1390
          "model_module": "@jupyter-widgets/controls",
1391
          "model_name": "HBoxModel",
1392
          "model_module_version": "1.5.0",
1393
          "state": {
1394
            "_dom_classes": [],
1395
            "_model_module": "@jupyter-widgets/controls",
1396
            "_model_module_version": "1.5.0",
1397
            "_model_name": "HBoxModel",
1398
            "_view_count": null,
1399
            "_view_module": "@jupyter-widgets/controls",
1400
            "_view_module_version": "1.5.0",
1401
            "_view_name": "HBoxView",
1402
            "box_style": "",
1403
            "children": [
1404
              "IPY_MODEL_1f73f8b4d4da4e74adc135f2a2f6ee65",
1405
              "IPY_MODEL_68da6e6e69c8419895bea2068760534e",
1406
              "IPY_MODEL_6dc1a868e08c4c3b8315116d2c46573b"
1407
            ],
1408
            "layout": "IPY_MODEL_7a5d714c17374104bb6f5caaa5541c10"
1409
          }
1410
        },
1411
        "1f73f8b4d4da4e74adc135f2a2f6ee65": {
1412
          "model_module": "@jupyter-widgets/controls",
1413
          "model_name": "HTMLModel",
1414
          "model_module_version": "1.5.0",
1415
          "state": {
1416
            "_dom_classes": [],
1417
            "_model_module": "@jupyter-widgets/controls",
1418
            "_model_module_version": "1.5.0",
1419
            "_model_name": "HTMLModel",
1420
            "_view_count": null,
1421
            "_view_module": "@jupyter-widgets/controls",
1422
            "_view_module_version": "1.5.0",
1423
            "_view_name": "HTMLView",
1424
            "description": "",
1425
            "description_tooltip": null,
1426
            "layout": "IPY_MODEL_1b6c59a51359453c926bfcddb3d0f0ea",
1427
            "placeholder": "​",
1428
            "style": "IPY_MODEL_dac3669f18284161a58d52f26dffb761",
1429
            "value": "pytorch_model-00002-of-00002.bin: 100%"
1430
          }
1431
        },
1432
        "68da6e6e69c8419895bea2068760534e": {
1433
          "model_module": "@jupyter-widgets/controls",
1434
          "model_name": "FloatProgressModel",
1435
          "model_module_version": "1.5.0",
1436
          "state": {
1437
            "_dom_classes": [],
1438
            "_model_module": "@jupyter-widgets/controls",
1439
            "_model_module_version": "1.5.0",
1440
            "_model_name": "FloatProgressModel",
1441
            "_view_count": null,
1442
            "_view_module": "@jupyter-widgets/controls",
1443
            "_view_module_version": "1.5.0",
1444
            "_view_name": "ProgressView",
1445
            "bar_style": "success",
1446
            "description": "",
1447
            "description_tooltip": null,
1448
            "layout": "IPY_MODEL_a3511f489f6d47cc8d404ab6f367b29f",
1449
            "max": 3500316627,
1450
            "min": 0,
1451
            "orientation": "horizontal",
1452
            "style": "IPY_MODEL_20670478612f4b1a8a5f23d71a2609a7",
1453
            "value": 3500316627
1454
          }
1455
        },
1456
        "6dc1a868e08c4c3b8315116d2c46573b": {
1457
          "model_module": "@jupyter-widgets/controls",
1458
          "model_name": "HTMLModel",
1459
          "model_module_version": "1.5.0",
1460
          "state": {
1461
            "_dom_classes": [],
1462
            "_model_module": "@jupyter-widgets/controls",
1463
            "_model_module_version": "1.5.0",
1464
            "_model_name": "HTMLModel",
1465
            "_view_count": null,
1466
            "_view_module": "@jupyter-widgets/controls",
1467
            "_view_module_version": "1.5.0",
1468
            "_view_name": "HTMLView",
1469
            "description": "",
1470
            "description_tooltip": null,
1471
            "layout": "IPY_MODEL_b463153ec04749e38540389efa2981f7",
1472
            "placeholder": "​",
1473
            "style": "IPY_MODEL_2bb3d36d248a48fba364f14d9e840306",
1474
            "value": " 3.50G/3.50G [02:27<00:00, 26.4MB/s]"
1475
          }
1476
        },
1477
        "7a5d714c17374104bb6f5caaa5541c10": {
1478
          "model_module": "@jupyter-widgets/base",
1479
          "model_name": "LayoutModel",
1480
          "model_module_version": "1.2.0",
1481
          "state": {
1482
            "_model_module": "@jupyter-widgets/base",
1483
            "_model_module_version": "1.2.0",
1484
            "_model_name": "LayoutModel",
1485
            "_view_count": null,
1486
            "_view_module": "@jupyter-widgets/base",
1487
            "_view_module_version": "1.2.0",
1488
            "_view_name": "LayoutView",
1489
            "align_content": null,
1490
            "align_items": null,
1491
            "align_self": null,
1492
            "border": null,
1493
            "bottom": null,
1494
            "display": null,
1495
            "flex": null,
1496
            "flex_flow": null,
1497
            "grid_area": null,
1498
            "grid_auto_columns": null,
1499
            "grid_auto_flow": null,
1500
            "grid_auto_rows": null,
1501
            "grid_column": null,
1502
            "grid_gap": null,
1503
            "grid_row": null,
1504
            "grid_template_areas": null,
1505
            "grid_template_columns": null,
1506
            "grid_template_rows": null,
1507
            "height": null,
1508
            "justify_content": null,
1509
            "justify_items": null,
1510
            "left": null,
1511
            "margin": null,
1512
            "max_height": null,
1513
            "max_width": null,
1514
            "min_height": null,
1515
            "min_width": null,
1516
            "object_fit": null,
1517
            "object_position": null,
1518
            "order": null,
1519
            "overflow": null,
1520
            "overflow_x": null,
1521
            "overflow_y": null,
1522
            "padding": null,
1523
            "right": null,
1524
            "top": null,
1525
            "visibility": null,
1526
            "width": null
1527
          }
1528
        },
1529
        "1b6c59a51359453c926bfcddb3d0f0ea": {
1530
          "model_module": "@jupyter-widgets/base",
1531
          "model_name": "LayoutModel",
1532
          "model_module_version": "1.2.0",
1533
          "state": {
1534
            "_model_module": "@jupyter-widgets/base",
1535
            "_model_module_version": "1.2.0",
1536
            "_model_name": "LayoutModel",
1537
            "_view_count": null,
1538
            "_view_module": "@jupyter-widgets/base",
1539
            "_view_module_version": "1.2.0",
1540
            "_view_name": "LayoutView",
1541
            "align_content": null,
1542
            "align_items": null,
1543
            "align_self": null,
1544
            "border": null,
1545
            "bottom": null,
1546
            "display": null,
1547
            "flex": null,
1548
            "flex_flow": null,
1549
            "grid_area": null,
1550
            "grid_auto_columns": null,
1551
            "grid_auto_flow": null,
1552
            "grid_auto_rows": null,
1553
            "grid_column": null,
1554
            "grid_gap": null,
1555
            "grid_row": null,
1556
            "grid_template_areas": null,
1557
            "grid_template_columns": null,
1558
            "grid_template_rows": null,
1559
            "height": null,
1560
            "justify_content": null,
1561
            "justify_items": null,
1562
            "left": null,
1563
            "margin": null,
1564
            "max_height": null,
1565
            "max_width": null,
1566
            "min_height": null,
1567
            "min_width": null,
1568
            "object_fit": null,
1569
            "object_position": null,
1570
            "order": null,
1571
            "overflow": null,
1572
            "overflow_x": null,
1573
            "overflow_y": null,
1574
            "padding": null,
1575
            "right": null,
1576
            "top": null,
1577
            "visibility": null,
1578
            "width": null
1579
          }
1580
        },
1581
        "dac3669f18284161a58d52f26dffb761": {
1582
          "model_module": "@jupyter-widgets/controls",
1583
          "model_name": "DescriptionStyleModel",
1584
          "model_module_version": "1.5.0",
1585
          "state": {
1586
            "_model_module": "@jupyter-widgets/controls",
1587
            "_model_module_version": "1.5.0",
1588
            "_model_name": "DescriptionStyleModel",
1589
            "_view_count": null,
1590
            "_view_module": "@jupyter-widgets/base",
1591
            "_view_module_version": "1.2.0",
1592
            "_view_name": "StyleView",
1593
            "description_width": ""
1594
          }
1595
        },
1596
        "a3511f489f6d47cc8d404ab6f367b29f": {
1597
          "model_module": "@jupyter-widgets/base",
1598
          "model_name": "LayoutModel",
1599
          "model_module_version": "1.2.0",
1600
          "state": {
1601
            "_model_module": "@jupyter-widgets/base",
1602
            "_model_module_version": "1.2.0",
1603
            "_model_name": "LayoutModel",
1604
            "_view_count": null,
1605
            "_view_module": "@jupyter-widgets/base",
1606
            "_view_module_version": "1.2.0",
1607
            "_view_name": "LayoutView",
1608
            "align_content": null,
1609
            "align_items": null,
1610
            "align_self": null,
1611
            "border": null,
1612
            "bottom": null,
1613
            "display": null,
1614
            "flex": null,
1615
            "flex_flow": null,
1616
            "grid_area": null,
1617
            "grid_auto_columns": null,
1618
            "grid_auto_flow": null,
1619
            "grid_auto_rows": null,
1620
            "grid_column": null,
1621
            "grid_gap": null,
1622
            "grid_row": null,
1623
            "grid_template_areas": null,
1624
            "grid_template_columns": null,
1625
            "grid_template_rows": null,
1626
            "height": null,
1627
            "justify_content": null,
1628
            "justify_items": null,
1629
            "left": null,
1630
            "margin": null,
1631
            "max_height": null,
1632
            "max_width": null,
1633
            "min_height": null,
1634
            "min_width": null,
1635
            "object_fit": null,
1636
            "object_position": null,
1637
            "order": null,
1638
            "overflow": null,
1639
            "overflow_x": null,
1640
            "overflow_y": null,
1641
            "padding": null,
1642
            "right": null,
1643
            "top": null,
1644
            "visibility": null,
1645
            "width": null
1646
          }
1647
        },
1648
        "20670478612f4b1a8a5f23d71a2609a7": {
1649
          "model_module": "@jupyter-widgets/controls",
1650
          "model_name": "ProgressStyleModel",
1651
          "model_module_version": "1.5.0",
1652
          "state": {
1653
            "_model_module": "@jupyter-widgets/controls",
1654
            "_model_module_version": "1.5.0",
1655
            "_model_name": "ProgressStyleModel",
1656
            "_view_count": null,
1657
            "_view_module": "@jupyter-widgets/base",
1658
            "_view_module_version": "1.2.0",
1659
            "_view_name": "StyleView",
1660
            "bar_color": null,
1661
            "description_width": ""
1662
          }
1663
        },
1664
        "b463153ec04749e38540389efa2981f7": {
1665
          "model_module": "@jupyter-widgets/base",
1666
          "model_name": "LayoutModel",
1667
          "model_module_version": "1.2.0",
1668
          "state": {
1669
            "_model_module": "@jupyter-widgets/base",
1670
            "_model_module_version": "1.2.0",
1671
            "_model_name": "LayoutModel",
1672
            "_view_count": null,
1673
            "_view_module": "@jupyter-widgets/base",
1674
            "_view_module_version": "1.2.0",
1675
            "_view_name": "LayoutView",
1676
            "align_content": null,
1677
            "align_items": null,
1678
            "align_self": null,
1679
            "border": null,
1680
            "bottom": null,
1681
            "display": null,
1682
            "flex": null,
1683
            "flex_flow": null,
1684
            "grid_area": null,
1685
            "grid_auto_columns": null,
1686
            "grid_auto_flow": null,
1687
            "grid_auto_rows": null,
1688
            "grid_column": null,
1689
            "grid_gap": null,
1690
            "grid_row": null,
1691
            "grid_template_areas": null,
1692
            "grid_template_columns": null,
1693
            "grid_template_rows": null,
1694
            "height": null,
1695
            "justify_content": null,
1696
            "justify_items": null,
1697
            "left": null,
1698
            "margin": null,
1699
            "max_height": null,
1700
            "max_width": null,
1701
            "min_height": null,
1702
            "min_width": null,
1703
            "object_fit": null,
1704
            "object_position": null,
1705
            "order": null,
1706
            "overflow": null,
1707
            "overflow_x": null,
1708
            "overflow_y": null,
1709
            "padding": null,
1710
            "right": null,
1711
            "top": null,
1712
            "visibility": null,
1713
            "width": null
1714
          }
1715
        },
1716
        "2bb3d36d248a48fba364f14d9e840306": {
1717
          "model_module": "@jupyter-widgets/controls",
1718
          "model_name": "DescriptionStyleModel",
1719
          "model_module_version": "1.5.0",
1720
          "state": {
1721
            "_model_module": "@jupyter-widgets/controls",
1722
            "_model_module_version": "1.5.0",
1723
            "_model_name": "DescriptionStyleModel",
1724
            "_view_count": null,
1725
            "_view_module": "@jupyter-widgets/base",
1726
            "_view_module_version": "1.2.0",
1727
            "_view_name": "StyleView",
1728
            "description_width": ""
1729
          }
1730
        }
1731
      }
1732
    }
1733
  },
1734
  "cells": [
1735
    {
1736
      "cell_type": "markdown",
1737
      "metadata": {
1738
        "id": "view-in-github",
1739
        "colab_type": "text"
1740
      },
1741
      "source": [
1742
        "<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Fine_tune_Llama_2_in_Google_Colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
1743
      ]
1744
    },
1745
    {
1746
      "cell_type": "markdown",
1747
      "source": [
1748
        "# Fine-tune Llama 2 in Google Colab\n",
1749
        "> 🗣️ Large Language Model Course\n",
1750
        "\n",
1751
        "❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da). Special thanks to Tolga HOŞGÖR for his solution to empty the VRAM.\n",
1752
        "\n",
1753
        "This notebook runs on a T4 GPU. (Last update: 01 Aug 2023)\n"
1754
      ],
1755
      "metadata": {
1756
        "id": "OSHlAbqzDFDq"
1757
      }
1758
    },
1759
    {
1760
      "cell_type": "code",
1761
      "execution_count": null,
1762
      "metadata": {
1763
        "id": "GLXwJqbjtPho"
1764
      },
1765
      "outputs": [],
1766
      "source": [
1767
        "!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7"
1768
      ]
1769
    },
1770
    {
1771
      "cell_type": "code",
1772
      "source": [
1773
        "import os\n",
1774
        "import torch\n",
1775
        "from datasets import load_dataset\n",
1776
        "from transformers import (\n",
1777
        "    AutoModelForCausalLM,\n",
1778
        "    AutoTokenizer,\n",
1779
        "    BitsAndBytesConfig,\n",
1780
        "    HfArgumentParser,\n",
1781
        "    TrainingArguments,\n",
1782
        "    pipeline,\n",
1783
        "    logging,\n",
1784
        ")\n",
1785
        "from peft import LoraConfig, PeftModel\n",
1786
        "from trl import SFTTrainer"
1787
      ],
1788
      "metadata": {
1789
        "id": "nAMzy_0FtaUZ"
1790
      },
1791
      "execution_count": null,
1792
      "outputs": []
1793
    },
1794
    {
1795
      "cell_type": "code",
1796
      "source": [
1797
        "# The model that you want to train from the Hugging Face hub\n",
1798
        "model_name = \"NousResearch/Llama-2-7b-chat-hf\"\n",
1799
        "\n",
1800
        "# The instruction dataset to use\n",
1801
        "dataset_name = \"mlabonne/guanaco-llama2-1k\"\n",
1802
        "\n",
1803
        "# Fine-tuned model name\n",
1804
        "new_model = \"llama-2-7b-miniguanaco\"\n",
1805
        "\n",
1806
        "################################################################################\n",
1807
        "# QLoRA parameters\n",
1808
        "################################################################################\n",
1809
        "\n",
1810
        "# LoRA attention dimension\n",
1811
        "lora_r = 64\n",
1812
        "\n",
1813
        "# Alpha parameter for LoRA scaling\n",
1814
        "lora_alpha = 16\n",
1815
        "\n",
1816
        "# Dropout probability for LoRA layers\n",
1817
        "lora_dropout = 0.1\n",
1818
        "\n",
1819
        "################################################################################\n",
1820
        "# bitsandbytes parameters\n",
1821
        "################################################################################\n",
1822
        "\n",
1823
        "# Activate 4-bit precision base model loading\n",
1824
        "use_4bit = True\n",
1825
        "\n",
1826
        "# Compute dtype for 4-bit base models\n",
1827
        "bnb_4bit_compute_dtype = \"float16\"\n",
1828
        "\n",
1829
        "# Quantization type (fp4 or nf4)\n",
1830
        "bnb_4bit_quant_type = \"nf4\"\n",
1831
        "\n",
1832
        "# Activate nested quantization for 4-bit base models (double quantization)\n",
1833
        "use_nested_quant = False\n",
1834
        "\n",
1835
        "################################################################################\n",
1836
        "# TrainingArguments parameters\n",
1837
        "################################################################################\n",
1838
        "\n",
1839
        "# Output directory where the model predictions and checkpoints will be stored\n",
1840
        "output_dir = \"./results\"\n",
1841
        "\n",
1842
        "# Number of training epochs\n",
1843
        "num_train_epochs = 1\n",
1844
        "\n",
1845
        "# Enable fp16/bf16 training (set bf16 to True with an A100)\n",
1846
        "fp16 = False\n",
1847
        "bf16 = False\n",
1848
        "\n",
1849
        "# Batch size per GPU for training\n",
1850
        "per_device_train_batch_size = 4\n",
1851
        "\n",
1852
        "# Batch size per GPU for evaluation\n",
1853
        "per_device_eval_batch_size = 4\n",
1854
        "\n",
1855
        "# Number of update steps to accumulate the gradients for\n",
1856
        "gradient_accumulation_steps = 1\n",
1857
        "\n",
1858
        "# Enable gradient checkpointing\n",
1859
        "gradient_checkpointing = True\n",
1860
        "\n",
1861
        "# Maximum gradient norm (gradient clipping)\n",
1862
        "max_grad_norm = 0.3\n",
1863
        "\n",
1864
        "# Initial learning rate (AdamW optimizer)\n",
1865
        "learning_rate = 2e-4\n",
1866
        "\n",
1867
        "# Weight decay to apply to all layers except bias/LayerNorm weights\n",
1868
        "weight_decay = 0.001\n",
1869
        "\n",
1870
        "# Optimizer to use\n",
1871
        "optim = \"paged_adamw_32bit\"\n",
1872
        "\n",
1873
        "# Learning rate schedule\n",
1874
        "lr_scheduler_type = \"cosine\"\n",
1875
        "\n",
1876
        "# Number of training steps (a positive value overrides num_train_epochs; -1 disables it)\n",
1877
        "max_steps = -1\n",
1878
        "\n",
1879
        "# Ratio of steps for a linear warmup (from 0 to learning rate)\n",
1880
        "warmup_ratio = 0.03\n",
1881
        "\n",
1882
        "# Group sequences of roughly the same length into batches\n",
1883
        "# Saves memory and speeds up training considerably\n",
1884
        "group_by_length = True\n",
1885
        "\n",
1886
        "# Save checkpoint every X update steps\n",
1887
        "save_steps = 0\n",
1888
        "\n",
1889
        "# Log every X update steps\n",
1890
        "logging_steps = 25\n",
1891
        "\n",
1892
        "################################################################################\n",
1893
        "# SFT parameters\n",
1894
        "################################################################################\n",
1895
        "\n",
1896
        "# Maximum sequence length to use\n",
1897
        "max_seq_length = None\n",
1898
        "\n",
1899
        "# Pack multiple short examples in the same input sequence to increase efficiency\n",
1900
        "packing = False\n",
1901
        "\n",
1902
        "# Load the entire model on GPU 0\n",
1903
        "device_map = {\"\": 0}"
1904
      ],
1905
      "metadata": {
1906
        "id": "ib_We3NLtj2E"
1907
      },
1908
      "execution_count": null,
1909
      "outputs": []
1910
    },
1911
    {
1912
      "cell_type": "code",
1913
      "source": [
1914
        "# Load dataset (you can process it here)\n",
1915
        "dataset = load_dataset(dataset_name, split=\"train\")\n",
1916
        "\n",
1917
        "# Load tokenizer and model with QLoRA configuration\n",
1918
        "compute_dtype = getattr(torch, bnb_4bit_compute_dtype)\n",
1919
        "\n",
1920
        "bnb_config = BitsAndBytesConfig(\n",
1921
        "    load_in_4bit=use_4bit,\n",
1922
        "    bnb_4bit_quant_type=bnb_4bit_quant_type,\n",
1923
        "    bnb_4bit_compute_dtype=compute_dtype,\n",
1924
        "    bnb_4bit_use_double_quant=use_nested_quant,\n",
1925
        ")\n",
1926
        "\n",
1927
        "# Check GPU compatibility with bfloat16\n",
1928
        "if compute_dtype == torch.float16 and use_4bit:\n",
1929
        "    major, _ = torch.cuda.get_device_capability()\n",
1930
        "    if major >= 8:\n",
1931
        "        print(\"=\" * 80)\n",
1932
        "        print(\"Your GPU supports bfloat16: accelerate training with bf16=True\")\n",
1933
        "        print(\"=\" * 80)\n",
1934
        "\n",
1935
        "# Load base model\n",
1936
        "model = AutoModelForCausalLM.from_pretrained(\n",
1937
        "    model_name,\n",
1938
        "    quantization_config=bnb_config,\n",
1939
        "    device_map=device_map\n",
1940
        ")\n",
1941
        "model.config.use_cache = False\n",
1942
        "model.config.pretraining_tp = 1\n",
1943
        "\n",
1944
        "# Load LLaMA tokenizer\n",
1945
        "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
1946
        "tokenizer.pad_token = tokenizer.eos_token\n",
1947
        "tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n",
1948
        "\n",
1949
        "# Load LoRA configuration\n",
1950
        "peft_config = LoraConfig(\n",
1951
        "    lora_alpha=lora_alpha,\n",
1952
        "    lora_dropout=lora_dropout,\n",
1953
        "    r=lora_r,\n",
1954
        "    bias=\"none\",\n",
1955
        "    task_type=\"CAUSAL_LM\",\n",
1956
        ")\n",
1957
        "\n",
1958
        "# Set training parameters\n",
1959
        "training_arguments = TrainingArguments(\n",
1960
        "    output_dir=output_dir,\n",
1961
        "    num_train_epochs=num_train_epochs,\n",
1962
        "    per_device_train_batch_size=per_device_train_batch_size,\n",
1963
        "    gradient_accumulation_steps=gradient_accumulation_steps,\n",
1964
        "    optim=optim,\n",
1965
        "    save_steps=save_steps,\n",
1966
        "    logging_steps=logging_steps,\n",
1967
        "    learning_rate=learning_rate,\n",
1968
        "    weight_decay=weight_decay,\n",
1969
        "    fp16=fp16,\n",
1970
        "    bf16=bf16,\n",
1971
        "    max_grad_norm=max_grad_norm,\n",
1972
        "    max_steps=max_steps,\n",
1973
        "    warmup_ratio=warmup_ratio,\n",
1974
        "    group_by_length=group_by_length,\n",
1975
        "    lr_scheduler_type=lr_scheduler_type,\n",
1976
        "    report_to=\"tensorboard\"\n",
1977
        ")\n",
1978
        "\n",
1979
        "# Set supervised fine-tuning parameters\n",
1980
        "trainer = SFTTrainer(\n",
1981
        "    model=model,\n",
1982
        "    train_dataset=dataset,\n",
1983
        "    peft_config=peft_config,\n",
1984
        "    dataset_text_field=\"text\",\n",
1985
        "    max_seq_length=max_seq_length,\n",
1986
        "    tokenizer=tokenizer,\n",
1987
        "    args=training_arguments,\n",
1988
        "    packing=packing,\n",
1989
        ")\n",
1990
        "\n",
1991
        "# Train model\n",
1992
        "trainer.train()\n",
1993
        "\n",
1994
        "# Save trained model\n",
1995
        "trainer.model.save_pretrained(new_model)"
1996
      ],
1997
      "metadata": {
1998
        "id": "OJXpOgBFuSrc",
1999
        "colab": {
2000
          "base_uri": "https://localhost:8080/",
2001
          "height": 1000,
2002
          "referenced_widgets": [
2003
            "52c4bf7418f74bc79a8c12fe35901974",
2004
            "c5e609d111b34d408a53a4cd71bb43d5",
2005
            "0e0a20b5ed7a44e9834022e7eba2194d",
2006
            "b5627331e78e4eb28765ed20f32cf403",
2007
            "8084d4cb267f4a52b3d80ec34d291190",
2008
            "a8dcdf1f7ab64242acb057e8b54ebf79",
2009
            "1ca492fddbaa4ea7a3226649154e01fd",
2010
            "a8eda8bfe08e4152a80c63830138c96d",
2011
            "1f258eacd6d0472385d41523b65dea8b",
2012
            "228b1bcf604f454f8060a250b58008a1",
2013
            "90b281e9c5ed4e77ab93e5879d0b15a3"
2014
          ]
2015
        },
2016
        "outputId": "8d06ed40-ea32-4d85-8665-413bde069607"
2017
      },
2018
      "execution_count": null,
2019
      "outputs": [
2020
        {
2021
          "output_type": "display_data",
2022
          "data": {
2023
            "text/plain": [
2024
              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
2025
            ],
2026
            "application/vnd.jupyter.widget-view+json": {
2027
              "version_major": 2,
2028
              "version_minor": 0,
2029
              "model_id": "52c4bf7418f74bc79a8c12fe35901974"
2030
            }
2031
          },
2032
          "metadata": {}
2033
        },
2034
        {
2035
          "output_type": "stream",
2036
          "name": "stderr",
2037
          "text": [
2038
            "/usr/local/lib/python3.10/dist-packages/peft/utils/other.py:102: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.\n",
2039
            "  warnings.warn(\n",
2040
            "/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:159: UserWarning: You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024\n",
2041
            "  warnings.warn(\n",
2042
            "You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
2043
          ]
2044
        },
2045
        {
2046
          "output_type": "display_data",
2047
          "data": {
2048
            "text/plain": [
2049
              "<IPython.core.display.HTML object>"
2050
            ],
2051
            "text/html": [
2052
              "\n",
2053
              "    <div>\n",
2054
              "      \n",
2055
              "      <progress value='250' max='250' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
2056
              "      [250/250 24:05, Epoch 1/1]\n",
2057
              "    </div>\n",
2058
              "    <table border=\"1\" class=\"dataframe\">\n",
2059
              "  <thead>\n",
2060
              " <tr style=\"text-align: left;\">\n",
2061
              "      <th>Step</th>\n",
2062
              "      <th>Training Loss</th>\n",
2063
              "    </tr>\n",
2064
              "  </thead>\n",
2065
              "  <tbody>\n",
2066
              "    <tr>\n",
2067
              "      <td>1</td>\n",
2068
              "      <td>1.350100</td>\n",
2069
              "    </tr>\n",
2070
              "    <tr>\n",
2071
              "      <td>2</td>\n",
2072
              "      <td>2.015800</td>\n",
2073
              "    </tr>\n",
2074
              "    <tr>\n",
2075
              "      <td>3</td>\n",
2076
              "      <td>1.048700</td>\n",
2077
              "    </tr>\n",
2078
              "    <tr>\n",
2079
              "      <td>4</td>\n",
2080
              "      <td>1.287700</td>\n",
2081
              "    </tr>\n",
2082
              "    <tr>\n",
2083
              "      <td>5</td>\n",
2084
              "      <td>1.451200</td>\n",
2085
              "    </tr>\n",
2086
              "    <tr>\n",
2087
              "      <td>6</td>\n",
2088
              "      <td>1.659900</td>\n",
2089
              "    </tr>\n",
2090
              "    <tr>\n",
2091
              "      <td>7</td>\n",
2092
              "      <td>1.472300</td>\n",
2093
              "    </tr>\n",
2094
              "    <tr>\n",
2095
              "      <td>8</td>\n",
2096
              "      <td>1.326700</td>\n",
2097
              "    </tr>\n",
2098
              "    <tr>\n",
2099
              "      <td>9</td>\n",
2100
              "      <td>1.140000</td>\n",
2101
              "    </tr>\n",
2102
              "    <tr>\n",
2103
              "      <td>10</td>\n",
2104
              "      <td>1.395300</td>\n",
2105
              "    </tr>\n",
2106
              "    <tr>\n",
2107
              "      <td>11</td>\n",
2108
              "      <td>1.776400</td>\n",
2109
              "    </tr>\n",
2110
              "    <tr>\n",
2111
              "      <td>12</td>\n",
2112
              "      <td>1.169100</td>\n",
2113
              "    </tr>\n",
2114
              "    <tr>\n",
2115
              "      <td>13</td>\n",
2116
              "      <td>1.434700</td>\n",
2117
              "    </tr>\n",
2118
              "    <tr>\n",
2119
              "      <td>14</td>\n",
2120
              "      <td>1.550400</td>\n",
2121
              "    </tr>\n",
2122
              "    <tr>\n",
2123
              "      <td>15</td>\n",
2124
              "      <td>1.440400</td>\n",
2125
              "    </tr>\n",
2126
              "    <tr>\n",
2127
              "      <td>16</td>\n",
2128
              "      <td>1.352100</td>\n",
2129
              "    </tr>\n",
2130
              "    <tr>\n",
2131
              "      <td>17</td>\n",
2132
              "      <td>1.062800</td>\n",
2133
              "    </tr>\n",
2134
              "    <tr>\n",
2135
              "      <td>18</td>\n",
2136
              "      <td>1.173400</td>\n",
2137
              "    </tr>\n",
2138
              "    <tr>\n",
2139
              "      <td>19</td>\n",
2140
              "      <td>1.385300</td>\n",
2141
              "    </tr>\n",
2142
              "    <tr>\n",
2143
              "      <td>20</td>\n",
2144
              "      <td>1.433300</td>\n",
2145
              "    </tr>\n",
2146
              "    <tr>\n",
2147
              "      <td>21</td>\n",
2148
              "      <td>1.787800</td>\n",
2149
              "    </tr>\n",
2150
              "    <tr>\n",
2151
              "      <td>22</td>\n",
2152
              "      <td>1.600200</td>\n",
2153
              "    </tr>\n",
2154
              "    <tr>\n",
2155
              "      <td>23</td>\n",
2156
              "      <td>1.067800</td>\n",
2157
              "    </tr>\n",
2158
              "    <tr>\n",
2159
              "      <td>24</td>\n",
2160
              "      <td>1.679300</td>\n",
2161
              "    </tr>\n",
2162
              "    <tr>\n",
2163
              "      <td>25</td>\n",
2164
              "      <td>1.209900</td>\n",
2165
              "    </tr>\n",
2166
              "    <tr>\n",
2167
              "      <td>26</td>\n",
2168
              "      <td>1.305200</td>\n",
2169
              "    </tr>\n",
2170
              "    <tr>\n",
2171
              "      <td>27</td>\n",
2172
              "      <td>1.465300</td>\n",
2173
              "    </tr>\n",
2174
              "    <tr>\n",
2175
              "      <td>28</td>\n",
2176
              "      <td>1.781800</td>\n",
2177
              "    </tr>\n",
2178
              "    <tr>\n",
2179
              "      <td>29</td>\n",
2180
              "      <td>1.152400</td>\n",
2181
              "    </tr>\n",
2182
              "    <tr>\n",
2183
              "      <td>30</td>\n",
2184
              "      <td>1.434400</td>\n",
2185
              "    </tr>\n",
2186
              "    <tr>\n",
2187
              "      <td>31</td>\n",
2188
              "      <td>1.399300</td>\n",
2189
              "    </tr>\n",
2190
              "    <tr>\n",
2191
              "      <td>32</td>\n",
2192
              "      <td>1.796300</td>\n",
2193
              "    </tr>\n",
2194
              "    <tr>\n",
2195
              "      <td>33</td>\n",
2196
              "      <td>1.674500</td>\n",
2197
              "    </tr>\n",
2198
              "    <tr>\n",
2199
              "      <td>34</td>\n",
2200
              "      <td>1.567600</td>\n",
2201
              "    </tr>\n",
2202
              "    <tr>\n",
2203
              "      <td>35</td>\n",
2204
              "      <td>1.830000</td>\n",
2205
              "    </tr>\n",
2206
              "    <tr>\n",
2207
              "      <td>36</td>\n",
2208
              "      <td>1.720200</td>\n",
2209
              "    </tr>\n",
2210
              "    <tr>\n",
2211
              "      <td>37</td>\n",
2212
              "      <td>1.335800</td>\n",
2213
              "    </tr>\n",
2214
              "    <tr>\n",
2215
              "      <td>38</td>\n",
2216
              "      <td>1.333000</td>\n",
2217
              "    </tr>\n",
2218
              "    <tr>\n",
2219
              "      <td>39</td>\n",
2220
              "      <td>2.044900</td>\n",
2221
              "    </tr>\n",
2222
              "    <tr>\n",
2223
              "      <td>40</td>\n",
2224
              "      <td>1.832200</td>\n",
2225
              "    </tr>\n",
2226
              "    <tr>\n",
2227
              "      <td>41</td>\n",
2228
              "      <td>1.533900</td>\n",
2229
              "    </tr>\n",
2230
              "    <tr>\n",
2231
              "      <td>42</td>\n",
2232
              "      <td>1.259900</td>\n",
2233
              "    </tr>\n",
2234
              "    <tr>\n",
2235
              "      <td>43</td>\n",
2236
              "      <td>1.372300</td>\n",
2237
              "    </tr>\n",
2238
              "    <tr>\n",
2239
              "      <td>44</td>\n",
2240
              "      <td>1.551600</td>\n",
2241
              "    </tr>\n",
2242
              "    <tr>\n",
2243
              "      <td>45</td>\n",
2244
              "      <td>2.002400</td>\n",
2245
              "    </tr>\n",
2246
              "    <tr>\n",
2247
              "      <td>46</td>\n",
2248
              "      <td>1.956100</td>\n",
2249
              "    </tr>\n",
2250
              "    <tr>\n",
2251
              "      <td>47</td>\n",
2252
              "      <td>2.441900</td>\n",
2253
              "    </tr>\n",
2254
              "    <tr>\n",
2255
              "      <td>48</td>\n",
2256
              "      <td>2.289100</td>\n",
2257
              "    </tr>\n",
2258
              "    <tr>\n",
2259
              "      <td>49</td>\n",
2260
              "      <td>1.544500</td>\n",
2261
              "    </tr>\n",
2262
              "    <tr>\n",
2263
              "      <td>50</td>\n",
2264
              "      <td>2.040300</td>\n",
2265
              "    </tr>\n",
2266
              "    <tr>\n",
2267
              "      <td>51</td>\n",
2268
              "      <td>1.103800</td>\n",
2269
              "    </tr>\n",
2270
              "    <tr>\n",
2271
              "      <td>52</td>\n",
2272
              "      <td>1.630800</td>\n",
2273
              "    </tr>\n",
2274
              "    <tr>\n",
2275
              "      <td>53</td>\n",
2276
              "      <td>1.437900</td>\n",
2277
              "    </tr>\n",
2278
              "    <tr>\n",
2279
              "      <td>54</td>\n",
2280
              "      <td>1.820900</td>\n",
2281
              "    </tr>\n",
2282
              "    <tr>\n",
2283
              "      <td>55</td>\n",
2284
              "      <td>1.080300</td>\n",
2285
              "    </tr>\n",
2286
              "    <tr>\n",
2287
              "      <td>56</td>\n",
2288
              "      <td>1.029200</td>\n",
2289
              "    </tr>\n",
2290
              "    <tr>\n",
2291
              "      <td>57</td>\n",
2292
              "      <td>0.999400</td>\n",
2293
              "    </tr>\n",
2294
              "    <tr>\n",
2295
              "      <td>58</td>\n",
2296
              "      <td>0.795900</td>\n",
2297
              "    </tr>\n",
2298
              "    <tr>\n",
2299
              "      <td>59</td>\n",
2300
              "      <td>1.331600</td>\n",
2301
              "    </tr>\n",
2302
              "    <tr>\n",
2303
              "      <td>60</td>\n",
2304
              "      <td>1.099500</td>\n",
2305
              "    </tr>\n",
2306
              "    <tr>\n",
2307
              "      <td>61</td>\n",
2308
              "      <td>1.199000</td>\n",
2309
              "    </tr>\n",
2310
              "    <tr>\n",
2311
              "      <td>62</td>\n",
2312
              "      <td>1.146000</td>\n",
2313
              "    </tr>\n",
2314
              "    <tr>\n",
2315
              "      <td>63</td>\n",
2316
              "      <td>1.129000</td>\n",
2317
              "    </tr>\n",
2318
              "    <tr>\n",
2319
              "      <td>64</td>\n",
2320
              "      <td>1.109500</td>\n",
2321
              "    </tr>\n",
2322
              "    <tr>\n",
2323
              "      <td>65</td>\n",
2324
              "      <td>1.207000</td>\n",
2325
              "    </tr>\n",
2326
              "    <tr>\n",
2327
              "      <td>66</td>\n",
2328
              "      <td>1.360600</td>\n",
2329
              "    </tr>\n",
2330
              "    <tr>\n",
2331
              "      <td>67</td>\n",
2332
              "      <td>1.879000</td>\n",
2333
              "    </tr>\n",
2334
              "    <tr>\n",
2335
              "      <td>68</td>\n",
2336
              "      <td>1.317200</td>\n",
2337
              "    </tr>\n",
2338
              "    <tr>\n",
2339
              "      <td>69</td>\n",
2340
              "      <td>1.033300</td>\n",
2341
              "    </tr>\n",
2342
              "    <tr>\n",
2343
              "      <td>70</td>\n",
2344
              "      <td>1.153400</td>\n",
2345
              "    </tr>\n",
2346
              "    <tr>\n",
2347
              "      <td>71</td>\n",
2348
              "      <td>1.112400</td>\n",
2349
              "    </tr>\n",
2350
              "    <tr>\n",
2351
              "      <td>72</td>\n",
2352
              "      <td>1.218400</td>\n",
2353
              "    </tr>\n",
2354
              "    <tr>\n",
2355
              "      <td>73</td>\n",
2356
              "      <td>1.134600</td>\n",
2357
              "    </tr>\n",
2358
              "    <tr>\n",
2359
              "      <td>74</td>\n",
2360
              "      <td>1.053200</td>\n",
2361
              "    </tr>\n",
2362
              "    <tr>\n",
2363
              "      <td>75</td>\n",
2364
              "      <td>1.008900</td>\n",
2365
              "    </tr>\n",
2366
              "    <tr>\n",
2367
              "      <td>76</td>\n",
2368
              "      <td>1.077000</td>\n",
2369
              "    </tr>\n",
2370
              "    <tr>\n",
2371
              "      <td>77</td>\n",
2372
              "      <td>1.245000</td>\n",
2373
              "    </tr>\n",
2374
              "    <tr>\n",
2375
              "      <td>78</td>\n",
2376
              "      <td>1.395900</td>\n",
2377
              "    </tr>\n",
2378
              "    <tr>\n",
2379
              "      <td>79</td>\n",
2380
              "      <td>1.488800</td>\n",
2381
              "    </tr>\n",
2382
              "    <tr>\n",
2383
              "      <td>80</td>\n",
2384
              "      <td>1.382500</td>\n",
2385
              "    </tr>\n",
2386
              "    <tr>\n",
2387
              "      <td>81</td>\n",
2388
              "      <td>1.442200</td>\n",
2389
              "    </tr>\n",
2390
              "    <tr>\n",
2391
              "      <td>82</td>\n",
2392
              "      <td>1.028500</td>\n",
2393
              "    </tr>\n",
2394
              "    <tr>\n",
2395
              "      <td>83</td>\n",
2396
              "      <td>1.208500</td>\n",
2397
              "    </tr>\n",
2398
              "    <tr>\n",
2399
              "      <td>84</td>\n",
2400
              "      <td>1.780200</td>\n",
2401
              "    </tr>\n",
2402
              "    <tr>\n",
2403
              "      <td>85</td>\n",
2404
              "      <td>1.679300</td>\n",
2405
              "    </tr>\n",
2406
              "    <tr>\n",
2407
              "      <td>86</td>\n",
2408
              "      <td>1.276600</td>\n",
2409
              "    </tr>\n",
2410
              "    <tr>\n",
2411
              "      <td>87</td>\n",
2412
              "      <td>1.374600</td>\n",
2413
              "    </tr>\n",
2414
              "    <tr>\n",
2415
              "      <td>88</td>\n",
2416
              "      <td>1.490000</td>\n",
2417
              "    </tr>\n",
2418
              "    <tr>\n",
2419
              "      <td>89</td>\n",
2420
              "      <td>1.567100</td>\n",
2421
              "    </tr>\n",
2422
              "    <tr>\n",
2423
              "      <td>90</td>\n",
2424
              "      <td>1.435000</td>\n",
2425
              "    </tr>\n",
2426
              "    <tr>\n",
2427
              "      <td>91</td>\n",
2428
              "      <td>1.329800</td>\n",
2429
              "    </tr>\n",
2430
              "    <tr>\n",
2431
              "      <td>92</td>\n",
2432
              "      <td>1.387600</td>\n",
2433
              "    </tr>\n",
2434
              "    <tr>\n",
2435
              "      <td>93</td>\n",
2436
              "      <td>0.971400</td>\n",
2437
              "    </tr>\n",
2438
              "    <tr>\n",
2439
              "      <td>94</td>\n",
2440
              "      <td>1.293800</td>\n",
2441
              "    </tr>\n",
2442
              "    <tr>\n",
2443
              "      <td>95</td>\n",
2444
              "      <td>1.585900</td>\n",
2445
              "    </tr>\n",
2446
              "    <tr>\n",
2447
              "      <td>96</td>\n",
2448
              "      <td>1.431700</td>\n",
2449
              "    </tr>\n",
2450
              "    <tr>\n",
2451
              "      <td>97</td>\n",
2452
              "      <td>1.948900</td>\n",
2453
              "    </tr>\n",
2454
              "    <tr>\n",
2455
              "      <td>98</td>\n",
2456
              "      <td>1.630500</td>\n",
2457
              "    </tr>\n",
2458
              "    <tr>\n",
2459
              "      <td>99</td>\n",
2460
              "      <td>1.839100</td>\n",
2461
              "    </tr>\n",
2462
              "    <tr>\n",
2463
              "      <td>100</td>\n",
2464
              "      <td>1.740900</td>\n",
2465
              "    </tr>\n",
2466
              "    <tr>\n",
2467
              "      <td>101</td>\n",
2468
              "      <td>0.717200</td>\n",
2469
              "    </tr>\n",
2470
              "    <tr>\n",
2471
              "      <td>102</td>\n",
2472
              "      <td>0.958100</td>\n",
2473
              "    </tr>\n",
2474
              "    <tr>\n",
2475
              "      <td>103</td>\n",
2476
              "      <td>1.625900</td>\n",
2477
              "    </tr>\n",
2478
              "    <tr>\n",
2479
              "      <td>104</td>\n",
2480
              "      <td>1.150000</td>\n",
2481
              "    </tr>\n",
2482
              "    <tr>\n",
2483
              "      <td>105</td>\n",
2484
              "      <td>0.999200</td>\n",
2485
              "    </tr>\n",
2486
              "    <tr>\n",
2487
              "      <td>106</td>\n",
2488
              "      <td>1.253100</td>\n",
2489
              "    </tr>\n",
2490
              "    <tr>\n",
2491
              "      <td>107</td>\n",
2492
              "      <td>1.007600</td>\n",
2493
              "    </tr>\n",
2494
              "    <tr>\n",
2495
              "      <td>108</td>\n",
2496
              "      <td>1.049700</td>\n",
2497
              "    </tr>\n",
2498
              "    <tr>\n",
2499
              "      <td>109</td>\n",
2500
              "      <td>1.265900</td>\n",
2501
              "    </tr>\n",
2502
              "    <tr>\n",
2503
              "      <td>110</td>\n",
2504
              "      <td>1.251300</td>\n",
2505
              "    </tr>\n",
2506
              "    <tr>\n",
2507
              "      <td>111</td>\n",
2508
              "      <td>1.109500</td>\n",
2509
              "    </tr>\n",
2510
              "    <tr>\n",
2511
              "      <td>112</td>\n",
2512
              "      <td>1.652500</td>\n",
2513
              "    </tr>\n",
2514
              "    <tr>\n",
2515
              "      <td>113</td>\n",
2516
              "      <td>1.238000</td>\n",
2517
              "    </tr>\n",
2518
              "    <tr>\n",
2519
              "      <td>114</td>\n",
2520
              "      <td>1.521300</td>\n",
2521
              "    </tr>\n",
2522
              "    <tr>\n",
2523
              "      <td>115</td>\n",
2524
              "      <td>1.002400</td>\n",
2525
              "    </tr>\n",
2526
              "    <tr>\n",
2527
              "      <td>116</td>\n",
2528
              "      <td>0.982400</td>\n",
2529
              "    </tr>\n",
2530
              "    <tr>\n",
2531
              "      <td>117</td>\n",
2532
              "      <td>1.389300</td>\n",
2533
              "    </tr>\n",
2534
              "    <tr>\n",
2535
              "      <td>118</td>\n",
2536
              "      <td>1.114900</td>\n",
2537
              "    </tr>\n",
2538
              "    <tr>\n",
2539
              "      <td>119</td>\n",
2540
              "      <td>1.093900</td>\n",
2541
              "    </tr>\n",
2542
              "    <tr>\n",
2543
              "      <td>120</td>\n",
2544
              "      <td>1.254200</td>\n",
2545
              "    </tr>\n",
2546
              "    <tr>\n",
2547
              "      <td>121</td>\n",
2548
              "      <td>1.132300</td>\n",
2549
              "    </tr>\n",
2550
              "    <tr>\n",
2551
              "      <td>122</td>\n",
2552
              "      <td>0.925300</td>\n",
2553
              "    </tr>\n",
2554
              "    <tr>\n",
2555
              "      <td>123</td>\n",
2556
              "      <td>1.292700</td>\n",
2557
              "    </tr>\n",
2558
              "    <tr>\n",
2559
              "      <td>124</td>\n",
2560
              "      <td>1.317600</td>\n",
2561
              "    </tr>\n",
2562
              "    <tr>\n",
2563
              "      <td>125</td>\n",
2564
              "      <td>1.080400</td>\n",
2565
              "    </tr>\n",
2566
              "    <tr>\n",
2567
              "      <td>126</td>\n",
2568
              "      <td>0.918800</td>\n",
2569
              "    </tr>\n",
2570
              "    <tr>\n",
2571
              "      <td>127</td>\n",
2572
              "      <td>1.203400</td>\n",
2573
              "    </tr>\n",
2574
              "    <tr>\n",
2575
              "      <td>128</td>\n",
2576
              "      <td>1.098800</td>\n",
2577
              "    </tr>\n",
2578
              "    <tr>\n",
2579
              "      <td>129</td>\n",
2580
              "      <td>1.360800</td>\n",
2581
              "    </tr>\n",
2582
              "    <tr>\n",
2583
              "      <td>130</td>\n",
2584
              "      <td>1.256900</td>\n",
2585
              "    </tr>\n",
2586
              "    <tr>\n",
2587
              "      <td>131</td>\n",
2588
              "      <td>1.392600</td>\n",
2589
              "    </tr>\n",
2590
              "    <tr>\n",
2591
              "      <td>132</td>\n",
2592
              "      <td>1.167600</td>\n",
2593
              "    </tr>\n",
2594
              "    <tr>\n",
2595
              "      <td>133</td>\n",
2596
              "      <td>1.134900</td>\n",
2597
              "    </tr>\n",
2598
              "    <tr>\n",
2599
              "      <td>134</td>\n",
2600
              "      <td>1.423700</td>\n",
2601
              "    </tr>\n",
2602
              "    <tr>\n",
2603
              "      <td>135</td>\n",
2604
              "      <td>1.111200</td>\n",
2605
              "    </tr>\n",
2606
              "    <tr>\n",
2607
              "      <td>136</td>\n",
2608
              "      <td>1.081600</td>\n",
2609
              "    </tr>\n",
2610
              "    <tr>\n",
2611
              "      <td>137</td>\n",
2612
              "      <td>1.806000</td>\n",
2613
              "    </tr>\n",
2614
              "    <tr>\n",
2615
              "      <td>138</td>\n",
2616
              "      <td>1.238800</td>\n",
2617
              "    </tr>\n",
2618
              "    <tr>\n",
2619
              "      <td>139</td>\n",
2620
              "      <td>1.306800</td>\n",
2621
              "    </tr>\n",
2622
              "    <tr>\n",
2623
              "      <td>140</td>\n",
2624
              "      <td>1.421900</td>\n",
2625
              "    </tr>\n",
2626
              "    <tr>\n",
2627
              "      <td>141</td>\n",
2628
              "      <td>1.467300</td>\n",
2629
              "    </tr>\n",
2630
              "    <tr>\n",
2631
              "      <td>142</td>\n",
2632
              "      <td>1.245100</td>\n",
2633
              "    </tr>\n",
2634
              "    <tr>\n",
2635
              "      <td>143</td>\n",
2636
              "      <td>1.594200</td>\n",
2637
              "    </tr>\n",
2638
              "    <tr>\n",
2639
              "      <td>144</td>\n",
2640
              "      <td>1.426000</td>\n",
2641
              "    </tr>\n",
2642
              "    <tr>\n",
2643
              "      <td>145</td>\n",
2644
              "      <td>1.393800</td>\n",
2645
              "    </tr>\n",
2646
              "    <tr>\n",
2647
              "      <td>146</td>\n",
2648
              "      <td>1.894400</td>\n",
2649
              "    </tr>\n",
2650
              "    <tr>\n",
2651
              "      <td>147</td>\n",
2652
              "      <td>1.331200</td>\n",
2653
              "    </tr>\n",
2654
              "    <tr>\n",
2655
              "      <td>148</td>\n",
2656
              "      <td>1.519400</td>\n",
2657
              "    </tr>\n",
2658
              "    <tr>\n",
2659
              "      <td>149</td>\n",
2660
              "      <td>1.926300</td>\n",
2661
              "    </tr>\n",
2662
              "    <tr>\n",
2663
              "      <td>150</td>\n",
2664
              "      <td>1.293200</td>\n",
2665
              "    </tr>\n",
2666
              "    <tr>\n",
2667
              "      <td>151</td>\n",
2668
              "      <td>1.135100</td>\n",
2669
              "    </tr>\n",
2670
              "    <tr>\n",
2671
              "      <td>152</td>\n",
2672
              "      <td>1.066700</td>\n",
2673
              "    </tr>\n",
2674
              "    <tr>\n",
2675
              "      <td>153</td>\n",
2676
              "      <td>0.856900</td>\n",
2677
              "    </tr>\n",
2678
              "    <tr>\n",
2679
              "      <td>154</td>\n",
2680
              "      <td>1.021500</td>\n",
2681
              "    </tr>\n",
2682
              "    <tr>\n",
2683
              "      <td>155</td>\n",
2684
              "      <td>0.808800</td>\n",
2685
              "    </tr>\n",
2686
              "    <tr>\n",
2687
              "      <td>156</td>\n",
2688
              "      <td>0.936300</td>\n",
2689
              "    </tr>\n",
2690
              "    <tr>\n",
2691
              "      <td>157</td>\n",
2692
              "      <td>0.979700</td>\n",
2693
              "    </tr>\n",
2694
              "    <tr>\n",
2695
              "      <td>158</td>\n",
2696
              "      <td>1.100200</td>\n",
2697
              "    </tr>\n",
2698
              "    <tr>\n",
2699
              "      <td>159</td>\n",
2700
              "      <td>1.091400</td>\n",
2701
              "    </tr>\n",
2702
              "    <tr>\n",
2703
              "      <td>160</td>\n",
2704
              "      <td>0.918800</td>\n",
2705
              "    </tr>\n",
2706
              "    <tr>\n",
2707
              "      <td>161</td>\n",
2708
              "      <td>1.370800</td>\n",
2709
              "    </tr>\n",
2710
              "    <tr>\n",
2711
              "      <td>162</td>\n",
2712
              "      <td>1.380300</td>\n",
2713
              "    </tr>\n",
2714
              "    <tr>\n",
2715
              "      <td>163</td>\n",
2716
              "      <td>0.965300</td>\n",
2717
              "    </tr>\n",
2718
              "    <tr>\n",
2719
              "      <td>164</td>\n",
2720
              "      <td>1.142400</td>\n",
2721
              "    </tr>\n",
2722
              "    <tr>\n",
2723
              "      <td>165</td>\n",
2724
              "      <td>1.436400</td>\n",
2725
              "    </tr>\n",
2726
              "    <tr>\n",
2727
              "      <td>166</td>\n",
2728
              "      <td>0.970400</td>\n",
2729
              "    </tr>\n",
2730
              "    <tr>\n",
2731
              "      <td>167</td>\n",
2732
              "      <td>0.872600</td>\n",
2733
              "    </tr>\n",
2734
              "    <tr>\n",
2735
              "      <td>168</td>\n",
2736
              "      <td>1.662500</td>\n",
2737
              "    </tr>\n",
2738
              "    <tr>\n",
2739
              "      <td>169</td>\n",
2740
              "      <td>1.623500</td>\n",
2741
              "    </tr>\n",
2742
              "    <tr>\n",
2743
              "      <td>170</td>\n",
2744
              "      <td>1.481700</td>\n",
2745
              "    </tr>\n",
2746
              "    <tr>\n",
2747
              "      <td>171</td>\n",
2748
              "      <td>0.822300</td>\n",
2749
              "    </tr>\n",
2750
              "    <tr>\n",
2751
              "      <td>172</td>\n",
2752
              "      <td>1.605500</td>\n",
2753
              "    </tr>\n",
2754
              "    <tr>\n",
2755
              "      <td>173</td>\n",
2756
              "      <td>1.769800</td>\n",
2757
              "    </tr>\n",
2758
              "    <tr>\n",
2759
              "      <td>174</td>\n",
2760
              "      <td>1.320100</td>\n",
2761
              "    </tr>\n",
2762
              "    <tr>\n",
2763
              "      <td>175</td>\n",
2764
              "      <td>0.969300</td>\n",
2765
              "    </tr>\n",
2766
              "    <tr>\n",
2767
              "      <td>176</td>\n",
2768
              "      <td>0.798700</td>\n",
2769
              "    </tr>\n",
2770
              "    <tr>\n",
2771
              "      <td>177</td>\n",
2772
              "      <td>1.233200</td>\n",
2773
              "    </tr>\n",
2774
              "    <tr>\n",
2775
              "      <td>178</td>\n",
2776
              "      <td>1.168500</td>\n",
2777
              "    </tr>\n",
2778
              "    <tr>\n",
2779
              "      <td>179</td>\n",
2780
              "      <td>1.251400</td>\n",
2781
              "    </tr>\n",
2782
              "    <tr>\n",
2783
              "      <td>180</td>\n",
2784
              "      <td>1.221500</td>\n",
2785
              "    </tr>\n",
2786
              "    <tr>\n",
2787
              "      <td>181</td>\n",
2788
              "      <td>1.491100</td>\n",
2789
              "    </tr>\n",
2790
              "    <tr>\n",
2791
              "      <td>182</td>\n",
2792
              "      <td>1.010200</td>\n",
2793
              "    </tr>\n",
2794
              "    <tr>\n",
2795
              "      <td>183</td>\n",
2796
              "      <td>1.375500</td>\n",
2797
              "    </tr>\n",
2798
              "    <tr>\n",
2799
              "      <td>184</td>\n",
2800
              "      <td>1.722900</td>\n",
2801
              "    </tr>\n",
2802
              "    <tr>\n",
2803
              "      <td>185</td>\n",
2804
              "      <td>1.179300</td>\n",
2805
              "    </tr>\n",
2806
              "    <tr>\n",
2807
              "      <td>186</td>\n",
2808
              "      <td>1.474400</td>\n",
2809
              "    </tr>\n",
2810
              "    <tr>\n",
2811
              "      <td>187</td>\n",
2812
              "      <td>1.968200</td>\n",
2813
              "    </tr>\n",
2814
              "    <tr>\n",
2815
              "      <td>188</td>\n",
2816
              "      <td>1.297200</td>\n",
2817
              "    </tr>\n",
2818
              "    <tr>\n",
2819
              "      <td>189</td>\n",
2820
              "      <td>1.564500</td>\n",
2821
              "    </tr>\n",
2822
              "    <tr>\n",
2823
              "      <td>190</td>\n",
2824
              "      <td>1.480700</td>\n",
2825
              "    </tr>\n",
2826
              "    <tr>\n",
2827
              "      <td>191</td>\n",
2828
              "      <td>1.464700</td>\n",
2829
              "    </tr>\n",
2830
              "    <tr>\n",
2831
              "      <td>192</td>\n",
2832
              "      <td>1.901400</td>\n",
2833
              "    </tr>\n",
2834
              "    <tr>\n",
2835
              "      <td>193</td>\n",
2836
              "      <td>1.620100</td>\n",
2837
              "    </tr>\n",
2838
              "    <tr>\n",
2839
              "      <td>194</td>\n",
2840
              "      <td>1.509000</td>\n",
2841
              "    </tr>\n",
2842
              "    <tr>\n",
2843
              "      <td>195</td>\n",
2844
              "      <td>1.587000</td>\n",
2845
              "    </tr>\n",
2846
              "    <tr>\n",
2847
              "      <td>196</td>\n",
2848
              "      <td>1.510000</td>\n",
2849
              "    </tr>\n",
2850
              "    <tr>\n",
2851
              "      <td>197</td>\n",
2852
              "      <td>1.773900</td>\n",
2853
              "    </tr>\n",
2854
              "    <tr>\n",
2855
              "      <td>198</td>\n",
2856
              "      <td>1.473200</td>\n",
2857
              "    </tr>\n",
2858
              "    <tr>\n",
2859
              "      <td>199</td>\n",
2860
              "      <td>1.660400</td>\n",
2861
              "    </tr>\n",
2862
              "    <tr>\n",
2863
              "      <td>200</td>\n",
2864
              "      <td>1.832600</td>\n",
2865
              "    </tr>\n",
2866
              "    <tr>\n",
2867
              "      <td>201</td>\n",
2868
              "      <td>1.021400</td>\n",
2869
              "    </tr>\n",
2870
              "    <tr>\n",
2871
              "      <td>202</td>\n",
2872
              "      <td>1.120400</td>\n",
2873
              "    </tr>\n",
2874
              "    <tr>\n",
2875
              "      <td>203</td>\n",
2876
              "      <td>1.030200</td>\n",
2877
              "    </tr>\n",
2878
              "    <tr>\n",
2879
              "      <td>204</td>\n",
2880
              "      <td>1.167500</td>\n",
2881
              "    </tr>\n",
2882
              "    <tr>\n",
2883
              "      <td>205</td>\n",
2884
              "      <td>0.853200</td>\n",
2885
              "    </tr>\n",
2886
              "    <tr>\n",
2887
              "      <td>206</td>\n",
2888
              "      <td>0.927000</td>\n",
2889
              "    </tr>\n",
2890
              "    <tr>\n",
2891
              "      <td>207</td>\n",
2892
              "      <td>1.157400</td>\n",
2893
              "    </tr>\n",
2894
              "    <tr>\n",
2895
              "      <td>208</td>\n",
2896
              "      <td>1.071600</td>\n",
2897
              "    </tr>\n",
2898
              "    <tr>\n",
2899
              "      <td>209</td>\n",
2900
              "      <td>1.195400</td>\n",
2901
              "    </tr>\n",
2902
              "    <tr>\n",
2903
              "      <td>210</td>\n",
2904
              "      <td>1.155800</td>\n",
2905
              "    </tr>\n",
2906
              "    <tr>\n",
2907
              "      <td>211</td>\n",
2908
              "      <td>1.502300</td>\n",
2909
              "    </tr>\n",
2910
              "    <tr>\n",
2911
              "      <td>212</td>\n",
2912
              "      <td>1.091600</td>\n",
2913
              "    </tr>\n",
2914
              "    <tr>\n",
2915
              "      <td>213</td>\n",
2916
              "      <td>1.225200</td>\n",
2917
              "    </tr>\n",
2918
              "    <tr>\n",
2919
              "      <td>214</td>\n",
2920
              "      <td>1.148900</td>\n",
2921
              "    </tr>\n",
2922
              "    <tr>\n",
2923
              "      <td>215</td>\n",
2924
              "      <td>1.238200</td>\n",
2925
              "    </tr>\n",
2926
              "    <tr>\n",
2927
              "      <td>216</td>\n",
2928
              "      <td>1.600200</td>\n",
2929
              "    </tr>\n",
2930
              "    <tr>\n",
2931
              "      <td>217</td>\n",
2932
              "      <td>1.203600</td>\n",
2933
              "    </tr>\n",
2934
              "    <tr>\n",
2935
              "      <td>218</td>\n",
2936
              "      <td>1.266200</td>\n",
2937
              "    </tr>\n",
2938
              "    <tr>\n",
2939
              "      <td>219</td>\n",
2940
              "      <td>0.970900</td>\n",
2941
              "    </tr>\n",
2942
              "    <tr>\n",
2943
              "      <td>220</td>\n",
2944
              "      <td>1.451000</td>\n",
2945
              "    </tr>\n",
2946
              "    <tr>\n",
2947
              "      <td>221</td>\n",
2948
              "      <td>1.281300</td>\n",
2949
              "    </tr>\n",
2950
              "    <tr>\n",
2951
              "      <td>222</td>\n",
2952
              "      <td>0.952500</td>\n",
2953
              "    </tr>\n",
2954
              "    <tr>\n",
2955
              "      <td>223</td>\n",
2956
              "      <td>1.313800</td>\n",
2957
              "    </tr>\n",
2958
              "    <tr>\n",
2959
              "      <td>224</td>\n",
2960
              "      <td>0.915700</td>\n",
2961
              "    </tr>\n",
2962
              "    <tr>\n",
2963
              "      <td>225</td>\n",
2964
              "      <td>1.040000</td>\n",
2965
              "    </tr>\n",
2966
              "    <tr>\n",
2967
              "      <td>226</td>\n",
2968
              "      <td>1.493800</td>\n",
2969
              "    </tr>\n",
2970
              "    <tr>\n",
2971
              "      <td>227</td>\n",
2972
              "      <td>1.186400</td>\n",
2973
              "    </tr>\n",
2974
              "    <tr>\n",
2975
              "      <td>228</td>\n",
2976
              "      <td>1.278700</td>\n",
2977
              "    </tr>\n",
2978
              "    <tr>\n",
2979
              "      <td>229</td>\n",
2980
              "      <td>1.061100</td>\n",
2981
              "    </tr>\n",
2982
              "    <tr>\n",
2983
              "      <td>230</td>\n",
2984
              "      <td>1.209000</td>\n",
2985
              "    </tr>\n",
2986
              "    <tr>\n",
2987
              "      <td>231</td>\n",
2988
              "      <td>0.881400</td>\n",
2989
              "    </tr>\n",
2990
              "    <tr>\n",
2991
              "      <td>232</td>\n",
2992
              "      <td>1.659300</td>\n",
2993
              "    </tr>\n",
2994
              "    <tr>\n",
2995
              "      <td>233</td>\n",
2996
              "      <td>1.135200</td>\n",
2997
              "    </tr>\n",
2998
              "    <tr>\n",
2999
              "      <td>234</td>\n",
3000
              "      <td>1.497800</td>\n",
3001
              "    </tr>\n",
3002
              "    <tr>\n",
3003
              "      <td>235</td>\n",
3004
              "      <td>1.557500</td>\n",
3005
              "    </tr>\n",
3006
              "    <tr>\n",
3007
              "      <td>236</td>\n",
3008
              "      <td>0.849200</td>\n",
3009
              "    </tr>\n",
3010
              "    <tr>\n",
3011
              "      <td>237</td>\n",
3012
              "      <td>1.329200</td>\n",
3013
              "    </tr>\n",
3014
              "    <tr>\n",
3015
              "      <td>238</td>\n",
3016
              "      <td>1.147700</td>\n",
3017
              "    </tr>\n",
3018
              "    <tr>\n",
3019
              "      <td>239</td>\n",
3020
              "      <td>1.764600</td>\n",
3021
              "    </tr>\n",
3022
              "    <tr>\n",
3023
              "      <td>240</td>\n",
3024
              "      <td>1.740000</td>\n",
3025
              "    </tr>\n",
3026
              "    <tr>\n",
3027
              "      <td>241</td>\n",
3028
              "      <td>2.043700</td>\n",
3029
              "    </tr>\n",
3030
              "    <tr>\n",
3031
              "      <td>242</td>\n",
3032
              "      <td>1.675000</td>\n",
3033
              "    </tr>\n",
3034
              "    <tr>\n",
3035
              "      <td>243</td>\n",
3036
              "      <td>1.809600</td>\n",
3037
              "    </tr>\n",
3038
              "    <tr>\n",
3039
              "      <td>244</td>\n",
3040
              "      <td>1.721400</td>\n",
3041
              "    </tr>\n",
3042
              "    <tr>\n",
3043
              "      <td>245</td>\n",
3044
              "      <td>2.343300</td>\n",
3045
              "    </tr>\n",
3046
              "    <tr>\n",
3047
              "      <td>246</td>\n",
3048
              "      <td>1.830400</td>\n",
3049
              "    </tr>\n",
3050
              "    <tr>\n",
3051
              "      <td>247</td>\n",
3052
              "      <td>1.754400</td>\n",
3053
              "    </tr>\n",
3054
              "    <tr>\n",
3055
              "      <td>248</td>\n",
3056
              "      <td>1.741900</td>\n",
3057
              "    </tr>\n",
3058
              "    <tr>\n",
3059
              "      <td>249</td>\n",
3060
              "      <td>2.011000</td>\n",
3061
              "    </tr>\n",
3062
              "    <tr>\n",
3063
              "      <td>250</td>\n",
3064
              "      <td>1.741700</td>\n",
3065
              "    </tr>\n",
3066
              "  </tbody>\n",
3067
              "</table><p>"
3068
            ]
3069
          },
3070
          "metadata": {}
3071
        }
3072
      ]
3073
    },
3074
    {
3075
      "cell_type": "code",
3076
      "source": [
3077
        "# %load_ext tensorboard\n",
3078
        "# %tensorboard --logdir results/runs"
3079
      ],
3080
      "metadata": {
3081
        "id": "crj9svNe4hU5"
3082
      },
3083
      "execution_count": null,
3084
      "outputs": []
3085
    },
3086
    {
3087
      "cell_type": "code",
3088
      "source": [
3089
        "# Ignore warnings\n",
3090
        "logging.set_verbosity(logging.CRITICAL)\n",
3091
        "\n",
3092
        "# Run text generation pipeline with our next model\n",
3093
        "prompt = \"What is a large language model?\"\n",
3094
        "pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n",
3095
        "result = pipe(f\"<s>[INST] {prompt} [/INST]\")\n",
3096
        "print(result[0]['generated_text'])"
3097
      ],
3098
      "metadata": {
3099
        "id": "frlSLPin4IJ4",
3100
        "colab": {
3101
          "base_uri": "https://localhost:8080/"
3102
        },
3103
        "outputId": "e5bf6b3a-f20e-49f7-e0b7-36f71ca207c1"
3104
      },
3105
      "execution_count": null,
3106
      "outputs": [
3107
        {
3108
          "output_type": "stream",
3109
          "name": "stderr",
3110
          "text": [
3111
            "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1270: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation )\n",
3112
            "  warnings.warn(\n",
3113
            "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
3114
            "  warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n"
3115
          ]
3116
        },
3117
        {
3118
          "output_type": "stream",
3119
          "name": "stdout",
3120
          "text": [
3121
            "<s>[INST] What is a large language model? [/INST] A large language model is a type of artificial intelligence (AI) model that is trained on a large dataset of text to generate human-like language outputs. It is designed to be able to understand and generate text in a way that is similar to human language, and can be used for a wide range of applications such as chatbots, language translation, and text summarization.\n",
3122
            "\n",
3123
            "Large language models are typically trained using deep learning techniques, such as recurrent neural networks (RNNs) or transformer models, and are often based on pre-trained models such as BERT or RoBERTa. These models are trained on large datasets of text, such as books, articles, or websites, and are designed to learn the patterns and structures of language.\n",
3124
            "\n",
3125
            "Some examples of large language models include:\n",
3126
            "\n",
3127
            "* BERT (Bidirectional Encoder Representations from Transformers\n"
3128
          ]
3129
        }
3130
      ]
3131
    },
3132
    {
3133
      "cell_type": "code",
3134
      "source": [
3135
        "# Empty VRAM\n",
3136
        "del model\n",
3137
        "del pipe\n",
3138
        "del trainer\n",
3139
        "import gc\n",
3140
        "gc.collect()\n",
3141
        "gc.collect()"
3142
      ],
3143
      "metadata": {
3144
        "colab": {
3145
          "base_uri": "https://localhost:8080/"
3146
        },
3147
        "id": "mkQCviG0Zta-",
3148
        "outputId": "e7c4ab10-4039-4490-b7f0-6ea118bdd709"
3149
      },
3150
      "execution_count": null,
3151
      "outputs": [
3152
        {
3153
          "output_type": "execute_result",
3154
          "data": {
3155
            "text/plain": [
3156
              "19965"
3157
            ]
3158
          },
3159
          "metadata": {},
3160
          "execution_count": 7
3161
        }
3162
      ]
3163
    },
3164
    {
3165
      "cell_type": "code",
3166
      "source": [
3167
        "# Reload model in FP16 and merge it with LoRA weights\n",
3168
        "base_model = AutoModelForCausalLM.from_pretrained(\n",
3169
        "    model_name,\n",
3170
        "    low_cpu_mem_usage=True,\n",
3171
        "    return_dict=True,\n",
3172
        "    torch_dtype=torch.float16,\n",
3173
        "    device_map=device_map,\n",
3174
        ")\n",
3175
        "model = PeftModel.from_pretrained(base_model, new_model)\n",
3176
        "model = model.merge_and_unload()\n",
3177
        "\n",
3178
        "# Reload tokenizer to save it\n",
3179
        "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
3180
        "tokenizer.pad_token = tokenizer.eos_token\n",
3181
        "tokenizer.padding_side = \"right\""
3182
      ],
3183
      "metadata": {
3184
        "id": "QQn30cRtAZ-P",
3185
        "colab": {
3186
          "base_uri": "https://localhost:8080/",
3187
          "height": 49,
3188
          "referenced_widgets": [
3189
            "051d193cd87f47c1971fb87544e1e615",
3190
            "9d7247c119e642c5894f15ca6974ef3e",
3191
            "a79c22bb34ec4f698a00752b47a6f631",
3192
            "d95f3a3f26c6470d984542cdfd68bec1",
3193
            "343e11c62a59448eb43bbc0c31bf5f11",
3194
            "a153c96bd1fe4c48a41e9b9c7c00dd6e",
3195
            "84da055d24694320843e13ad37438792",
3196
            "e375632975904402baea46163e2eeca1",
3197
            "95501d0b5a22407288f008bf8cc69726",
3198
            "6aef866a6c474dfabb2140ded933c5aa",
3199
            "d66fa096d442423c9447cbfbdc1aad8d"
3200
          ]
3201
        },
3202
        "outputId": "1c5ef3c4-d107-4c43-9bd6-0ca72903db0e"
3203
      },
3204
      "execution_count": null,
3205
      "outputs": [
3206
        {
3207
          "output_type": "display_data",
3208
          "data": {
3209
            "text/plain": [
3210
              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
3211
            ],
3212
            "application/vnd.jupyter.widget-view+json": {
3213
              "version_major": 2,
3214
              "version_minor": 0,
3215
              "model_id": "051d193cd87f47c1971fb87544e1e615"
3216
            }
3217
          },
3218
          "metadata": {}
3219
        }
3220
      ]
3221
    },
3222
    {
3223
      "cell_type": "code",
3224
      "source": [
3225
        "!huggingface-cli login\n",
3226
        "\n",
3227
        "model.push_to_hub(new_model, use_temp_dir=False)\n",
3228
        "tokenizer.push_to_hub(new_model, use_temp_dir=False)"
3229
      ],
3230
      "metadata": {
3231
        "id": "x-xPb-_qB0dz",
3232
        "colab": {
3233
          "base_uri": "https://localhost:8080/",
3234
          "height": 373,
3235
          "referenced_widgets": [
3236
            "c99aff4cfd664ae8a165a27bea0566c8",
3237
            "e4b64cab6b7b418c8a2575ee26839039",
3238
            "c3a4fedc73b3480089ef9d13381471ed",
3239
            "bf722f71c61b4285bcbbf32fd619b3a6",
3240
            "fd11a6148b704c5b9142c5e8de2d3b25",
3241
            "f0bcdaf940d14ad796fc7ac46c8e1e64",
3242
            "b6e821c974674f2290c354238d6c919c",
3243
            "eeba50e8242c4753bfc0ea48e03f9078",
3244
            "7a1f3340688d408092adade75f4baac4",
3245
            "8c887ca9b0eb44fdb8608bf36b5db5c5",
3246
            "e4698337e6b843afac706ab657ca6af9",
3247
            "1af01f1f1aac42b8bff46fe4df8a59ad",
3248
            "eee8731f316244eda5ff0765fd12bf85",
3249
            "f135278e410f4b708435bb80fb630bcf",
3250
            "2e6fc79bf5c149d6b0bc5c52e18debc7",
3251
            "a4b0debc025444a59abd6953b3512c0d",
3252
            "130120644beb48acbc038651459af43c",
3253
            "bf77e97593a349718bdb5fd9bfd28fe3",
3254
            "f7292741953e47699540ef8712fc0d8d",
3255
            "9434350b1b9c4060812feb9ecbf63278",
3256
            "b29647e268414329be56047e522e28b9",
3257
            "27bb18a199ca47108c7a61e9c443de36",
3258
            "33ebb868f3e846f6af1a1a2a8ad6a3cb",
3259
            "1f73f8b4d4da4e74adc135f2a2f6ee65",
3260
            "68da6e6e69c8419895bea2068760534e",
3261
            "6dc1a868e08c4c3b8315116d2c46573b",
3262
            "7a5d714c17374104bb6f5caaa5541c10",
3263
            "1b6c59a51359453c926bfcddb3d0f0ea",
3264
            "dac3669f18284161a58d52f26dffb761",
3265
            "a3511f489f6d47cc8d404ab6f367b29f",
3266
            "20670478612f4b1a8a5f23d71a2609a7",
3267
            "b463153ec04749e38540389efa2981f7",
3268
            "2bb3d36d248a48fba364f14d9e840306"
3269
          ]
3270
        },
3271
        "outputId": "6ed9166c-5f92-4375-eca5-dbb247c0e13a"
3272
      },
3273
      "execution_count": null,
3274
      "outputs": [
3275
        {
3276
          "output_type": "stream",
3277
          "name": "stdout",
3278
          "text": [
3279
            "\n",
3280
            "    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|\n",
3281
            "    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|\n",
3282
            "    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|\n",
3283
            "    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|\n",
3284
            "    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|\n",
3285
            "    \n",
3286
            "    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
3287
            "Token: \n",
3288
            "Add token as git credential? (Y/n) n\n",
3289
            "Token is valid (permission: write).\n",
3290
            "Your token has been saved to /root/.cache/huggingface/token\n",
3291
            "Login successful\n"
3292
          ]
3293
        },
3294
        {
3295
          "output_type": "display_data",
3296
          "data": {
3297
            "text/plain": [
3298
              "Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]"
3299
            ],
3300
            "application/vnd.jupyter.widget-view+json": {
3301
              "version_major": 2,
3302
              "version_minor": 0,
3303
              "model_id": "c99aff4cfd664ae8a165a27bea0566c8"
3304
            }
3305
          },
3306
          "metadata": {}
3307
        },
3308
        {
3309
          "output_type": "display_data",
3310
          "data": {
3311
            "text/plain": [
3312
              "pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]"
3313
            ],
3314
            "application/vnd.jupyter.widget-view+json": {
3315
              "version_major": 2,
3316
              "version_minor": 0,
3317
              "model_id": "1af01f1f1aac42b8bff46fe4df8a59ad"
3318
            }
3319
          },
3320
          "metadata": {}
3321
        },
3322
        {
3323
          "output_type": "display_data",
3324
          "data": {
3325
            "text/plain": [
3326
              "pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]"
3327
            ],
3328
            "application/vnd.jupyter.widget-view+json": {
3329
              "version_major": 2,
3330
              "version_minor": 0,
3331
              "model_id": "33ebb868f3e846f6af1a1a2a8ad6a3cb"
3332
            }
3333
          },
3334
          "metadata": {}
3335
        },
3336
        {
3337
          "output_type": "execute_result",
3338
          "data": {
3339
            "text/plain": [
3340
              "CommitInfo(commit_url='https://huggingface.co/mlabonne/llama-2-7b-miniguanaco/commit/c81a32fd0b4d39e252326e639d63e75aa68c9a4a', commit_message='Upload tokenizer', commit_description='', oid='c81a32fd0b4d39e252326e639d63e75aa68c9a4a', pr_url=None, pr_revision=None, pr_num=None)"
3341
            ]
3342
          },
3343
          "metadata": {},
3344
          "execution_count": 10
3345
        }
3346
      ]
3347
    }
3348
  ]
3349
}

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.