llm-course
/
Mergekit.ipynb
1770 строк · 62.3 Кб
1{
2"nbformat": 4,
3"nbformat_minor": 0,
4"metadata": {
5"colab": {
6"provenance": [],
7"machine_shape": "hm",
8"authorship_tag": "ABX9TyO11tndDoFEdL4+/FDgLad9",
9"include_colab_link": true
10},
11"kernelspec": {
12"name": "python3",
13"display_name": "Python 3"
14},
15"language_info": {
16"name": "python"
17},
18"widgets": {
19"application/vnd.jupyter.widget-state+json": {
20"de24d272f2b842c5b01eedb3f536b810": {
21"model_module": "@jupyter-widgets/controls",
22"model_name": "HBoxModel",
23"model_module_version": "1.5.0",
24"state": {
25"_dom_classes": [],
26"_model_module": "@jupyter-widgets/controls",
27"_model_module_version": "1.5.0",
28"_model_name": "HBoxModel",
29"_view_count": null,
30"_view_module": "@jupyter-widgets/controls",
31"_view_module_version": "1.5.0",
32"_view_name": "HBoxView",
33"box_style": "",
34"children": [
35"IPY_MODEL_0c5dab2657b2473385a424d90f3d4664",
36"IPY_MODEL_57efe36e546c473d8be34102f6ba9a58",
37"IPY_MODEL_871bad1d905d4877a9eaa242cfd54c4e"
38],
39"layout": "IPY_MODEL_8951f6b2edf64464869391197c900f84"
40}
41},
42"0c5dab2657b2473385a424d90f3d4664": {
43"model_module": "@jupyter-widgets/controls",
44"model_name": "HTMLModel",
45"model_module_version": "1.5.0",
46"state": {
47"_dom_classes": [],
48"_model_module": "@jupyter-widgets/controls",
49"_model_module_version": "1.5.0",
50"_model_name": "HTMLModel",
51"_view_count": null,
52"_view_module": "@jupyter-widgets/controls",
53"_view_module_version": "1.5.0",
54"_view_name": "HTMLView",
55"description": "",
56"description_tooltip": null,
57"layout": "IPY_MODEL_69a61ad28d5141dcbaea44060bc5ebf7",
58"placeholder": "",
59"style": "IPY_MODEL_76c2fbf005ae4a5790edfeb499b387b7",
60"value": "tokenizer.model: 100%"
61}
62},
63"57efe36e546c473d8be34102f6ba9a58": {
64"model_module": "@jupyter-widgets/controls",
65"model_name": "FloatProgressModel",
66"model_module_version": "1.5.0",
67"state": {
68"_dom_classes": [],
69"_model_module": "@jupyter-widgets/controls",
70"_model_module_version": "1.5.0",
71"_model_name": "FloatProgressModel",
72"_view_count": null,
73"_view_module": "@jupyter-widgets/controls",
74"_view_module_version": "1.5.0",
75"_view_name": "ProgressView",
76"bar_style": "success",
77"description": "",
78"description_tooltip": null,
79"layout": "IPY_MODEL_116964f328dc45d991d895d684ac1216",
80"max": 493443,
81"min": 0,
82"orientation": "horizontal",
83"style": "IPY_MODEL_1ecec5ba4424498082a5f64cf3d7faf8",
84"value": 493443
85}
86},
87"871bad1d905d4877a9eaa242cfd54c4e": {
88"model_module": "@jupyter-widgets/controls",
89"model_name": "HTMLModel",
90"model_module_version": "1.5.0",
91"state": {
92"_dom_classes": [],
93"_model_module": "@jupyter-widgets/controls",
94"_model_module_version": "1.5.0",
95"_model_name": "HTMLModel",
96"_view_count": null,
97"_view_module": "@jupyter-widgets/controls",
98"_view_module_version": "1.5.0",
99"_view_name": "HTMLView",
100"description": "",
101"description_tooltip": null,
102"layout": "IPY_MODEL_fc4edcef273b4e75894f4b512122de94",
103"placeholder": "",
104"style": "IPY_MODEL_ca2323b142f54998985d30481d5cfabe",
105"value": " 493k/493k [00:00<00:00, 42.2kB/s]"
106}
107},
108"8951f6b2edf64464869391197c900f84": {
109"model_module": "@jupyter-widgets/base",
110"model_name": "LayoutModel",
111"model_module_version": "1.2.0",
112"state": {
113"_model_module": "@jupyter-widgets/base",
114"_model_module_version": "1.2.0",
115"_model_name": "LayoutModel",
116"_view_count": null,
117"_view_module": "@jupyter-widgets/base",
118"_view_module_version": "1.2.0",
119"_view_name": "LayoutView",
120"align_content": null,
121"align_items": null,
122"align_self": null,
123"border": null,
124"bottom": null,
125"display": null,
126"flex": null,
127"flex_flow": null,
128"grid_area": null,
129"grid_auto_columns": null,
130"grid_auto_flow": null,
131"grid_auto_rows": null,
132"grid_column": null,
133"grid_gap": null,
134"grid_row": null,
135"grid_template_areas": null,
136"grid_template_columns": null,
137"grid_template_rows": null,
138"height": null,
139"justify_content": null,
140"justify_items": null,
141"left": null,
142"margin": null,
143"max_height": null,
144"max_width": null,
145"min_height": null,
146"min_width": null,
147"object_fit": null,
148"object_position": null,
149"order": null,
150"overflow": null,
151"overflow_x": null,
152"overflow_y": null,
153"padding": null,
154"right": null,
155"top": null,
156"visibility": null,
157"width": null
158}
159},
160"69a61ad28d5141dcbaea44060bc5ebf7": {
161"model_module": "@jupyter-widgets/base",
162"model_name": "LayoutModel",
163"model_module_version": "1.2.0",
164"state": {
165"_model_module": "@jupyter-widgets/base",
166"_model_module_version": "1.2.0",
167"_model_name": "LayoutModel",
168"_view_count": null,
169"_view_module": "@jupyter-widgets/base",
170"_view_module_version": "1.2.0",
171"_view_name": "LayoutView",
172"align_content": null,
173"align_items": null,
174"align_self": null,
175"border": null,
176"bottom": null,
177"display": null,
178"flex": null,
179"flex_flow": null,
180"grid_area": null,
181"grid_auto_columns": null,
182"grid_auto_flow": null,
183"grid_auto_rows": null,
184"grid_column": null,
185"grid_gap": null,
186"grid_row": null,
187"grid_template_areas": null,
188"grid_template_columns": null,
189"grid_template_rows": null,
190"height": null,
191"justify_content": null,
192"justify_items": null,
193"left": null,
194"margin": null,
195"max_height": null,
196"max_width": null,
197"min_height": null,
198"min_width": null,
199"object_fit": null,
200"object_position": null,
201"order": null,
202"overflow": null,
203"overflow_x": null,
204"overflow_y": null,
205"padding": null,
206"right": null,
207"top": null,
208"visibility": null,
209"width": null
210}
211},
212"76c2fbf005ae4a5790edfeb499b387b7": {
213"model_module": "@jupyter-widgets/controls",
214"model_name": "DescriptionStyleModel",
215"model_module_version": "1.5.0",
216"state": {
217"_model_module": "@jupyter-widgets/controls",
218"_model_module_version": "1.5.0",
219"_model_name": "DescriptionStyleModel",
220"_view_count": null,
221"_view_module": "@jupyter-widgets/base",
222"_view_module_version": "1.2.0",
223"_view_name": "StyleView",
224"description_width": ""
225}
226},
227"116964f328dc45d991d895d684ac1216": {
228"model_module": "@jupyter-widgets/base",
229"model_name": "LayoutModel",
230"model_module_version": "1.2.0",
231"state": {
232"_model_module": "@jupyter-widgets/base",
233"_model_module_version": "1.2.0",
234"_model_name": "LayoutModel",
235"_view_count": null,
236"_view_module": "@jupyter-widgets/base",
237"_view_module_version": "1.2.0",
238"_view_name": "LayoutView",
239"align_content": null,
240"align_items": null,
241"align_self": null,
242"border": null,
243"bottom": null,
244"display": null,
245"flex": null,
246"flex_flow": null,
247"grid_area": null,
248"grid_auto_columns": null,
249"grid_auto_flow": null,
250"grid_auto_rows": null,
251"grid_column": null,
252"grid_gap": null,
253"grid_row": null,
254"grid_template_areas": null,
255"grid_template_columns": null,
256"grid_template_rows": null,
257"height": null,
258"justify_content": null,
259"justify_items": null,
260"left": null,
261"margin": null,
262"max_height": null,
263"max_width": null,
264"min_height": null,
265"min_width": null,
266"object_fit": null,
267"object_position": null,
268"order": null,
269"overflow": null,
270"overflow_x": null,
271"overflow_y": null,
272"padding": null,
273"right": null,
274"top": null,
275"visibility": null,
276"width": null
277}
278},
279"1ecec5ba4424498082a5f64cf3d7faf8": {
280"model_module": "@jupyter-widgets/controls",
281"model_name": "ProgressStyleModel",
282"model_module_version": "1.5.0",
283"state": {
284"_model_module": "@jupyter-widgets/controls",
285"_model_module_version": "1.5.0",
286"_model_name": "ProgressStyleModel",
287"_view_count": null,
288"_view_module": "@jupyter-widgets/base",
289"_view_module_version": "1.2.0",
290"_view_name": "StyleView",
291"bar_color": null,
292"description_width": ""
293}
294},
295"fc4edcef273b4e75894f4b512122de94": {
296"model_module": "@jupyter-widgets/base",
297"model_name": "LayoutModel",
298"model_module_version": "1.2.0",
299"state": {
300"_model_module": "@jupyter-widgets/base",
301"_model_module_version": "1.2.0",
302"_model_name": "LayoutModel",
303"_view_count": null,
304"_view_module": "@jupyter-widgets/base",
305"_view_module_version": "1.2.0",
306"_view_name": "LayoutView",
307"align_content": null,
308"align_items": null,
309"align_self": null,
310"border": null,
311"bottom": null,
312"display": null,
313"flex": null,
314"flex_flow": null,
315"grid_area": null,
316"grid_auto_columns": null,
317"grid_auto_flow": null,
318"grid_auto_rows": null,
319"grid_column": null,
320"grid_gap": null,
321"grid_row": null,
322"grid_template_areas": null,
323"grid_template_columns": null,
324"grid_template_rows": null,
325"height": null,
326"justify_content": null,
327"justify_items": null,
328"left": null,
329"margin": null,
330"max_height": null,
331"max_width": null,
332"min_height": null,
333"min_width": null,
334"object_fit": null,
335"object_position": null,
336"order": null,
337"overflow": null,
338"overflow_x": null,
339"overflow_y": null,
340"padding": null,
341"right": null,
342"top": null,
343"visibility": null,
344"width": null
345}
346},
347"ca2323b142f54998985d30481d5cfabe": {
348"model_module": "@jupyter-widgets/controls",
349"model_name": "DescriptionStyleModel",
350"model_module_version": "1.5.0",
351"state": {
352"_model_module": "@jupyter-widgets/controls",
353"_model_module_version": "1.5.0",
354"_model_name": "DescriptionStyleModel",
355"_view_count": null,
356"_view_module": "@jupyter-widgets/base",
357"_view_module_version": "1.2.0",
358"_view_name": "StyleView",
359"description_width": ""
360}
361},
362"63626ac2d0f546188c07512a04c71417": {
363"model_module": "@jupyter-widgets/controls",
364"model_name": "HBoxModel",
365"model_module_version": "1.5.0",
366"state": {
367"_dom_classes": [],
368"_model_module": "@jupyter-widgets/controls",
369"_model_module_version": "1.5.0",
370"_model_name": "HBoxModel",
371"_view_count": null,
372"_view_module": "@jupyter-widgets/controls",
373"_view_module_version": "1.5.0",
374"_view_name": "HBoxView",
375"box_style": "",
376"children": [
377"IPY_MODEL_decd91747fd04ce39f3e2b733bc7f477",
378"IPY_MODEL_7140e4c154424fcab846a71889e99ed2",
379"IPY_MODEL_2264d8b75251425e94e635558af4e223"
380],
381"layout": "IPY_MODEL_c37478198217457cb30c6649203cf4dc"
382}
383},
384"decd91747fd04ce39f3e2b733bc7f477": {
385"model_module": "@jupyter-widgets/controls",
386"model_name": "HTMLModel",
387"model_module_version": "1.5.0",
388"state": {
389"_dom_classes": [],
390"_model_module": "@jupyter-widgets/controls",
391"_model_module_version": "1.5.0",
392"_model_name": "HTMLModel",
393"_view_count": null,
394"_view_module": "@jupyter-widgets/controls",
395"_view_module_version": "1.5.0",
396"_view_name": "HTMLView",
397"description": "",
398"description_tooltip": null,
399"layout": "IPY_MODEL_4918769e4e984dfda924776e2373154c",
400"placeholder": "",
401"style": "IPY_MODEL_9b48494c94cf49b5835489d97f7a24c5",
402"value": "model-00001-of-00002.safetensors: 100%"
403}
404},
405"7140e4c154424fcab846a71889e99ed2": {
406"model_module": "@jupyter-widgets/controls",
407"model_name": "FloatProgressModel",
408"model_module_version": "1.5.0",
409"state": {
410"_dom_classes": [],
411"_model_module": "@jupyter-widgets/controls",
412"_model_module_version": "1.5.0",
413"_model_name": "FloatProgressModel",
414"_view_count": null,
415"_view_module": "@jupyter-widgets/controls",
416"_view_module_version": "1.5.0",
417"_view_name": "ProgressView",
418"bar_style": "success",
419"description": "",
420"description_tooltip": null,
421"layout": "IPY_MODEL_6ed844da52fe466eb1c10c814489448c",
422"max": 9942990000,
423"min": 0,
424"orientation": "horizontal",
425"style": "IPY_MODEL_9c60efa02e80423e828628190dd13bc3",
426"value": 9942990000
427}
428},
429"2264d8b75251425e94e635558af4e223": {
430"model_module": "@jupyter-widgets/controls",
431"model_name": "HTMLModel",
432"model_module_version": "1.5.0",
433"state": {
434"_dom_classes": [],
435"_model_module": "@jupyter-widgets/controls",
436"_model_module_version": "1.5.0",
437"_model_name": "HTMLModel",
438"_view_count": null,
439"_view_module": "@jupyter-widgets/controls",
440"_view_module_version": "1.5.0",
441"_view_name": "HTMLView",
442"description": "",
443"description_tooltip": null,
444"layout": "IPY_MODEL_0170e8cc57d94041956f7afbf2eef449",
445"placeholder": "",
446"style": "IPY_MODEL_220c2ba5f2524271b24fe049431a474c",
447"value": " 9.94G/9.94G [04:04<00:00, 36.9MB/s]"
448}
449},
450"c37478198217457cb30c6649203cf4dc": {
451"model_module": "@jupyter-widgets/base",
452"model_name": "LayoutModel",
453"model_module_version": "1.2.0",
454"state": {
455"_model_module": "@jupyter-widgets/base",
456"_model_module_version": "1.2.0",
457"_model_name": "LayoutModel",
458"_view_count": null,
459"_view_module": "@jupyter-widgets/base",
460"_view_module_version": "1.2.0",
461"_view_name": "LayoutView",
462"align_content": null,
463"align_items": null,
464"align_self": null,
465"border": null,
466"bottom": null,
467"display": null,
468"flex": null,
469"flex_flow": null,
470"grid_area": null,
471"grid_auto_columns": null,
472"grid_auto_flow": null,
473"grid_auto_rows": null,
474"grid_column": null,
475"grid_gap": null,
476"grid_row": null,
477"grid_template_areas": null,
478"grid_template_columns": null,
479"grid_template_rows": null,
480"height": null,
481"justify_content": null,
482"justify_items": null,
483"left": null,
484"margin": null,
485"max_height": null,
486"max_width": null,
487"min_height": null,
488"min_width": null,
489"object_fit": null,
490"object_position": null,
491"order": null,
492"overflow": null,
493"overflow_x": null,
494"overflow_y": null,
495"padding": null,
496"right": null,
497"top": null,
498"visibility": null,
499"width": null
500}
501},
502"4918769e4e984dfda924776e2373154c": {
503"model_module": "@jupyter-widgets/base",
504"model_name": "LayoutModel",
505"model_module_version": "1.2.0",
506"state": {
507"_model_module": "@jupyter-widgets/base",
508"_model_module_version": "1.2.0",
509"_model_name": "LayoutModel",
510"_view_count": null,
511"_view_module": "@jupyter-widgets/base",
512"_view_module_version": "1.2.0",
513"_view_name": "LayoutView",
514"align_content": null,
515"align_items": null,
516"align_self": null,
517"border": null,
518"bottom": null,
519"display": null,
520"flex": null,
521"flex_flow": null,
522"grid_area": null,
523"grid_auto_columns": null,
524"grid_auto_flow": null,
525"grid_auto_rows": null,
526"grid_column": null,
527"grid_gap": null,
528"grid_row": null,
529"grid_template_areas": null,
530"grid_template_columns": null,
531"grid_template_rows": null,
532"height": null,
533"justify_content": null,
534"justify_items": null,
535"left": null,
536"margin": null,
537"max_height": null,
538"max_width": null,
539"min_height": null,
540"min_width": null,
541"object_fit": null,
542"object_position": null,
543"order": null,
544"overflow": null,
545"overflow_x": null,
546"overflow_y": null,
547"padding": null,
548"right": null,
549"top": null,
550"visibility": null,
551"width": null
552}
553},
554"9b48494c94cf49b5835489d97f7a24c5": {
555"model_module": "@jupyter-widgets/controls",
556"model_name": "DescriptionStyleModel",
557"model_module_version": "1.5.0",
558"state": {
559"_model_module": "@jupyter-widgets/controls",
560"_model_module_version": "1.5.0",
561"_model_name": "DescriptionStyleModel",
562"_view_count": null,
563"_view_module": "@jupyter-widgets/base",
564"_view_module_version": "1.2.0",
565"_view_name": "StyleView",
566"description_width": ""
567}
568},
569"6ed844da52fe466eb1c10c814489448c": {
570"model_module": "@jupyter-widgets/base",
571"model_name": "LayoutModel",
572"model_module_version": "1.2.0",
573"state": {
574"_model_module": "@jupyter-widgets/base",
575"_model_module_version": "1.2.0",
576"_model_name": "LayoutModel",
577"_view_count": null,
578"_view_module": "@jupyter-widgets/base",
579"_view_module_version": "1.2.0",
580"_view_name": "LayoutView",
581"align_content": null,
582"align_items": null,
583"align_self": null,
584"border": null,
585"bottom": null,
586"display": null,
587"flex": null,
588"flex_flow": null,
589"grid_area": null,
590"grid_auto_columns": null,
591"grid_auto_flow": null,
592"grid_auto_rows": null,
593"grid_column": null,
594"grid_gap": null,
595"grid_row": null,
596"grid_template_areas": null,
597"grid_template_columns": null,
598"grid_template_rows": null,
599"height": null,
600"justify_content": null,
601"justify_items": null,
602"left": null,
603"margin": null,
604"max_height": null,
605"max_width": null,
606"min_height": null,
607"min_width": null,
608"object_fit": null,
609"object_position": null,
610"order": null,
611"overflow": null,
612"overflow_x": null,
613"overflow_y": null,
614"padding": null,
615"right": null,
616"top": null,
617"visibility": null,
618"width": null
619}
620},
621"9c60efa02e80423e828628190dd13bc3": {
622"model_module": "@jupyter-widgets/controls",
623"model_name": "ProgressStyleModel",
624"model_module_version": "1.5.0",
625"state": {
626"_model_module": "@jupyter-widgets/controls",
627"_model_module_version": "1.5.0",
628"_model_name": "ProgressStyleModel",
629"_view_count": null,
630"_view_module": "@jupyter-widgets/base",
631"_view_module_version": "1.2.0",
632"_view_name": "StyleView",
633"bar_color": null,
634"description_width": ""
635}
636},
637"0170e8cc57d94041956f7afbf2eef449": {
638"model_module": "@jupyter-widgets/base",
639"model_name": "LayoutModel",
640"model_module_version": "1.2.0",
641"state": {
642"_model_module": "@jupyter-widgets/base",
643"_model_module_version": "1.2.0",
644"_model_name": "LayoutModel",
645"_view_count": null,
646"_view_module": "@jupyter-widgets/base",
647"_view_module_version": "1.2.0",
648"_view_name": "LayoutView",
649"align_content": null,
650"align_items": null,
651"align_self": null,
652"border": null,
653"bottom": null,
654"display": null,
655"flex": null,
656"flex_flow": null,
657"grid_area": null,
658"grid_auto_columns": null,
659"grid_auto_flow": null,
660"grid_auto_rows": null,
661"grid_column": null,
662"grid_gap": null,
663"grid_row": null,
664"grid_template_areas": null,
665"grid_template_columns": null,
666"grid_template_rows": null,
667"height": null,
668"justify_content": null,
669"justify_items": null,
670"left": null,
671"margin": null,
672"max_height": null,
673"max_width": null,
674"min_height": null,
675"min_width": null,
676"object_fit": null,
677"object_position": null,
678"order": null,
679"overflow": null,
680"overflow_x": null,
681"overflow_y": null,
682"padding": null,
683"right": null,
684"top": null,
685"visibility": null,
686"width": null
687}
688},
689"220c2ba5f2524271b24fe049431a474c": {
690"model_module": "@jupyter-widgets/controls",
691"model_name": "DescriptionStyleModel",
692"model_module_version": "1.5.0",
693"state": {
694"_model_module": "@jupyter-widgets/controls",
695"_model_module_version": "1.5.0",
696"_model_name": "DescriptionStyleModel",
697"_view_count": null,
698"_view_module": "@jupyter-widgets/base",
699"_view_module_version": "1.2.0",
700"_view_name": "StyleView",
701"description_width": ""
702}
703},
704"a6f99dd0662846f9a381d2d507a7b447": {
705"model_module": "@jupyter-widgets/controls",
706"model_name": "HBoxModel",
707"model_module_version": "1.5.0",
708"state": {
709"_dom_classes": [],
710"_model_module": "@jupyter-widgets/controls",
711"_model_module_version": "1.5.0",
712"_model_name": "HBoxModel",
713"_view_count": null,
714"_view_module": "@jupyter-widgets/controls",
715"_view_module_version": "1.5.0",
716"_view_name": "HBoxView",
717"box_style": "",
718"children": [
719"IPY_MODEL_900b9fcb70a84781bd5b4213df54626d",
720"IPY_MODEL_0ea83f270e164795b64f23b143efb300",
721"IPY_MODEL_318dcdeac8fb40f88fa60114f1c6a7c1"
722],
723"layout": "IPY_MODEL_af89cf715e0e4c5e9f59943a255394c1"
724}
725},
726"900b9fcb70a84781bd5b4213df54626d": {
727"model_module": "@jupyter-widgets/controls",
728"model_name": "HTMLModel",
729"model_module_version": "1.5.0",
730"state": {
731"_dom_classes": [],
732"_model_module": "@jupyter-widgets/controls",
733"_model_module_version": "1.5.0",
734"_model_name": "HTMLModel",
735"_view_count": null,
736"_view_module": "@jupyter-widgets/controls",
737"_view_module_version": "1.5.0",
738"_view_name": "HTMLView",
739"description": "",
740"description_tooltip": null,
741"layout": "IPY_MODEL_40e23e35299d45d499432b8f1a9bc924",
742"placeholder": "",
743"style": "IPY_MODEL_126b374e286747768ef7218454534640",
744"value": "Upload 3 LFS files: 100%"
745}
746},
747"0ea83f270e164795b64f23b143efb300": {
748"model_module": "@jupyter-widgets/controls",
749"model_name": "FloatProgressModel",
750"model_module_version": "1.5.0",
751"state": {
752"_dom_classes": [],
753"_model_module": "@jupyter-widgets/controls",
754"_model_module_version": "1.5.0",
755"_model_name": "FloatProgressModel",
756"_view_count": null,
757"_view_module": "@jupyter-widgets/controls",
758"_view_module_version": "1.5.0",
759"_view_name": "ProgressView",
760"bar_style": "success",
761"description": "",
762"description_tooltip": null,
763"layout": "IPY_MODEL_bdd26e54eed5477f99b135552e5f3450",
764"max": 3,
765"min": 0,
766"orientation": "horizontal",
767"style": "IPY_MODEL_163a6fd878134e1eb5f193d1ebfff1c1",
768"value": 3
769}
770},
771"318dcdeac8fb40f88fa60114f1c6a7c1": {
772"model_module": "@jupyter-widgets/controls",
773"model_name": "HTMLModel",
774"model_module_version": "1.5.0",
775"state": {
776"_dom_classes": [],
777"_model_module": "@jupyter-widgets/controls",
778"_model_module_version": "1.5.0",
779"_model_name": "HTMLModel",
780"_view_count": null,
781"_view_module": "@jupyter-widgets/controls",
782"_view_module_version": "1.5.0",
783"_view_name": "HTMLView",
784"description": "",
785"description_tooltip": null,
786"layout": "IPY_MODEL_953d7c014f76413c9805a2ef8c2c9356",
787"placeholder": "",
788"style": "IPY_MODEL_348879bf76d1471f9c79c1ec2dc07c1d",
789"value": " 3/3 [04:05<00:00, 245.46s/it]"
790}
791},
792"af89cf715e0e4c5e9f59943a255394c1": {
793"model_module": "@jupyter-widgets/base",
794"model_name": "LayoutModel",
795"model_module_version": "1.2.0",
796"state": {
797"_model_module": "@jupyter-widgets/base",
798"_model_module_version": "1.2.0",
799"_model_name": "LayoutModel",
800"_view_count": null,
801"_view_module": "@jupyter-widgets/base",
802"_view_module_version": "1.2.0",
803"_view_name": "LayoutView",
804"align_content": null,
805"align_items": null,
806"align_self": null,
807"border": null,
808"bottom": null,
809"display": null,
810"flex": null,
811"flex_flow": null,
812"grid_area": null,
813"grid_auto_columns": null,
814"grid_auto_flow": null,
815"grid_auto_rows": null,
816"grid_column": null,
817"grid_gap": null,
818"grid_row": null,
819"grid_template_areas": null,
820"grid_template_columns": null,
821"grid_template_rows": null,
822"height": null,
823"justify_content": null,
824"justify_items": null,
825"left": null,
826"margin": null,
827"max_height": null,
828"max_width": null,
829"min_height": null,
830"min_width": null,
831"object_fit": null,
832"object_position": null,
833"order": null,
834"overflow": null,
835"overflow_x": null,
836"overflow_y": null,
837"padding": null,
838"right": null,
839"top": null,
840"visibility": null,
841"width": null
842}
843},
844"40e23e35299d45d499432b8f1a9bc924": {
845"model_module": "@jupyter-widgets/base",
846"model_name": "LayoutModel",
847"model_module_version": "1.2.0",
848"state": {
849"_model_module": "@jupyter-widgets/base",
850"_model_module_version": "1.2.0",
851"_model_name": "LayoutModel",
852"_view_count": null,
853"_view_module": "@jupyter-widgets/base",
854"_view_module_version": "1.2.0",
855"_view_name": "LayoutView",
856"align_content": null,
857"align_items": null,
858"align_self": null,
859"border": null,
860"bottom": null,
861"display": null,
862"flex": null,
863"flex_flow": null,
864"grid_area": null,
865"grid_auto_columns": null,
866"grid_auto_flow": null,
867"grid_auto_rows": null,
868"grid_column": null,
869"grid_gap": null,
870"grid_row": null,
871"grid_template_areas": null,
872"grid_template_columns": null,
873"grid_template_rows": null,
874"height": null,
875"justify_content": null,
876"justify_items": null,
877"left": null,
878"margin": null,
879"max_height": null,
880"max_width": null,
881"min_height": null,
882"min_width": null,
883"object_fit": null,
884"object_position": null,
885"order": null,
886"overflow": null,
887"overflow_x": null,
888"overflow_y": null,
889"padding": null,
890"right": null,
891"top": null,
892"visibility": null,
893"width": null
894}
895},
896"126b374e286747768ef7218454534640": {
897"model_module": "@jupyter-widgets/controls",
898"model_name": "DescriptionStyleModel",
899"model_module_version": "1.5.0",
900"state": {
901"_model_module": "@jupyter-widgets/controls",
902"_model_module_version": "1.5.0",
903"_model_name": "DescriptionStyleModel",
904"_view_count": null,
905"_view_module": "@jupyter-widgets/base",
906"_view_module_version": "1.2.0",
907"_view_name": "StyleView",
908"description_width": ""
909}
910},
911"bdd26e54eed5477f99b135552e5f3450": {
912"model_module": "@jupyter-widgets/base",
913"model_name": "LayoutModel",
914"model_module_version": "1.2.0",
915"state": {
916"_model_module": "@jupyter-widgets/base",
917"_model_module_version": "1.2.0",
918"_model_name": "LayoutModel",
919"_view_count": null,
920"_view_module": "@jupyter-widgets/base",
921"_view_module_version": "1.2.0",
922"_view_name": "LayoutView",
923"align_content": null,
924"align_items": null,
925"align_self": null,
926"border": null,
927"bottom": null,
928"display": null,
929"flex": null,
930"flex_flow": null,
931"grid_area": null,
932"grid_auto_columns": null,
933"grid_auto_flow": null,
934"grid_auto_rows": null,
935"grid_column": null,
936"grid_gap": null,
937"grid_row": null,
938"grid_template_areas": null,
939"grid_template_columns": null,
940"grid_template_rows": null,
941"height": null,
942"justify_content": null,
943"justify_items": null,
944"left": null,
945"margin": null,
946"max_height": null,
947"max_width": null,
948"min_height": null,
949"min_width": null,
950"object_fit": null,
951"object_position": null,
952"order": null,
953"overflow": null,
954"overflow_x": null,
955"overflow_y": null,
956"padding": null,
957"right": null,
958"top": null,
959"visibility": null,
960"width": null
961}
962},
963"163a6fd878134e1eb5f193d1ebfff1c1": {
964"model_module": "@jupyter-widgets/controls",
965"model_name": "ProgressStyleModel",
966"model_module_version": "1.5.0",
967"state": {
968"_model_module": "@jupyter-widgets/controls",
969"_model_module_version": "1.5.0",
970"_model_name": "ProgressStyleModel",
971"_view_count": null,
972"_view_module": "@jupyter-widgets/base",
973"_view_module_version": "1.2.0",
974"_view_name": "StyleView",
975"bar_color": null,
976"description_width": ""
977}
978},
979"953d7c014f76413c9805a2ef8c2c9356": {
980"model_module": "@jupyter-widgets/base",
981"model_name": "LayoutModel",
982"model_module_version": "1.2.0",
983"state": {
984"_model_module": "@jupyter-widgets/base",
985"_model_module_version": "1.2.0",
986"_model_name": "LayoutModel",
987"_view_count": null,
988"_view_module": "@jupyter-widgets/base",
989"_view_module_version": "1.2.0",
990"_view_name": "LayoutView",
991"align_content": null,
992"align_items": null,
993"align_self": null,
994"border": null,
995"bottom": null,
996"display": null,
997"flex": null,
998"flex_flow": null,
999"grid_area": null,
1000"grid_auto_columns": null,
1001"grid_auto_flow": null,
1002"grid_auto_rows": null,
1003"grid_column": null,
1004"grid_gap": null,
1005"grid_row": null,
1006"grid_template_areas": null,
1007"grid_template_columns": null,
1008"grid_template_rows": null,
1009"height": null,
1010"justify_content": null,
1011"justify_items": null,
1012"left": null,
1013"margin": null,
1014"max_height": null,
1015"max_width": null,
1016"min_height": null,
1017"min_width": null,
1018"object_fit": null,
1019"object_position": null,
1020"order": null,
1021"overflow": null,
1022"overflow_x": null,
1023"overflow_y": null,
1024"padding": null,
1025"right": null,
1026"top": null,
1027"visibility": null,
1028"width": null
1029}
1030},
1031"348879bf76d1471f9c79c1ec2dc07c1d": {
1032"model_module": "@jupyter-widgets/controls",
1033"model_name": "DescriptionStyleModel",
1034"model_module_version": "1.5.0",
1035"state": {
1036"_model_module": "@jupyter-widgets/controls",
1037"_model_module_version": "1.5.0",
1038"_model_name": "DescriptionStyleModel",
1039"_view_count": null,
1040"_view_module": "@jupyter-widgets/base",
1041"_view_module_version": "1.2.0",
1042"_view_name": "StyleView",
1043"description_width": ""
1044}
1045},
1046"8d54ae0d028b40e7b018454187db1a1c": {
1047"model_module": "@jupyter-widgets/controls",
1048"model_name": "HBoxModel",
1049"model_module_version": "1.5.0",
1050"state": {
1051"_dom_classes": [],
1052"_model_module": "@jupyter-widgets/controls",
1053"_model_module_version": "1.5.0",
1054"_model_name": "HBoxModel",
1055"_view_count": null,
1056"_view_module": "@jupyter-widgets/controls",
1057"_view_module_version": "1.5.0",
1058"_view_name": "HBoxView",
1059"box_style": "",
1060"children": [
1061"IPY_MODEL_562353040be54593b23734390f49927c",
1062"IPY_MODEL_00cbebe6df7d4995913f20e39fc71b15",
1063"IPY_MODEL_aee3c563fdc54f9cb3ebc2630c84a9e6"
1064],
1065"layout": "IPY_MODEL_b74e307a751844ffab9f7f3df367774b"
1066}
1067},
1068"562353040be54593b23734390f49927c": {
1069"model_module": "@jupyter-widgets/controls",
1070"model_name": "HTMLModel",
1071"model_module_version": "1.5.0",
1072"state": {
1073"_dom_classes": [],
1074"_model_module": "@jupyter-widgets/controls",
1075"_model_module_version": "1.5.0",
1076"_model_name": "HTMLModel",
1077"_view_count": null,
1078"_view_module": "@jupyter-widgets/controls",
1079"_view_module_version": "1.5.0",
1080"_view_name": "HTMLView",
1081"description": "",
1082"description_tooltip": null,
1083"layout": "IPY_MODEL_8e6142e41f714fe9abe6a5bb72c071f9",
1084"placeholder": "",
1085"style": "IPY_MODEL_49cd1c5663404fb5a307c345e7e970c3",
1086"value": "model-00002-of-00002.safetensors: 100%"
1087}
1088},
1089"00cbebe6df7d4995913f20e39fc71b15": {
1090"model_module": "@jupyter-widgets/controls",
1091"model_name": "FloatProgressModel",
1092"model_module_version": "1.5.0",
1093"state": {
1094"_dom_classes": [],
1095"_model_module": "@jupyter-widgets/controls",
1096"_model_module_version": "1.5.0",
1097"_model_name": "FloatProgressModel",
1098"_view_count": null,
1099"_view_module": "@jupyter-widgets/controls",
1100"_view_module_version": "1.5.0",
1101"_view_name": "ProgressView",
1102"bar_style": "success",
1103"description": "",
1104"description_tooltip": null,
1105"layout": "IPY_MODEL_920ef8e509d24ccda930f4c47eff158c",
1106"max": 8030324832,
1107"min": 0,
1108"orientation": "horizontal",
1109"style": "IPY_MODEL_c8828d61b26a47ac97a1541e14c00f62",
1110"value": 8030324832
1111}
1112},
1113"aee3c563fdc54f9cb3ebc2630c84a9e6": {
1114"model_module": "@jupyter-widgets/controls",
1115"model_name": "HTMLModel",
1116"model_module_version": "1.5.0",
1117"state": {
1118"_dom_classes": [],
1119"_model_module": "@jupyter-widgets/controls",
1120"_model_module_version": "1.5.0",
1121"_model_name": "HTMLModel",
1122"_view_count": null,
1123"_view_module": "@jupyter-widgets/controls",
1124"_view_module_version": "1.5.0",
1125"_view_name": "HTMLView",
1126"description": "",
1127"description_tooltip": null,
1128"layout": "IPY_MODEL_a3d7e352222647a99be79935b1ebd86a",
1129"placeholder": "",
1130"style": "IPY_MODEL_80666ef5f07641c482a23618a767791d",
1131"value": " 8.03G/8.03G [03:13<00:00, 54.0MB/s]"
1132}
1133},
1134"b74e307a751844ffab9f7f3df367774b": {
1135"model_module": "@jupyter-widgets/base",
1136"model_name": "LayoutModel",
1137"model_module_version": "1.2.0",
1138"state": {
1139"_model_module": "@jupyter-widgets/base",
1140"_model_module_version": "1.2.0",
1141"_model_name": "LayoutModel",
1142"_view_count": null,
1143"_view_module": "@jupyter-widgets/base",
1144"_view_module_version": "1.2.0",
1145"_view_name": "LayoutView",
1146"align_content": null,
1147"align_items": null,
1148"align_self": null,
1149"border": null,
1150"bottom": null,
1151"display": null,
1152"flex": null,
1153"flex_flow": null,
1154"grid_area": null,
1155"grid_auto_columns": null,
1156"grid_auto_flow": null,
1157"grid_auto_rows": null,
1158"grid_column": null,
1159"grid_gap": null,
1160"grid_row": null,
1161"grid_template_areas": null,
1162"grid_template_columns": null,
1163"grid_template_rows": null,
1164"height": null,
1165"justify_content": null,
1166"justify_items": null,
1167"left": null,
1168"margin": null,
1169"max_height": null,
1170"max_width": null,
1171"min_height": null,
1172"min_width": null,
1173"object_fit": null,
1174"object_position": null,
1175"order": null,
1176"overflow": null,
1177"overflow_x": null,
1178"overflow_y": null,
1179"padding": null,
1180"right": null,
1181"top": null,
1182"visibility": null,
1183"width": null
1184}
1185},
1186"8e6142e41f714fe9abe6a5bb72c071f9": {
1187"model_module": "@jupyter-widgets/base",
1188"model_name": "LayoutModel",
1189"model_module_version": "1.2.0",
1190"state": {
1191"_model_module": "@jupyter-widgets/base",
1192"_model_module_version": "1.2.0",
1193"_model_name": "LayoutModel",
1194"_view_count": null,
1195"_view_module": "@jupyter-widgets/base",
1196"_view_module_version": "1.2.0",
1197"_view_name": "LayoutView",
1198"align_content": null,
1199"align_items": null,
1200"align_self": null,
1201"border": null,
1202"bottom": null,
1203"display": null,
1204"flex": null,
1205"flex_flow": null,
1206"grid_area": null,
1207"grid_auto_columns": null,
1208"grid_auto_flow": null,
1209"grid_auto_rows": null,
1210"grid_column": null,
1211"grid_gap": null,
1212"grid_row": null,
1213"grid_template_areas": null,
1214"grid_template_columns": null,
1215"grid_template_rows": null,
1216"height": null,
1217"justify_content": null,
1218"justify_items": null,
1219"left": null,
1220"margin": null,
1221"max_height": null,
1222"max_width": null,
1223"min_height": null,
1224"min_width": null,
1225"object_fit": null,
1226"object_position": null,
1227"order": null,
1228"overflow": null,
1229"overflow_x": null,
1230"overflow_y": null,
1231"padding": null,
1232"right": null,
1233"top": null,
1234"visibility": null,
1235"width": null
1236}
1237},
1238"49cd1c5663404fb5a307c345e7e970c3": {
1239"model_module": "@jupyter-widgets/controls",
1240"model_name": "DescriptionStyleModel",
1241"model_module_version": "1.5.0",
1242"state": {
1243"_model_module": "@jupyter-widgets/controls",
1244"_model_module_version": "1.5.0",
1245"_model_name": "DescriptionStyleModel",
1246"_view_count": null,
1247"_view_module": "@jupyter-widgets/base",
1248"_view_module_version": "1.2.0",
1249"_view_name": "StyleView",
1250"description_width": ""
1251}
1252},
1253"920ef8e509d24ccda930f4c47eff158c": {
1254"model_module": "@jupyter-widgets/base",
1255"model_name": "LayoutModel",
1256"model_module_version": "1.2.0",
1257"state": {
1258"_model_module": "@jupyter-widgets/base",
1259"_model_module_version": "1.2.0",
1260"_model_name": "LayoutModel",
1261"_view_count": null,
1262"_view_module": "@jupyter-widgets/base",
1263"_view_module_version": "1.2.0",
1264"_view_name": "LayoutView",
1265"align_content": null,
1266"align_items": null,
1267"align_self": null,
1268"border": null,
1269"bottom": null,
1270"display": null,
1271"flex": null,
1272"flex_flow": null,
1273"grid_area": null,
1274"grid_auto_columns": null,
1275"grid_auto_flow": null,
1276"grid_auto_rows": null,
1277"grid_column": null,
1278"grid_gap": null,
1279"grid_row": null,
1280"grid_template_areas": null,
1281"grid_template_columns": null,
1282"grid_template_rows": null,
1283"height": null,
1284"justify_content": null,
1285"justify_items": null,
1286"left": null,
1287"margin": null,
1288"max_height": null,
1289"max_width": null,
1290"min_height": null,
1291"min_width": null,
1292"object_fit": null,
1293"object_position": null,
1294"order": null,
1295"overflow": null,
1296"overflow_x": null,
1297"overflow_y": null,
1298"padding": null,
1299"right": null,
1300"top": null,
1301"visibility": null,
1302"width": null
1303}
1304},
1305"c8828d61b26a47ac97a1541e14c00f62": {
1306"model_module": "@jupyter-widgets/controls",
1307"model_name": "ProgressStyleModel",
1308"model_module_version": "1.5.0",
1309"state": {
1310"_model_module": "@jupyter-widgets/controls",
1311"_model_module_version": "1.5.0",
1312"_model_name": "ProgressStyleModel",
1313"_view_count": null,
1314"_view_module": "@jupyter-widgets/base",
1315"_view_module_version": "1.2.0",
1316"_view_name": "StyleView",
1317"bar_color": null,
1318"description_width": ""
1319}
1320},
1321"a3d7e352222647a99be79935b1ebd86a": {
1322"model_module": "@jupyter-widgets/base",
1323"model_name": "LayoutModel",
1324"model_module_version": "1.2.0",
1325"state": {
1326"_model_module": "@jupyter-widgets/base",
1327"_model_module_version": "1.2.0",
1328"_model_name": "LayoutModel",
1329"_view_count": null,
1330"_view_module": "@jupyter-widgets/base",
1331"_view_module_version": "1.2.0",
1332"_view_name": "LayoutView",
1333"align_content": null,
1334"align_items": null,
1335"align_self": null,
1336"border": null,
1337"bottom": null,
1338"display": null,
1339"flex": null,
1340"flex_flow": null,
1341"grid_area": null,
1342"grid_auto_columns": null,
1343"grid_auto_flow": null,
1344"grid_auto_rows": null,
1345"grid_column": null,
1346"grid_gap": null,
1347"grid_row": null,
1348"grid_template_areas": null,
1349"grid_template_columns": null,
1350"grid_template_rows": null,
1351"height": null,
1352"justify_content": null,
1353"justify_items": null,
1354"left": null,
1355"margin": null,
1356"max_height": null,
1357"max_width": null,
1358"min_height": null,
1359"min_width": null,
1360"object_fit": null,
1361"object_position": null,
1362"order": null,
1363"overflow": null,
1364"overflow_x": null,
1365"overflow_y": null,
1366"padding": null,
1367"right": null,
1368"top": null,
1369"visibility": null,
1370"width": null
1371}
1372},
1373"80666ef5f07641c482a23618a767791d": {
1374"model_module": "@jupyter-widgets/controls",
1375"model_name": "DescriptionStyleModel",
1376"model_module_version": "1.5.0",
1377"state": {
1378"_model_module": "@jupyter-widgets/controls",
1379"_model_module_version": "1.5.0",
1380"_model_name": "DescriptionStyleModel",
1381"_view_count": null,
1382"_view_module": "@jupyter-widgets/base",
1383"_view_module_version": "1.2.0",
1384"_view_name": "StyleView",
1385"description_width": ""
1386}
1387}
1388}
1389}
1390},
1391"cells": [
1392{
1393"cell_type": "markdown",
1394"metadata": {
1395"id": "view-in-github",
1396"colab_type": "text"
1397},
1398"source": [
1399"<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Mergekit.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
1400]
1401},
1402{
1403"cell_type": "markdown",
1404"source": [
1405"# Merge Large Language Models with mergekit\n",
1406"> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
1407"\n",
1408"❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
1409"\n",
1410"The only hardware requirement for model merging is a lot of RAM. With a free Google Colab account, you should be able to run it using a T4 GPU (VRAM offloading).\n",
1411"\n",
1412"Examples of merge configurations:\n",
1413"\n",
1414"### TIES-Merging\n",
1415"\n",
1416"```yaml\n",
1417"models:\n",
1418"  - model: mistralai/Mistral-7B-v0.1\n",
1419"    # no parameters necessary for base model\n",
1420"  - model: OpenPipe/mistral-ft-optimized-1218\n",
1421"    parameters:\n",
1422"      density: 0.5\n",
1423"      weight: 0.5\n",
1424"  - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n",
1425"    parameters:\n",
1426"      density: 0.5\n",
1427"      weight: 0.3\n",
1428"merge_method: ties\n",
1429"base_model: mistralai/Mistral-7B-v0.1\n",
1430"parameters:\n",
1431"  normalize: true\n",
1432"dtype: float16\n",
1433"```\n",
1434"\n",
1435"You can find the final model on the Hugging Face Hub at [mlabonne/NeuralPipe-7B-ties](https://huggingface.co/mlabonne/NeuralPipe-7B-ties).\n",
1436"\n",
1437"### SLERP\n",
1438"\n",
1439"```yaml\n",
1440"slices:\n",
1441"  - sources:\n",
1442"      - model: OpenPipe/mistral-ft-optimized-1218\n",
1443"        layer_range: [0, 32]\n",
1444"      - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n",
1445"        layer_range: [0, 32]\n",
1446"merge_method: slerp\n",
1447"base_model: OpenPipe/mistral-ft-optimized-1218\n",
1448"parameters:\n",
1449"  t:\n",
1450"    - filter: self_attn\n",
1451"      value: [0, 0.5, 0.3, 0.7, 1]\n",
1452"    - filter: mlp\n",
1453"      value: [1, 0.5, 0.7, 0.3, 0]\n",
1454"    - value: 0.5\n",
1455"dtype: bfloat16\n",
1456"```\n",
1457"\n",
1458"You can find the final model on the Hugging Face Hub at [mlabonne/NeuralPipe-7B-slerp](https://huggingface.co/mlabonne/NeuralPipe-7B-slerp).\n",
1459"\n",
1460"### Passthrough\n",
1461"\n",
1462"```yaml\n",
1463"slices:\n",
1464"  - sources:\n",
1465"      - model: OpenPipe/mistral-ft-optimized-1218\n",
1466"        layer_range: [0, 32]\n",
1467"  - sources:\n",
1468"      - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n",
1469"        layer_range: [24, 32]\n",
1470"merge_method: passthrough\n",
1471"dtype: bfloat16\n",
1472"```\n",
1473"\n",
1474"You can find the final model on the Hugging Face Hub at [mlabonne/NeuralPipe-9B-merged](https://huggingface.co/mlabonne/NeuralPipe-9B-merged)."
1475],
1476"metadata": {
1477"id": "o12O0YjJvvLW"
1478}
1479},
1480{
1481"cell_type": "code",
1482"execution_count": null,
1483"metadata": {
1484"id": "NPNPie5Eo3EZ"
1485},
1486"outputs": [],
1487"source": [
1488"# Fetch mergekit unless a checkout is already present, so the cell can be re-run\n!test -d mergekit || git clone https://github.com/cg123/mergekit.git\n",
1489"# %pip installs into the environment of the running kernel\n%pip install -q -e ./mergekit"
1490]
1491},
1492{
1493"cell_type": "code",
1494"source": [
1495"import yaml  # used by a later cell to parse yaml_config\n",
1496"\n",
1497"MODEL_NAME = \"Marcoro14-7B-slerp\"  # also names the Hub repository created later\n",
1498"yaml_config = \"\"\"\n",
1499"slices:\n",
1500"  - sources:\n",
1501"      - model: AIDC-ai-business/Marcoroni-7B-v3\n",
1502"        layer_range: [0, 32]\n",
1503"      - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.1\n",
1504"        layer_range: [0, 32]\n",
1505"merge_method: slerp\n",
1506"base_model: AIDC-ai-business/Marcoroni-7B-v3\n",
1507"parameters:\n",
1508"  t:\n",
1509"    - filter: self_attn\n",
1510"      value: [0, 0.5, 0.3, 0.7, 1]\n",
1511"    - filter: mlp\n",
1512"      value: [1, 0.5, 0.7, 0.3, 0]\n",
1513"    - value: 0.5\n",
1514"dtype: bfloat16\n",
1515"\n",
1516"\"\"\"\n",
1517"\n",
1518"# Write the configuration to config.yaml, the file passed to mergekit-yaml in the next cell\n",
1519"with open('config.yaml', 'w', encoding=\"utf-8\") as f:\n",
1520"    f.write(yaml_config)"
1521],
1522"metadata": {
1523"id": "LGd7jlfCpNcg"
1524},
1525"execution_count": null,
1526"outputs": []
1527},
1528{
1529"cell_type": "code",
1530"source": [
1531"# Run the merge described in config.yaml; the output goes to the merge folder that is uploaded later\n",
1532"!mergekit-yaml config.yaml merge --lazy-unpickle --out-shard-size 1B --allow-crimes --copy-tokenizer"
1533],
1534"metadata": {
1535"id": "d5mYzDo1q96y"
1536},
1537"execution_count": null,
1538"outputs": []
1539},
1540{
1541"cell_type": "code",
1542"source": [
1543"!pip install -qU huggingface_hub\n",
1544"\n",
1545"from huggingface_hub import ModelCard, ModelCardData\n",
1546"from jinja2 import Template\n",
1547"\n",
1548"username = \"mlabonne\"\n",
1549"\n",
1550"template_text = \"\"\"\n",
1551"---\n",
1552"license: apache-2.0\n",
1553"tags:\n",
1554"- merge\n",
1555"- mergekit\n",
1556"- lazymergekit\n",
1557"{%- for model in models %}\n",
1558"- {{ model }}\n",
1559"{%- endfor %}\n",
1560"---\n",
1561"\n",
1562"# {{ model_name }}\n",
1563"\n",
1564"{{ model_name }} is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):\n",
1565"\n",
1566"{%- for model in models %}\n",
1567"* [{{ model }}](https://huggingface.co/{{ model }})\n",
1568"{%- endfor %}\n",
1569"\n",
1570"## 🧩 Configuration\n",
1571"\n",
1572"```yaml\n",
1573"{{- yaml_config -}}\n",
1574"```\n",
1575"\"\"\"\n",
1576"\n",
1577"# Build the Jinja template; strip() removes the blank lines around the triple-quoted text\n",
1578"jinja_template = Template(template_text.strip())\n",
1579"\n",
1580"# Collect the names of the merged models from the config written earlier\n",
1581"data = yaml.safe_load(yaml_config)\n",
1582"if \"models\" in data:\n",
1583"    # Entries without parameters (the base model) are not listed\n",
1584"    models = [entry[\"model\"] for entry in data[\"models\"] if \"parameters\" in entry]\n",
1585"elif \"slices\" in data:\n",
1586"    # Every source of every slice, in order of first appearance, without duplicates\n",
1587"    models = list(dict.fromkeys(source[\"model\"] for piece in data[\"slices\"] for source in piece[\"sources\"]))\n",
1588"else:\n",
1589"    raise Exception(\"No models or slices found in yaml config\")\n",
1590"\n",
1591"# Render the card text from the template\n",
1592"content = jinja_template.render(\n",
1593"    model_name=MODEL_NAME,\n",
1594"    models=models,\n",
1595"    yaml_config=yaml_config,\n",
1596"    username=username,\n",
1597")\n",
1598"\n",
1599"# Save the model card next to the merged weights so it is uploaded with them\n",
1600"card = ModelCard(content)\n",
1601"card.save('merge/README.md')"
1602],
1603"metadata": {
1604"colab": {
1605"base_uri": "https://localhost:8080/"
1606},
1607"id": "w-RNKev373lI",
1608"outputId": "fccbbd1d-295f-4def-a398-f226813294bb"
1609},
1610"execution_count": null,
1611"outputs": [
1612{
1613"output_type": "stream",
1614"name": "stdout",
1615"text": [
1616"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/330.1 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.6/330.1 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m330.1/330.1 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
1617"\u001b[?25h"
1618]
1619}
1620]
1621},
1622{
1623"cell_type": "code",
1624"source": [
1625"from google.colab import userdata\n",
1626"from huggingface_hub import HfApi\n",
1627"\n",
1628"username = \"mlabonne\"\n",
1629"\n",
1630"# The token is read from the HF_TOKEN entry of the secrets tab in Google Colab\n",
1631"api = HfApi(token=userdata.get(\"HF_TOKEN\"))\n",
1632"\n",
1633"# exist_ok=True lets this cell be re-run after the repository has been created\n",
1634"api.create_repo(repo_id=f\"{username}/{MODEL_NAME}\", repo_type=\"model\", exist_ok=True)\n",
1635"\n",
1636"# Upload the contents of the local merge folder to that repository\n",
1637"api.upload_folder(\n",
1638"    repo_id=f\"{username}/{MODEL_NAME}\",\n",
1639"    folder_path=\"merge\",\n",
1640")"
1641],
1642"metadata": {
1643"colab": {
1644"base_uri": "https://localhost:8080/",
1645"height": 164,
1646"referenced_widgets": [
1647"de24d272f2b842c5b01eedb3f536b810",
1648"0c5dab2657b2473385a424d90f3d4664",
1649"57efe36e546c473d8be34102f6ba9a58",
1650"871bad1d905d4877a9eaa242cfd54c4e",
1651"8951f6b2edf64464869391197c900f84",
1652"69a61ad28d5141dcbaea44060bc5ebf7",
1653"76c2fbf005ae4a5790edfeb499b387b7",
1654"116964f328dc45d991d895d684ac1216",
1655"1ecec5ba4424498082a5f64cf3d7faf8",
1656"fc4edcef273b4e75894f4b512122de94",
1657"ca2323b142f54998985d30481d5cfabe",
1658"63626ac2d0f546188c07512a04c71417",
1659"decd91747fd04ce39f3e2b733bc7f477",
1660"7140e4c154424fcab846a71889e99ed2",
1661"2264d8b75251425e94e635558af4e223",
1662"c37478198217457cb30c6649203cf4dc",
1663"4918769e4e984dfda924776e2373154c",
1664"9b48494c94cf49b5835489d97f7a24c5",
1665"6ed844da52fe466eb1c10c814489448c",
1666"9c60efa02e80423e828628190dd13bc3",
1667"0170e8cc57d94041956f7afbf2eef449",
1668"220c2ba5f2524271b24fe049431a474c",
1669"a6f99dd0662846f9a381d2d507a7b447",
1670"900b9fcb70a84781bd5b4213df54626d",
1671"0ea83f270e164795b64f23b143efb300",
1672"318dcdeac8fb40f88fa60114f1c6a7c1",
1673"af89cf715e0e4c5e9f59943a255394c1",
1674"40e23e35299d45d499432b8f1a9bc924",
1675"126b374e286747768ef7218454534640",
1676"bdd26e54eed5477f99b135552e5f3450",
1677"163a6fd878134e1eb5f193d1ebfff1c1",
1678"953d7c014f76413c9805a2ef8c2c9356",
1679"348879bf76d1471f9c79c1ec2dc07c1d",
1680"8d54ae0d028b40e7b018454187db1a1c",
1681"562353040be54593b23734390f49927c",
1682"00cbebe6df7d4995913f20e39fc71b15",
1683"aee3c563fdc54f9cb3ebc2630c84a9e6",
1684"b74e307a751844ffab9f7f3df367774b",
1685"8e6142e41f714fe9abe6a5bb72c071f9",
1686"49cd1c5663404fb5a307c345e7e970c3",
1687"920ef8e509d24ccda930f4c47eff158c",
1688"c8828d61b26a47ac97a1541e14c00f62",
1689"a3d7e352222647a99be79935b1ebd86a",
1690"80666ef5f07641c482a23618a767791d"
1691]
1692},
1693"id": "ik0V0dF55gfU",
1694"outputId": "9f6c605c-6b51-473d-c1fa-b103e9208785"
1695},
1696"execution_count": null,
1697"outputs": [
1698{
1699"output_type": "display_data",
1700"data": {
1701"text/plain": [
1702"tokenizer.model: 0%| | 0.00/493k [00:00<?, ?B/s]"
1703],
1704"application/vnd.jupyter.widget-view+json": {
1705"version_major": 2,
1706"version_minor": 0,
1707"model_id": "de24d272f2b842c5b01eedb3f536b810"
1708}
1709},
1710"metadata": {}
1711},
1712{
1713"output_type": "display_data",
1714"data": {
1715"text/plain": [
1716"model-00001-of-00002.safetensors: 0%| | 0.00/9.94G [00:00<?, ?B/s]"
1717],
1718"application/vnd.jupyter.widget-view+json": {
1719"version_major": 2,
1720"version_minor": 0,
1721"model_id": "63626ac2d0f546188c07512a04c71417"
1722}
1723},
1724"metadata": {}
1725},
1726{
1727"output_type": "display_data",
1728"data": {
1729"text/plain": [
1730"Upload 3 LFS files: 0%| | 0/3 [00:00<?, ?it/s]"
1731],
1732"application/vnd.jupyter.widget-view+json": {
1733"version_major": 2,
1734"version_minor": 0,
1735"model_id": "a6f99dd0662846f9a381d2d507a7b447"
1736}
1737},
1738"metadata": {}
1739},
1740{
1741"output_type": "display_data",
1742"data": {
1743"text/plain": [
1744"model-00002-of-00002.safetensors: 0%| | 0.00/8.03G [00:00<?, ?B/s]"
1745],
1746"application/vnd.jupyter.widget-view+json": {
1747"version_major": 2,
1748"version_minor": 0,
1749"model_id": "8d54ae0d028b40e7b018454187db1a1c"
1750}
1751},
1752"metadata": {}
1753},
1754{
1755"output_type": "execute_result",
1756"data": {
1757"text/plain": [
1758"'https://huggingface.co/mlabonne/NeuralPipe-9B-merged/tree/main/'"
1759],
1760"application/vnd.google.colaboratory.intrinsic+json": {
1761"type": "string"
1762}
1763},
1764"metadata": {},
1765"execution_count": 5
1766}
1767]
1768}
1769]
1770}