4
"cell_type": "markdown",
11
"> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
13
"❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
15
"This notebook allows you to easily merge multiple models using [mergekit](https://github.com/cg123/mergekit). To evaluate your merges, see [🧐 LLM AutoEval](https://colab.research.google.com/drive/1Igs3WZuXAIv9X0vwqiE90QlEPys8e8Oa?usp=sharing#scrollTo=elyxjYI_rY5W).\n",
17
"*Special thanks to [@cg123](https://github.com/cg123) for this library and [@mrfakename](https://gist.github.com/fakerybakery) who told me about sharding (see his [Gist](https://gist.github.com/fakerybakery/d30a4d31b4f914757c1381166b9c683b)).*"
22
"execution_count": null,
28
"MODEL_NAME = \"HermesBagel-34B-v0.1\"\n",
29
"yaml_config = \"\"\"\n",
32
" - model: NousResearch/Nous-Hermes-2-Yi-34B\n",
33
" layer_range: [0, 60]\n",
34
" - model: jondurbin/bagel-dpo-34b-v0.2\n",
35
" layer_range: [0, 60]\n",
36
"merge_method: slerp\n",
37
"base_model: NousResearch/Nous-Hermes-2-Yi-34B\n",
40
" - filter: self_attn\n",
41
" value: [0, 0.5, 0.3, 0.7, 1]\n",
43
" value: [1, 0.5, 0.7, 0.3, 0]\n",
51
"execution_count": null,
55
"base_uri": "https://localhost:8080/"
58
"outputId": "5136bcf3-923a-4d40-d60d-12f25eaea3bd"
62
"# @title ## Run merge\n",
64
"# @markdown ### Runtime type\n",
65
"# @markdown Select your runtime (CPU, High RAM, GPU)\n",
67
"runtime = \"GPU\" # @param [\"CPU\", \"CPU + High-RAM\", \"GPU\"]\n",
69
"# @markdown ### Mergekit arguments\n",
70
"# @markdown Use the `main` branch by default, [`mixtral`](https://github.com/cg123/mergekit/blob/mixtral/moe.md) if you want to create a Mixture of Experts.\n",
72
"branch = \"main\" # @param [\"main\", \"mixtral\"]\n",
73
"trust_remote_code = True # @param {type:\"boolean\"}\n",
75
"# Install mergekit\n",
76
"if branch == \"main\":\n",
77
" !git clone https://github.com/cg123/mergekit.git\n",
78
" !cd mergekit && pip install -qqq -e . --progress-bar off\n",
79
"elif branch == \"mixtral\":\n",
80
" !git clone -b mixtral https://github.com/cg123/mergekit.git\n",
81
" !cd mergekit && pip install -qqq -e . --progress-bar off\n",
82
" !pip install -qqq -U transformers --progress-bar off\n",
84
"# Save config as yaml file\n",
85
"with open(\"config.yaml\", \"w\", encoding=\"utf-8\") as f:\n",
86
" f.write(yaml_config)\n",
89
"if branch == \"main\":\n",
90
" cli = \"mergekit-yaml config.yaml merge --copy-tokenizer\"\n",
91
"elif branch == \"mixtral\":\n",
92
" cli = \"mergekit-moe config.yaml merge --copy-tokenizer\"\n",
94
"# Additional arguments\n",
95
"if runtime == \"CPU\":\n",
96
" cli += \" --allow-crimes --out-shard-size 1B --lazy-unpickle\"\n",
97
"elif runtime == \"GPU\":\n",
98
" cli += \" --cuda --low-cpu-memory\"\n",
99
"if trust_remote_code:\n",
100
" cli += \" --trust-remote-code\"\n",
110
"execution_count": null,
116
"# @title ## Upload model to Hugging Face { display-mode: \"form\" }\n",
117
"# @markdown Enter your username and the name of the Colab secret that stores your [Hugging Face access token](https://huggingface.co/settings/tokens).\n",
118
"username = \"dfurman\" # @param {type:\"string\"}\n",
119
"token = \"HF_TOKEN\" # @param {type:\"string\"}\n",
121
"!pip install -qU huggingface_hub\n",
125
"from huggingface_hub import ModelCard, ModelCardData, HfApi\n",
126
"from google.colab import userdata\n",
127
"from jinja2 import Template\n",
129
"if branch == \"main\":\n",
130
" template_text = \"\"\"\n",
132
"license: apache-2.0\n",
137
"{%- for model in models %}\n",
142
"# {{ model_name }}\n",
144
"{{ model_name }} is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):\n",
146
"{%- for model in models %}\n",
147
"* [{{ model }}](https://huggingface.co/{{ model }})\n",
150
"## 🧩 Configuration\n",
153
"{{- yaml_config -}}\n",
159
"!pip install -qU transformers accelerate\n",
161
"from transformers import AutoTokenizer\n",
162
"import transformers\n",
165
"model = \"{{ username }}/{{ model_name }}\"\n",
166
"messages = [{\"role\": \"user\", \"content\": \"What is a large language model?\"}]\n",
168
"tokenizer = AutoTokenizer.from_pretrained(model)\n",
169
"prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
170
"pipeline = transformers.pipeline(\n",
171
" \"text-generation\",\n",
173
" torch_dtype=torch.float16,\n",
174
" device_map=\"auto\",\n",
177
"outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)\n",
178
"print(outputs[0][\"generated_text\"])\n",
182
" # Create a Jinja template object\n",
183
" jinja_template = Template(template_text.strip())\n",
185
" # Get list of models from config\n",
186
" data = yaml.safe_load(yaml_config)\n",
187
" if \"models\" in data:\n",
189
" data[\"models\"][i][\"model\"]\n",
190
" for i in range(len(data[\"models\"]))\n",
191
" if \"parameters\" in data[\"models\"][i]\n",
193
" elif \"parameters\" in data:\n",
195
" data[\"slices\"][0][\"sources\"][i][\"model\"]\n",
196
" for i in range(len(data[\"slices\"][0][\"sources\"]))\n",
198
" elif \"slices\" in data:\n",
200
" data[\"slices\"][i][\"sources\"][0][\"model\"] for i in range(len(data[\"slices\"]))\n",
203
" raise Exception(\"No models or slices found in yaml config\")\n",
205
" # Fill the template\n",
206
" content = jinja_template.render(\n",
207
" model_name=MODEL_NAME,\n",
209
" yaml_config=yaml_config,\n",
210
" username=username,\n",
213
"elif branch == \"mixtral\":\n",
214
" template_text = \"\"\"\n",
216
"license: apache-2.0\n",
222
"{%- for model in models %}\n",
227
"# {{ model_name }}\n",
229
"{{ model_name }} is a Mixture of Experts (MoE) made with the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):\n",
231
"{%- for model in models %}\n",
232
"* [{{ model }}](https://huggingface.co/{{ model }})\n",
235
"## 🧩 Configuration\n",
238
"{{- yaml_config -}}\n",
244
"!pip install -qU transformers bitsandbytes accelerate\n",
246
"from transformers import AutoTokenizer\n",
247
"import transformers\n",
250
"model = \"{{ username }}/{{ model_name }}\"\n",
252
"tokenizer = AutoTokenizer.from_pretrained(model)\n",
253
"pipeline = transformers.pipeline(\n",
254
" \"text-generation\",\n",
256
" model_kwargs={\"torch_dtype\": torch.float16, \"load_in_4bit\": True},\n",
259
"messages = [{\"role\": \"user\", \"content\": \"Explain what a Mixture of Experts is in less than 100 words.\"}]\n",
260
"prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
261
"outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)\n",
262
"print(outputs[0][\"generated_text\"])\n",
266
" # Create a Jinja template object\n",
267
" jinja_template = Template(template_text.strip())\n",
269
" # Fill the template\n",
270
" data = yaml.safe_load(yaml_config)\n",
271
" models = [model[\"source_model\"] for model in data[\"experts\"]]\n",
273
" content = jinja_template.render(\n",
274
" model_name=MODEL_NAME,\n",
276
" yaml_config=yaml_config,\n",
277
" username=username,\n",
280
"# Save the model card\n",
281
"card = ModelCard(content)\n",
282
"card.save(\"merge/README.md\")\n",
284
"# Defined in the secrets tab in Google Colab\n",
285
"api = HfApi(token=userdata.get(token))\n",
287
"# Upload merge folder\n",
288
"api.create_repo(\n",
289
" repo_id=f\"{username}/{MODEL_NAME}\",\n",
290
" repo_type=\"model\",\n",
293
"api.upload_folder(\n",
294
" repo_id=f\"{username}/{MODEL_NAME}\",\n",
295
" folder_path=\"merge\",\n",
301
"execution_count": null,
310
"accelerator": "GPU",
313
"machine_shape": "hm",
317
"display_name": "Python 3",