LLM-FineTuning-Large-Language-Models
/
Mistral-7B-Inferencing.ipynb
105 lines · 3.2 KB
1{
2"cells": [
3{
4"cell_type": "markdown",
5"metadata": {},
6"source": [
7"### Checkout my [Twitter(@rohanpaul_ai)](https://twitter.com/rohanpaul_ai) for daily LLM bits"
8]
9},
10{
11"cell_type": "markdown",
12"metadata": {},
13"source": [
"## Running inference on Mistral 7B LLM with 4-bit quantization 🚀 - In FREE Google Colab\n",
15"\n",
16"# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=eovBbABk3hw&list=PLxqBkZuBynVTzqUQCQFgetR97y1X_1uCI&index=10&ab_channel=Rohan-Paul-AI)\n",
17"\n",
18"[![Imgur](https://imgur.com/Lz4ov4K.png)](https://www.youtube.com/watch?v=eovBbABk3hw&list=PLxqBkZuBynVTzqUQCQFgetR97y1X_1uCI&index=10&ab_channel=Rohan-Paul-AI)\n",
19"\n"
20]
21},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use %pip (not !pip) so the install targets the running kernel's environment.\n",
"%pip install -q git+https://github.com/huggingface/transformers peft accelerate bitsandbytes safetensors sentencepiece"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
"\n",
"model_name = 'bn22/Mistral-7B-Instruct-v0.1-sharded'\n",
"\n",
"def load_quantized_model(model_name: str):\n",
"    \"\"\"\n",
"    Load the model with 4-bit NF4 quantization via bitsandbytes.\n",
"\n",
"    :param model_name: Name or path of the model to be loaded.\n",
"    :return: Loaded quantized model.\n",
"    \"\"\"\n",
"    bnb_config = BitsAndBytesConfig(\n",
"        load_in_4bit=True,\n",
"        bnb_4bit_use_double_quant=True,  # nested quantization for extra memory savings\n",
"        bnb_4bit_quant_type=\"nf4\",\n",
"        bnb_4bit_compute_dtype=torch.bfloat16\n",
"    )\n",
"\n",
"    # NOTE: do NOT also pass load_in_4bit=True here — it is already carried by\n",
"    # quantization_config, and recent transformers releases raise a ValueError\n",
"    # when both are supplied.\n",
"    model = AutoModelForCausalLM.from_pretrained(\n",
"        model_name,\n",
"        torch_dtype=torch.bfloat16,\n",
"        quantization_config=bnb_config,\n",
"        device_map=\"auto\"  # let accelerate place the quantized weights on the GPU\n",
"    )\n",
"\n",
"    return model\n",
"\n",
"def initialize_tokenizer(model_name: str):\n",
"    \"\"\"\n",
"    Initialize the tokenizer with the specified model_name.\n",
"\n",
"    :param model_name: Name or path of the model for tokenizer initialization.\n",
"    :return: Initialized tokenizer.\n",
"    \"\"\"\n",
"    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"    tokenizer.bos_token_id = 1  # Set beginning of sentence token id\n",
"    return tokenizer\n",
"\n",
"\n",
"model = load_quantized_model(model_name)\n",
"\n",
"tokenizer = initialize_tokenizer(model_name)\n",
"\n",
"# Mistral-Instruct chat format: wrap the user turn in [INST] ... [/INST]\n",
"text = \"[INST] How AI will replace Engineers [/INST]\"\n",
"\n",
"encoded = tokenizer(text, return_tensors=\"pt\", add_special_tokens=False)\n",
"# Move input ids / attention mask to the same device as the model before generating\n",
"model_input = encoded.to(model.device)\n",
"generated_ids = model.generate(**model_input, max_new_tokens=200, do_sample=True)\n",
"decoded = tokenizer.batch_decode(generated_ids)\n",
"print(decoded[0])"
]
},
91{
92"cell_type": "markdown",
93"metadata": {},
94"source": []
95}
96],
97"metadata": {
98"language_info": {
99"name": "python"
100},
101"orig_nbformat": 4
102},
103"nbformat": 4,
104"nbformat_minor": 2
105}
106