import json

import gradio as gr
from gradio.components import Component  # cannot be guarded by TYPE_CHECKING, used at runtime

from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple

from llmtuner.chat import ChatModel
from llmtuner.extras.misc import torch_gc
from llmtuner.hparams import GeneratingArguments
from llmtuner.webui.common import get_save_dir
from llmtuner.webui.locales import ALERTS

if TYPE_CHECKING:
    from llmtuner.webui.manager import Manager
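

# Chat backend of the Web UI: wraps ChatModel so a model can be loaded,
# unloaded, and queried for streaming responses from Gradio event callbacks.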
class WebChatModel(ChatModel):

    def __init__(
        self,
        manager: "Manager",
        demo_mode: Optional[bool] = False,
        lazy_init: Optional[bool] = True
    ) -> None:
        self.manager = manager
        self.demo_mode = demo_mode
        self.model = None
        self.tokenizer = None
        self.generating_args = GeneratingArguments()

        if not lazy_init:  # load the model immediately using command-line arguments
            super().__init__()

        if demo_mode:  # in demo mode the model is fixed by demo_config.json
            try:
                with open("demo_config.json", "r", encoding="utf-8") as f:
                    args = json.load(f)
                assert args.get("model_name_or_path", None) and args.get("template", None)
                super().__init__(args)
            except AssertionError:
                print("Please provide model name and template in `demo_config.json`.")
            except Exception:
                print("Cannot find `demo_config.json` at current directory.")
    @property
    def loaded(self) -> bool:
        return self.model is not None
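
    # Validates the UI state, resolves checkpoint paths, then re-runs the
    # ChatModel initializer with the collected arguments, yielding localized
    # status messages that Gradio streams back into the interface.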
    def load_model(self, data: Dict[Component, Any]) -> Generator[str, None, None]:
        get = lambda name: data[self.manager.get_elem_by_name(name)]
        lang = get("top.lang")

        if self.loaded:
            error = ALERTS["err_exists"][lang]
        elif not get("top.model_name"):
            error = ALERTS["err_no_model"][lang]
        elif not get("top.model_path"):
            error = ALERTS["err_no_path"][lang]
        elif self.demo_mode:
            error = ALERTS["err_demo"][lang]
        else:
            error = ""

        if error:
            gr.Warning(error)
            yield error
            return
        if get("top.checkpoints"):
            checkpoint_dir = ",".join([
                get_save_dir(get("top.model_name"), get("top.finetuning_type"), ckpt)
                for ckpt in get("top.checkpoints")
            ])
        else:
            checkpoint_dir = None
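
        # Report progress, then collect the model arguments from the UI controls;
        # quantization and RoPE scaling are passed only when a valid option is set.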
        yield ALERTS["info_loading"][lang]
        args = dict(
            model_name_or_path=get("top.model_path"),
            checkpoint_dir=checkpoint_dir,
            finetuning_type=get("top.finetuning_type"),
            quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
            template=get("top.template"),
            flash_attn=get("top.flash_attn"),
            shift_attn=get("top.shift_attn"),
            rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None
        )
        super().__init__(args)

        yield ALERTS["info_loaded"][lang]
    def unload_model(self, data: Dict[Component, Any]) -> Generator[str, None, None]:
        lang = data[self.manager.get_elem_by_name("top.lang")]

        if self.demo_mode:
            gr.Warning(ALERTS["err_demo"][lang])
            yield ALERTS["err_demo"][lang]
            return

        yield ALERTS["info_unloading"][lang]
        self.model = None
        self.tokenizer = None
        torch_gc()
        yield ALERTS["info_unloaded"][lang]
    def predict(
        self,
        chatbot: List[Tuple[str, str]],
        query: str,
        history: List[Tuple[str, str]],
        system: str,
        max_new_tokens: int,
        top_p: float,
        temperature: float
    ) -> Generator[Tuple[List[Tuple[str, str]], List[Tuple[str, str]]], None, None]:
        chatbot.append([query, ""])
        response = ""
        for new_text in self.stream_chat(
            query, history, system, max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature
        ):
            response += new_text
            new_history = history + [(query, response)]
            chatbot[-1] = [query, self.postprocess(response)]
            yield chatbot, new_history
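
    # Escapes "<" and ">" outside of ``` fences so model output cannot inject
    # HTML into the chatbot widget, while leaving code blocks untouched.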
    def postprocess(self, response: str) -> str:
        blocks = response.split("```")
        for i, block in enumerate(blocks):
            if i % 2 == 0:  # even-indexed blocks lie outside code fences
                blocks[i] = block.replace("<", "&lt;").replace(">", "&gt;")
        return "```".join(blocks)