# Langchain-Chatchat — startup.py
# Orchestrates the FastChat controller / OpenAI API / model workers,
# the Chatchat API server and the Streamlit web UI.
import asyncio
import multiprocessing as mp
import os
import subprocess
import sys
from multiprocessing import Process
from datetime import datetime
from pprint import pprint
from langchain_core._api import deprecated

try:
    import numexpr

    n_cores = numexpr.utils.detect_number_of_cores()
    os.environ["NUMEXPR_MAX_THREADS"] = str(n_cores)
except:
    pass

sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from configs import (
    LOG_PATH,
    log_verbose,
    logger,
    LLM_MODELS,
    EMBEDDING_MODEL,
    TEXT_SPLITTER_NAME,
    FSCHAT_CONTROLLER,
    FSCHAT_OPENAI_API,
    FSCHAT_MODEL_WORKERS,
    API_SERVER,
    WEBUI_SERVER,
    HTTPX_DEFAULT_TIMEOUT,
)
from server.utils import (fschat_controller_address, fschat_model_worker_address,
                          fschat_openai_api_address, get_httpx_client, get_model_worker_config,
                          MakeFastAPIOffline, FastAPI, llm_device, embedding_device)
from server.knowledge_base.migrate import create_tables
import argparse
from typing import List, Dict, Tuple
from configs import VERSION
@deprecated(
    since="0.3.0",
    message="模型启动功能将于 Langchain-Chatchat 0.3.x重写,支持更多模式和加速启动,0.2.x中相关功能将废弃",
    removal="0.3.0")
def create_controller_app(
    dispatch_method: str,
    log_level: str = "INFO",
) -> FastAPI:
    """Build the FastChat controller FastAPI app.

    Args:
        dispatch_method: worker dispatch strategy passed to fastchat's
            Controller (e.g. "shortest_queue" — see fastchat docs; value
            comes from FSCHAT_CONTROLLER config).
        log_level: level applied to fastchat's controller logger.

    Returns:
        The fastchat controller app, patched for offline use, with the
        Controller instance attached as `app._controller`.
    """
    import fastchat.constants
    # Redirect fastchat's log files into the project's log directory.
    fastchat.constants.LOGDIR = LOG_PATH
    from fastchat.serve.controller import app, Controller, logger
    logger.setLevel(log_level)

    controller = Controller(dispatch_method)
    # fastchat's module-level handlers reference the module global
    # `controller`, so the new instance must be installed there too.
    sys.modules["fastchat.serve.controller"].controller = controller

    # Serve swagger/redoc assets locally instead of from CDNs.
    MakeFastAPIOffline(app)
    app.title = "FastChat Controller"
    # Stash the controller so our own endpoints (e.g. /release_worker)
    # can reach it through the app object.
    app._controller = controller
    return app
def create_model_worker_app(log_level: str = "INFO", **kwargs) -> FastAPI:
    """Build a FastChat model-worker FastAPI app.

    kwargs may contain:
        host:
        port:
        model_names: [`model_name`]
        controller_address:
        worker_address:

    For Langchain-supported models:
        langchain_model: True  — no fschat worker is created.
    For online APIs:
        online_api: True, worker_class: `provider`
    For local (offline) models:
        model_path: `model_name_or_path` — a huggingface repo-id or local path
        device: `LLM_DEVICE`

    Returns:
        The fastchat worker app (base/vllm/model worker depending on the
        kwargs), patched for offline use, with the worker instance attached
        as `app._worker`.
    """
    import fastchat.constants
    # Redirect fastchat's log files into the project's log directory.
    fastchat.constants.LOGDIR = LOG_PATH
    import argparse

    # Empty Namespace used as a mutable bag of settings; all real values
    # are injected from kwargs below.
    parser = argparse.ArgumentParser()
    args = parser.parse_args([])

    for k, v in kwargs.items():
        setattr(args, k, v)
    if worker_class := kwargs.get("langchain_model"):  # Langchain-supported models need no action
        from fastchat.serve.base_model_worker import app
        worker = ""
    # online model API
    elif worker_class := kwargs.get("worker_class"):
        from fastchat.serve.base_model_worker import app

        worker = worker_class(model_names=args.model_names,
                              controller_addr=args.controller_address,
                              worker_addr=args.worker_address)
        # sys.modules["fastchat.serve.base_model_worker"].worker = worker
        sys.modules["fastchat.serve.base_model_worker"].logger.setLevel(log_level)
    # local model
    else:
        from configs.model_config import VLLM_MODEL_DICT
        # NOTE(review): args.infer_turbo is only present when supplied via
        # kwargs (from the model worker config) — TODO confirm it is always set.
        if kwargs["model_names"][0] in VLLM_MODEL_DICT and args.infer_turbo == "vllm":
            import fastchat.serve.vllm_worker
            from fastchat.serve.vllm_worker import VLLMWorker, app, worker_id
            from vllm import AsyncLLMEngine
            from vllm.engine.arg_utils import AsyncEngineArgs

            # Defaults expected by vllm's AsyncEngineArgs.from_cli_args().
            args.tokenizer = args.model_path
            args.tokenizer_mode = 'auto'
            args.trust_remote_code = True
            args.download_dir = None
            args.load_format = 'auto'
            args.dtype = 'auto'
            args.seed = 0
            args.worker_use_ray = False
            args.pipeline_parallel_size = 1
            args.tensor_parallel_size = 1
            args.block_size = 16
            args.swap_space = 4  # GiB
            args.gpu_memory_utilization = 0.90
            args.max_num_batched_tokens = None  # max tokens per batch; depends on GPU memory and model settings — too large can OOM
            args.max_num_seqs = 256
            args.disable_log_stats = False
            args.conv_template = None
            args.limit_worker_concurrency = 5
            args.no_register = False
            args.num_gpus = 1  # vllm shards via tensor parallelism; set to the number of GPUs
            args.engine_use_ray = False
            args.disable_log_requests = False

            # parameters added after vllm 0.2.1 (not needed here, but must exist)
            args.max_model_len = None
            args.revision = None
            args.quantization = None
            args.max_log_len = None
            args.tokenizer_revision = None

            # parameter added in vllm 0.2.2
            args.max_paddings = 256

            if args.model_path:
                args.model = args.model_path
            if args.num_gpus > 1:
                args.tensor_parallel_size = args.num_gpus

            # Re-apply kwargs so caller-supplied values override the
            # defaults set above.
            for k, v in kwargs.items():
                setattr(args, k, v)

            engine_args = AsyncEngineArgs.from_cli_args(args)
            engine = AsyncLLMEngine.from_engine_args(engine_args)

            worker = VLLMWorker(
                controller_addr=args.controller_address,
                worker_addr=args.worker_address,
                worker_id=worker_id,
                model_path=args.model_path,
                model_names=args.model_names,
                limit_worker_concurrency=args.limit_worker_concurrency,
                no_register=args.no_register,
                llm_engine=engine,
                conv_template=args.conv_template,
            )
            # fastchat's route handlers read these module globals.
            sys.modules["fastchat.serve.vllm_worker"].engine = engine
            sys.modules["fastchat.serve.vllm_worker"].worker = worker
            sys.modules["fastchat.serve.vllm_worker"].logger.setLevel(log_level)

        else:
            from fastchat.serve.model_worker import app, GptqConfig, AWQConfig, ModelWorker, worker_id

            args.gpus = "0"  # GPU indices; for multiple GPUs use e.g. "0,1,2,3"
            args.max_gpu_memory = "22GiB"
            args.num_gpus = 1  # model worker shards via model parallelism; set to the number of GPUs

            # Quantization / worker defaults; caller kwargs below override.
            args.load_8bit = False
            args.cpu_offloading = None
            args.gptq_ckpt = None
            args.gptq_wbits = 16
            args.gptq_groupsize = -1
            args.gptq_act_order = False
            args.awq_ckpt = None
            args.awq_wbits = 16
            args.awq_groupsize = -1
            args.model_names = [""]
            args.conv_template = None
            args.limit_worker_concurrency = 5
            args.stream_interval = 2
            args.no_register = False
            args.embed_in_truncate = False
            for k, v in kwargs.items():
                setattr(args, k, v)
            if args.gpus:
                if args.num_gpus is None:
                    args.num_gpus = len(args.gpus.split(','))
                if len(args.gpus.split(",")) < args.num_gpus:
                    raise ValueError(
                        f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
                    )
                os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
            gptq_config = GptqConfig(
                ckpt=args.gptq_ckpt or args.model_path,
                wbits=args.gptq_wbits,
                groupsize=args.gptq_groupsize,
                act_order=args.gptq_act_order,
            )
            awq_config = AWQConfig(
                ckpt=args.awq_ckpt or args.model_path,
                wbits=args.awq_wbits,
                groupsize=args.awq_groupsize,
            )

            worker = ModelWorker(
                controller_addr=args.controller_address,
                worker_addr=args.worker_address,
                worker_id=worker_id,
                model_path=args.model_path,
                model_names=args.model_names,
                limit_worker_concurrency=args.limit_worker_concurrency,
                no_register=args.no_register,
                device=args.device,
                num_gpus=args.num_gpus,
                max_gpu_memory=args.max_gpu_memory,
                load_8bit=args.load_8bit,
                cpu_offloading=args.cpu_offloading,
                gptq_config=gptq_config,
                awq_config=awq_config,
                stream_interval=args.stream_interval,
                conv_template=args.conv_template,
                embed_in_truncate=args.embed_in_truncate,
            )
            # fastchat's route handlers read these module globals.
            sys.modules["fastchat.serve.model_worker"].args = args
            sys.modules["fastchat.serve.model_worker"].gptq_config = gptq_config
            # sys.modules["fastchat.serve.model_worker"].worker = worker
            sys.modules["fastchat.serve.model_worker"].logger.setLevel(log_level)

    # Serve swagger/redoc assets locally instead of from CDNs.
    MakeFastAPIOffline(app)
    app.title = f"FastChat LLM Server ({args.model_names[0]})"
    # Stash the worker so our /release endpoint can be attached later.
    app._worker = worker
    return app
def create_openai_api_app(
    controller_address: str,
    api_keys: List = None,
    log_level: str = "INFO",
) -> FastAPI:
    """Build FastChat's OpenAI-compatible API FastAPI app.

    Args:
        controller_address: address of the running FastChat controller.
        api_keys: optional list of accepted API keys; defaults to an empty
            list (no authentication).  Fix: the original used a mutable
            default argument (`api_keys: List = []`).
        log_level: level applied to the openai_api logger.

    Returns:
        The fastchat OpenAI API app with permissive CORS, patched for
        offline use.
    """
    import fastchat.constants
    # Redirect fastchat's log files into the project's log directory.
    fastchat.constants.LOGDIR = LOG_PATH
    from fastchat.serve.openai_api_server import app, CORSMiddleware, app_settings
    from fastchat.utils import build_logger
    logger = build_logger("openai_api", "openai_api.log")
    logger.setLevel(log_level)

    # Allow any origin — the API is meant to be reachable from the web UI
    # and arbitrary local clients.
    app.add_middleware(
        CORSMiddleware,
        allow_credentials=True,
        allow_origins=["*"],
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # fastchat's route handlers log through the module-global logger.
    sys.modules["fastchat.serve.openai_api_server"].logger = logger
    app_settings.controller_address = controller_address
    app_settings.api_keys = api_keys or []

    # Serve swagger/redoc assets locally instead of from CDNs.
    MakeFastAPIOffline(app)
    # Fix: original title had a typo ("OpeanAI").
    app.title = "FastChat OpenAI API Server"
    return app
def _set_app_event(app: FastAPI, started_event: mp.Event = None):
    """Register a startup hook that signals `started_event` (if any) once
    the given FastAPI app has finished starting up."""

    @app.on_event("startup")
    async def on_startup():
        if started_event is None:
            return
        started_event.set()
def run_controller(log_level: str = "INFO", started_event: mp.Event = None):
    """Run the FastChat controller server (blocking).

    Adds a custom POST /release_worker endpoint on top of the stock
    controller so that model workers can be stopped/switched remotely.

    Args:
        log_level: log level for the controller and uvicorn.
        started_event: optional event set once the app's startup hook runs.
    """
    import uvicorn
    import httpx
    from fastapi import Body
    import time
    import sys
    from server.utils import set_httpx_config
    set_httpx_config()

    app = create_controller_app(
        dispatch_method=FSCHAT_CONTROLLER.get("dispatch_method"),
        log_level=log_level,
    )
    _set_app_event(app, started_event)

    # add interface to release and load model worker
    @app.post("/release_worker")
    def release_worker(
        model_name: str = Body(..., description="要释放模型的名称", samples=["chatglm-6b"]),
        # worker_address: str = Body(None, description="要释放模型的地址,与名称二选一", samples=[FSCHAT_CONTROLLER_address()]),
        new_model_name: str = Body(None, description="释放后加载该模型"),
        keep_origin: bool = Body(False, description="不释放原模型,加载新模型")
    ) -> Dict:
        available_models = app._controller.list_models()
        # Refuse to switch to a model that is already being served.
        if new_model_name in available_models:
            msg = f"要切换的LLM模型 {new_model_name} 已经存在"
            logger.info(msg)
            return {"code": 500, "msg": msg}

        if new_model_name:
            logger.info(f"开始切换LLM模型:从 {model_name} 到 {new_model_name}")
        else:
            logger.info(f"即将停止LLM模型: {model_name}")

        if model_name not in available_models:
            msg = f"the model {model_name} is not available"
            logger.error(msg)
            return {"code": 500, "msg": msg}

        worker_address = app._controller.get_worker_address(model_name)
        if not worker_address:
            msg = f"can not find model_worker address for {model_name}"
            logger.error(msg)
            return {"code": 500, "msg": msg}

        # Ask the worker itself to release (and optionally replace) its model;
        # the worker forwards the request to the main process via its queue.
        with get_httpx_client() as client:
            r = client.post(worker_address + "/release",
                            json={"new_model_name": new_model_name, "keep_origin": keep_origin})
            if r.status_code != 200:
                msg = f"failed to release model: {model_name}"
                logger.error(msg)
                return {"code": 500, "msg": msg}

        if new_model_name:
            # Poll until the replacement worker registers itself, up to
            # HTTPX_DEFAULT_TIMEOUT seconds.
            timer = HTTPX_DEFAULT_TIMEOUT  # wait for new model_worker register
            while timer > 0:
                models = app._controller.list_models()
                if new_model_name in models:
                    break
                time.sleep(1)
                timer -= 1
            # NOTE(review): "sucess" typo below is a runtime message, kept as-is.
            if timer > 0:
                msg = f"sucess change model from {model_name} to {new_model_name}"
                logger.info(msg)
                return {"code": 200, "msg": msg}
            else:
                msg = f"failed change model from {model_name} to {new_model_name}"
                logger.error(msg)
                return {"code": 500, "msg": msg}
        else:
            msg = f"sucess to release model: {model_name}"
            logger.info(msg)
            return {"code": 200, "msg": msg}

    host = FSCHAT_CONTROLLER["host"]
    port = FSCHAT_CONTROLLER["port"]

    # In quiet mode the parent process replaced stdout/stderr; restore the
    # real streams before handing control to uvicorn.
    if log_level == "ERROR":
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__

    uvicorn.run(app, host=host, port=port, log_level=log_level.lower())
def run_model_worker(
    model_name: str = LLM_MODELS[0],
    controller_address: str = "",
    log_level: str = "INFO",
    q: mp.Queue = None,
    started_event: mp.Event = None,
):
    """Run a FastChat model worker for `model_name` (blocking).

    Adds a custom POST /release endpoint that forwards stop/start/replace
    commands to the main process through queue `q`.

    Args:
        model_name: model to serve; its worker config supplies host/port etc.
        controller_address: controller to register with; defaults to the
            configured fschat controller address.
        log_level: log level for the worker and uvicorn.
        q: queue back to start_main_server's command loop.
        started_event: optional event set once the app's startup hook runs.
    """
    import uvicorn
    from fastapi import Body
    import sys
    from server.utils import set_httpx_config
    set_httpx_config()

    kwargs = get_model_worker_config(model_name)
    host = kwargs.pop("host")
    port = kwargs.pop("port")
    kwargs["model_names"] = [model_name]
    kwargs["controller_address"] = controller_address or fschat_controller_address()
    kwargs["worker_address"] = fschat_model_worker_address(model_name)
    model_path = kwargs.get("model_path", "")
    kwargs["model_path"] = model_path

    app = create_model_worker_app(log_level=log_level, **kwargs)
    _set_app_event(app, started_event)
    # In quiet mode the parent process replaced stdout/stderr; restore the
    # real streams before handing control to uvicorn.
    if log_level == "ERROR":
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__

    # add interface to release and load model
    @app.post("/release")
    def release_model(
        new_model_name: str = Body(None, description="释放后加载该模型"),
        keep_origin: bool = Body(False, description="不释放原模型,加载新模型")
    ) -> Dict:
        # The actual process management happens in start_main_server's
        # command loop; this endpoint only enqueues the request.
        if keep_origin:
            if new_model_name:
                q.put([model_name, "start", new_model_name])
        else:
            if new_model_name:
                q.put([model_name, "replace", new_model_name])
            else:
                q.put([model_name, "stop", None])
        return {"code": 200, "msg": "done"}

    uvicorn.run(app, host=host, port=port, log_level=log_level.lower())
def run_openai_api(log_level: str = "INFO", started_event: mp.Event = None):
    """Run FastChat's OpenAI-compatible API server (blocking).

    Args:
        log_level: log level passed to the app builder.
        started_event: optional event set once the app's startup hook runs.
    """
    import uvicorn
    import sys
    from server.utils import set_httpx_config
    set_httpx_config()

    openai_app = create_openai_api_app(fschat_controller_address(), log_level=log_level)
    _set_app_event(openai_app, started_event)

    # In quiet mode the parent replaced stdout/stderr; restore the real
    # streams before handing control to uvicorn.
    if log_level == "ERROR":
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__

    uvicorn.run(openai_app,
                host=FSCHAT_OPENAI_API["host"],
                port=FSCHAT_OPENAI_API["port"])
def run_api_server(started_event: mp.Event = None, run_mode: str = None):
    """Run the Chatchat API server (blocking).

    Args:
        started_event: optional event set once the app's startup hook runs.
        run_mode: forwarded to create_app (e.g. "lite").
    """
    from server.api import create_app
    import uvicorn
    from server.utils import set_httpx_config
    set_httpx_config()

    api_app = create_app(run_mode=run_mode)
    _set_app_event(api_app, started_event)

    uvicorn.run(api_app, host=API_SERVER["host"], port=API_SERVER["port"])
def run_webui(started_event: mp.Event = None, run_mode: str = None):
    """Launch the Streamlit web UI as a child process and wait for it.

    Args:
        started_event: optional event set right after the UI process spawns.
        run_mode: when "lite", forwards a `lite` flag to webui.py.
    """
    from server.utils import set_httpx_config
    set_httpx_config()

    host = WEBUI_SERVER["host"]
    port = WEBUI_SERVER["port"]

    cmd = ["streamlit", "run", "webui.py",
           "--server.address", host,
           "--server.port", str(port),
           "--theme.base", "light",
           "--theme.primaryColor", "#165dff",
           "--theme.secondaryBackgroundColor", "#f5f5f5",
           "--theme.textColor", "#000000",
           ]
    if run_mode == "lite":
        cmd += [
            "--",
            "lite",
        ]
    p = subprocess.Popen(cmd)
    # Fix: started_event defaults to None; the original called .set()
    # unconditionally and crashed with AttributeError when no event was given.
    if started_event is not None:
        started_event.set()
    p.wait()
def parse_args() -> Tuple[argparse.Namespace, argparse.ArgumentParser]:
    """Build the CLI parser and parse sys.argv.

    Returns:
        (args, parser): the parsed namespace plus the parser itself, so the
        caller can print help when nothing was requested.

    Fixes: the original return annotation claimed `argparse.ArgumentParser`
    although the function returns a 2-tuple; "seperated" help-text typo.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-a",
        "--all-webui",
        action="store_true",
        help="run fastchat's controller/openai_api/model_worker servers, run api.py and webui.py",
        dest="all_webui",
    )
    parser.add_argument(
        "--all-api",
        action="store_true",
        help="run fastchat's controller/openai_api/model_worker servers, run api.py",
        dest="all_api",
    )
    parser.add_argument(
        "--llm-api",
        action="store_true",
        help="run fastchat's controller/openai_api/model_worker servers",
        dest="llm_api",
    )
    parser.add_argument(
        "-o",
        "--openai-api",
        action="store_true",
        help="run fastchat's controller/openai_api servers",
        dest="openai_api",
    )
    parser.add_argument(
        "-m",
        "--model-worker",
        action="store_true",
        help="run fastchat's model_worker server with specified model name. "
             "specify --model-name if not using default LLM_MODELS",
        dest="model_worker",
    )
    parser.add_argument(
        "-n",
        "--model-name",
        type=str,
        nargs="+",
        default=LLM_MODELS,
        help="specify model name for model worker. "
             "add addition names with space separated to start multiple model workers.",
        dest="model_name",
    )
    parser.add_argument(
        "-c",
        "--controller",
        type=str,
        help="specify controller address the worker is registered to. default is FSCHAT_CONTROLLER",
        dest="controller_address",
    )
    parser.add_argument(
        "--api",
        action="store_true",
        help="run api.py server",
        dest="api",
    )
    parser.add_argument(
        "-p",
        "--api-worker",
        action="store_true",
        help="run online model api such as zhipuai",
        dest="api_worker",
    )
    parser.add_argument(
        "-w",
        "--webui",
        action="store_true",
        help="run webui.py server",
        dest="webui",
    )
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="减少fastchat服务log信息",
        dest="quiet",
    )
    parser.add_argument(
        "-i",
        "--lite",
        action="store_true",
        help="以Lite模式运行:仅支持在线API的LLM对话、搜索引擎对话",
        dest="lite",
    )
    args = parser.parse_args()
    return args, parser
def dump_server_info(after_start=False, args=None):
    """Print a configuration summary banner to stdout.

    Args:
        after_start: when True, also print the addresses of the servers
            that were launched.
        args: parsed CLI namespace; overrides the model list and selects
            which server addresses to show.
    """
    import platform
    import langchain
    import fastchat
    from server.utils import api_address, webui_address

    banner = "=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30

    print("\n")
    print(banner)
    print(f"操作系统:{platform.platform()}.")
    print(f"python版本:{sys.version}")
    print(f"项目版本:{VERSION}")
    print(f"langchain版本:{langchain.__version__}. fastchat版本:{fastchat.__version__}")
    print("\n")

    model_list = args.model_name if (args and args.model_name) else LLM_MODELS

    print(f"当前使用的分词器:{TEXT_SPLITTER_NAME}")
    print(f"当前启动的LLM模型:{model_list} @ {llm_device()}")

    for name in model_list:
        pprint(get_model_worker_config(name))
    print(f"当前Embbedings模型: {EMBEDDING_MODEL} @ {embedding_device()}")

    if not after_start:
        return

    print("\n")
    print(f"服务端运行信息:")
    if args.openai_api:
        print(f"    OpenAI API Server: {fschat_openai_api_address()}")
    if args.api:
        print(f"    Chatchat API Server: {api_address()}")
    if args.webui:
        print(f"    Chatchat WEBUI Server: {webui_address()}")
    print(banner)
    print("\n")
async def start_main_server():
    """Spawn and supervise all requested server processes.

    Reads CLI flags, launches controller / openai_api / model workers /
    api / webui as daemon child processes in dependency order, then loops
    on a command queue handling start/stop/replace requests from workers.
    Child processes are killed in the `finally` block on exit.
    """
    import time
    import signal

    def handler(signalname):
        """
        Python 3.9 has `signal.strsignal(signalnum)` so this closure would not be needed.
        Also, 3.8 includes `signal.valid_signals()` that can be used to create a mapping for the same purpose.
        """

        def f(signal_received, frame):
            raise KeyboardInterrupt(f"{signalname} received")

        return f

    # This will be inherited by the child process if it is forked (not spawned)
    signal.signal(signal.SIGINT, handler("SIGINT"))
    signal.signal(signal.SIGTERM, handler("SIGTERM"))

    mp.set_start_method("spawn")
    manager = mp.Manager()
    run_mode = None

    # Workers use this queue to request start/stop/replace of model processes.
    queue = manager.Queue()
    args, parser = parse_args()

    # Umbrella flags expand into the individual component flags.
    if args.all_webui:
        args.openai_api = True
        args.model_worker = True
        args.api = True
        args.api_worker = True
        args.webui = True

    elif args.all_api:
        args.openai_api = True
        args.model_worker = True
        args.api = True
        args.api_worker = True
        args.webui = False

    elif args.llm_api:
        args.openai_api = True
        args.model_worker = True
        args.api_worker = True
        args.api = False
        args.webui = False

    # Lite mode: no local model workers, online APIs only.
    if args.lite:
        args.model_worker = False
        run_mode = "lite"

    dump_server_info(args=args)

    if len(sys.argv) > 1:
        logger.info(f"正在启动服务:")
        logger.info(f"如需查看 llm_api 日志,请前往 {LOG_PATH}")

    # Top-level entries are single Process objects; "online_api" and
    # "model_worker" are sub-dicts keyed by model name.
    processes = {"online_api": {}, "model_worker": {}}

    def process_count():
        # -2 compensates for the two sub-dict entries counted by len(processes).
        return len(processes) + len(processes["online_api"]) + len(processes["model_worker"]) - 2

    if args.quiet or not log_verbose:
        log_level = "ERROR"
    else:
        log_level = "INFO"

    controller_started = manager.Event()
    if args.openai_api:
        process = Process(
            target=run_controller,
            name=f"controller",
            kwargs=dict(log_level=log_level, started_event=controller_started),
            daemon=True,
        )
        processes["controller"] = process

        process = Process(
            target=run_openai_api,
            name=f"openai_api",
            daemon=True,
        )
        processes["openai_api"] = process

    model_worker_started = []
    if args.model_worker:
        # One worker process per locally-served model.
        for model_name in args.model_name:
            config = get_model_worker_config(model_name)
            if not config.get("online_api"):
                e = manager.Event()
                model_worker_started.append(e)
                process = Process(
                    target=run_model_worker,
                    name=f"model_worker - {model_name}",
                    kwargs=dict(model_name=model_name,
                                controller_address=args.controller_address,
                                log_level=log_level,
                                q=queue,
                                started_event=e),
                    daemon=True,
                )
                processes["model_worker"][model_name] = process

    if args.api_worker:
        # One worker process per configured online-API provider model.
        for model_name in args.model_name:
            config = get_model_worker_config(model_name)
            if (config.get("online_api")
                    and config.get("worker_class")
                    and model_name in FSCHAT_MODEL_WORKERS):
                e = manager.Event()
                model_worker_started.append(e)
                process = Process(
                    target=run_model_worker,
                    name=f"api_worker - {model_name}",
                    kwargs=dict(model_name=model_name,
                                controller_address=args.controller_address,
                                log_level=log_level,
                                q=queue,
                                started_event=e),
                    daemon=True,
                )
                processes["online_api"][model_name] = process

    api_started = manager.Event()
    if args.api:
        process = Process(
            target=run_api_server,
            name=f"API Server",
            kwargs=dict(started_event=api_started, run_mode=run_mode),
            daemon=True,
        )
        processes["api"] = process

    webui_started = manager.Event()
    if args.webui:
        process = Process(
            target=run_webui,
            name=f"WEBUI Server",
            kwargs=dict(started_event=webui_started, run_mode=run_mode),
            daemon=True,
        )
        processes["webui"] = process

    if process_count() == 0:
        parser.print_help()
    else:
        try:
            # Ensure the services shut down cleanly when SIGINT arrives.
            # Start order matters: controller first, then workers, then API/UI.
            if p := processes.get("controller"):
                p.start()
                p.name = f"{p.name} ({p.pid})"
                controller_started.wait()  # wait until the controller is up

            if p := processes.get("openai_api"):
                p.start()
                p.name = f"{p.name} ({p.pid})"

            for n, p in processes.get("model_worker", {}).items():
                p.start()
                p.name = f"{p.name} ({p.pid})"

            for n, p in processes.get("online_api", []).items():
                p.start()
                p.name = f"{p.name} ({p.pid})"

            # Block until every worker has signalled readiness.
            for e in model_worker_started:
                e.wait()

            if p := processes.get("api"):
                p.start()
                p.name = f"{p.name} ({p.pid})"
                api_started.wait()

            if p := processes.get("webui"):
                p.start()
                p.name = f"{p.name} ({p.pid})"
                webui_started.wait()

            dump_server_info(after_start=True, args=args)

            # Command loop: workers enqueue [model_name, cmd, new_model_name].
            while True:
                cmd = queue.get()
                e = manager.Event()
                if isinstance(cmd, list):
                    model_name, cmd, new_model_name = cmd
                    if cmd == "start":  # launch an additional model worker
                        logger.info(f"准备启动新模型进程:{new_model_name}")
                        process = Process(
                            target=run_model_worker,
                            name=f"model_worker - {new_model_name}",
                            kwargs=dict(model_name=new_model_name,
                                        controller_address=args.controller_address,
                                        log_level=log_level,
                                        q=queue,
                                        started_event=e),
                            daemon=True,
                        )
                        process.start()
                        process.name = f"{process.name} ({process.pid})"
                        processes["model_worker"][new_model_name] = process
                        e.wait()
                        logger.info(f"成功启动新模型进程:{new_model_name}")
                    elif cmd == "stop":
                        if process := processes["model_worker"].get(model_name):
                            time.sleep(1)
                            process.terminate()
                            process.join()
                            logger.info(f"停止模型进程:{model_name}")
                        else:
                            logger.error(f"未找到模型进程:{model_name}")
                    elif cmd == "replace":  # stop one worker, start another
                        if process := processes["model_worker"].pop(model_name, None):
                            logger.info(f"停止模型进程:{model_name}")
                            start_time = datetime.now()
                            time.sleep(1)
                            process.terminate()
                            process.join()
                            process = Process(
                                target=run_model_worker,
                                name=f"model_worker - {new_model_name}",
                                kwargs=dict(model_name=new_model_name,
                                            controller_address=args.controller_address,
                                            log_level=log_level,
                                            q=queue,
                                            started_event=e),
                                daemon=True,
                            )
                            process.start()
                            process.name = f"{process.name} ({process.pid})"
                            processes["model_worker"][new_model_name] = process
                            e.wait()
                            timing = datetime.now() - start_time
                            logger.info(f"成功启动新模型进程:{new_model_name}。用时:{timing}。")
                        else:
                            logger.error(f"未找到模型进程:{model_name}")

            # for process in processes.get("model_worker", {}).values():
            #     process.join()
            # for process in processes.get("online_api", {}).values():
            #     process.join()

            # for name, process in processes.items():
            #     if name not in ["model_worker", "online_api"]:
            #         if isinstance(p, dict):
            #             for work_process in p.values():
            #                 work_process.join()
            #         else:
            #             process.join()
        except Exception as e:
            # NOTE(review): KeyboardInterrupt subclasses BaseException, not
            # Exception, so the interrupt raised by the signal handler is NOT
            # caught here — it propagates, and only `finally` below runs.
            logger.error(e)
            logger.warning("Caught KeyboardInterrupt! Setting stop event...")
        finally:

            for p in processes.values():
                logger.warning("Sending SIGKILL to %s", p)
                # Queues and other inter-process communication primitives can break when
                # process is killed, but we don't care here

                if isinstance(p, dict):
                    for process in p.values():
                        process.kill()
                else:
                    p.kill()

            for p in processes.values():
                logger.info("Process status: %s", p)
if __name__ == "__main__":
    # Ensure the knowledge-base tables exist before any server starts.
    create_tables()
    # Obtain an event loop in a version-compatible way: before 3.10,
    # get_event_loop() implicitly creates one; from 3.10 on that behavior
    # is deprecated, so try the running loop and fall back to a new one.
    if sys.version_info < (3, 10):
        loop = asyncio.get_event_loop()
    else:
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()

    asyncio.set_event_loop(loop)

    loop.run_until_complete(start_main_server())
# Example API call once the services are up:
884# import openai
885# openai.api_key = "EMPTY" # Not support yet
886# openai.api_base = "http://localhost:8888/v1"
887
888# model = "chatglm3-6b"
889
890# # create a chat completion
891# completion = openai.ChatCompletion.create(
892# model=model,
893# messages=[{"role": "user", "content": "Hello! What is your name?"}]
894# )
895# # print the completion
896# print(completion.choices[0].message.content)
897