Langchain-Chatchat
/
init_database.py
120 строк · 3.7 Кб
import sys
from datetime import datetime

# Put the current working directory on the import path *before* the project
# imports below; presumably this is what lets `server` and `configs` resolve
# when the script is launched from the repository root.
sys.path.append(".")

from server.knowledge_base.migrate import (
    create_tables,
    reset_tables,
    import_from_db,
    folder2db,
    prune_db_docs,
    prune_folder_files,
)
from configs.model_config import NLTK_DATA_PATH, EMBEDDING_MODEL
import nltk

# Search the project-configured NLTK data directory first, ahead of NLTK's
# default locations.
nltk.data.path = [NLTK_DATA_PATH, *nltk.data.path]


if __name__ == "__main__":
    # Command-line entry point for initialising / maintaining the knowledge
    # base database and vector stores.
    #
    # Dispatch contract (see the if/elif chain below):
    #   * --create-tables and --clear-tables are checked independently and run
    #     first, so they can be combined with one of the operations below.
    #   * -r, --import-db, -u, -i, --prune-db and --prune-folder form a single
    #     elif chain: when several are given, only the first one in that order
    #     is executed and the rest are silently ignored.
    #   * The elapsed wall-clock time is printed at the end, even when no
    #     operation was selected.
    import argparse

    parser = argparse.ArgumentParser(description="please specify only one operate method once time.")

    parser.add_argument(
        "-r",
        "--recreate-vs",
        action="store_true",
        help=('''
            recreate vector store.
            use this option if you have copied document files to the content folder, but vector store has not been populated or DEFAULT_VS_TYPE/EMBEDDING_MODEL changed.
            '''
        )
    )
    parser.add_argument(
        "--create-tables",
        action="store_true",
        help=("create empty tables if not existed")
    )
    parser.add_argument(
        "--clear-tables",
        action="store_true",
        help=("create empty tables, or drop the database tables before recreate vector stores")
    )
    parser.add_argument(
        "--import-db",
        help="import tables from specified sqlite database"
    )
    parser.add_argument(
        "-u",
        "--update-in-db",
        action="store_true",
        help=('''
            update vector store for files exist in database.
            use this option if you want to recreate vectors for files exist in db and skip files exist in local folder only.
            '''
        )
    )
    parser.add_argument(
        "-i",
        "--increment",
        action="store_true",
        help=('''
            update vector store for files exist in local folder and not exist in database.
            use this option if you want to create vectors incrementally.
            '''
        )
    )
    parser.add_argument(
        "--prune-db",
        action="store_true",
        help=('''
            delete docs in database that not existed in local folder.
            it is used to delete database docs after user deleted some doc files in file browser
            '''
        )
    )
    parser.add_argument(
        "--prune-folder",
        action="store_true",
        help=('''
            delete doc files in local folder that not existed in database.
            it is used to free local disk space by delete unused doc files.
            '''
        )
    )
    parser.add_argument(
        "-n",
        "--kb-name",
        type=str,
        nargs="+",
        default=[],
        help=("specify knowledge base names to operate on. default is all folders exist in KB_ROOT_PATH.")
    )
    parser.add_argument(
        "-e",
        "--embed-model",
        type=str,
        default=EMBEDDING_MODEL,
        help=("specify embeddings model.")
    )

    args = parser.parse_args()
    # Timing starts after argument parsing, so it covers only the database /
    # vector-store work below.
    start_time = datetime.now()

    if args.create_tables:
        create_tables()  # confirm tables exist

    if args.clear_tables:
        reset_tables()
        print("database tables reset")

    # Exactly one of the following operations runs: the first flag that is set.
    if args.recreate_vs:
        # Tables are created here as well, so -r works without --create-tables.
        create_tables()
        print("recreating all vector stores")
        folder2db(kb_names=args.kb_name, mode="recreate_vs", embed_model=args.embed_model)
    elif args.import_db:
        import_from_db(args.import_db)
    elif args.update_in_db:
        folder2db(kb_names=args.kb_name, mode="update_in_db", embed_model=args.embed_model)
    elif args.increment:
        folder2db(kb_names=args.kb_name, mode="increment", embed_model=args.embed_model)
    elif args.prune_db:
        prune_db_docs(args.kb_name)
    elif args.prune_folder:
        prune_folder_files(args.kb_name)

    end_time = datetime.now()
    print(f"总计用时: {end_time-start_time}")