Langchain-Chatchat

Форк
0
/
init_database.py 
120 строк · 3.7 Кб
1
import sys

# Add the current working directory to the import path. This has to run before
# the imports below -- presumably so the project-local `server` and `configs`
# packages resolve when the script is launched from the repository root.
sys.path.append(".")
from server.knowledge_base.migrate import (create_tables, reset_tables, import_from_db,
                                           folder2db, prune_db_docs, prune_folder_files)
from configs.model_config import NLTK_DATA_PATH, EMBEDDING_MODEL
import nltk

# Put the configured NLTK data directory ahead of nltk's existing search paths.
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
from datetime import datetime
9

10

11
if __name__ == "__main__":
    # Command-line entry point for knowledge-base maintenance.
    #
    # Table flags (--create-tables, --clear-tables) are handled first and may
    # be combined with one operation flag. The operation flags themselves
    # (-r, --import-db, -u, -i, --prune-db, --prune-folder) are dispatched
    # through an if/elif chain, so at most one of them runs per invocation.
    import argparse

    parser = argparse.ArgumentParser(description="please specify only one operate method once time.")

    parser.add_argument(
        "-r",
        "--recreate-vs",
        action="store_true",
        help=('''
            recreate vector store.
            use this option if you have copied document files to the content folder, but vector store has not been populated or DEFAULT_VS_TYPE/EMBEDDING_MODEL changed.
            '''
        )
    )
    parser.add_argument(
        "--create-tables",
        action="store_true",
        help=("create empty tables if not existed")
    )
    parser.add_argument(
        "--clear-tables",
        action="store_true",
        help=("create empty tables, or drop the database tables before recreate vector stores")
    )
    parser.add_argument(
        "--import-db",
        help="import tables from specified sqlite database"
    )
    parser.add_argument(
        "-u",
        "--update-in-db",
        action="store_true",
        help=('''
            update vector store for files exist in database.
            use this option if you want to recreate vectors for files exist in db and skip files exist in local folder only.
            '''
        )
    )
    parser.add_argument(
        "-i",
        "--increment",
        action="store_true",
        help=('''
            update vector store for files exist in local folder and not exist in database.
            use this option if you want to create vectors incrementally.
            '''
        )
    )
    parser.add_argument(
        "--prune-db",
        action="store_true",
        help=('''
            delete docs in database that not existed in local folder.
            it is used to delete database docs after user deleted some doc files in file browser
            '''
        )
    )
    parser.add_argument(
        "--prune-folder",
        action="store_true",
        help=('''
            delete doc files in local folder that not existed in database.
            it is used to free local disk space by delete unused doc files.
            '''
        )
    )
    parser.add_argument(
        "-n",
        "--kb-name",
        type=str,
        nargs="+",
        default=[],
        help=("specify knowledge base names to operate on. default is all folders exist in KB_ROOT_PATH.")
    )
    parser.add_argument(
        "-e",
        "--embed-model",
        type=str,
        default=EMBEDDING_MODEL,
        help=("specify embeddings model.")
    )

    args = parser.parse_args()
    start_time = datetime.now()

    # Operation flags in the same precedence order as the dispatch chain below.
    # Only the first one that is set runs, so tell the user when extra
    # operations were passed instead of ignoring them silently.
    operations = [
        ("--recreate-vs", args.recreate_vs),
        ("--import-db", args.import_db),
        ("--update-in-db", args.update_in_db),
        ("--increment", args.increment),
        ("--prune-db", args.prune_db),
        ("--prune-folder", args.prune_folder),
    ]
    selected = [flag for flag, value in operations if value]
    if len(selected) > 1:
        print(
            f"warning: multiple operations specified ({', '.join(selected)}); "
            f"only {selected[0]} will be executed",
            file=sys.stderr,
        )

    if args.create_tables:
        create_tables() # confirm tables exist

    if args.clear_tables:
        reset_tables()
        print("database tables reset")

    if args.recreate_vs:
        # Make sure the tables exist before the vector stores are rebuilt.
        create_tables()
        print("recreating all vector stores")
        folder2db(kb_names=args.kb_name, mode="recreate_vs", embed_model=args.embed_model)
    elif args.import_db:
        import_from_db(args.import_db)
    elif args.update_in_db:
        folder2db(kb_names=args.kb_name, mode="update_in_db", embed_model=args.embed_model)
    elif args.increment:
        folder2db(kb_names=args.kb_name, mode="increment", embed_model=args.embed_model)
    elif args.prune_db:
        prune_db_docs(args.kb_name)
    elif args.prune_folder:
        prune_folder_files(args.kb_name)

    end_time = datetime.now()
    # The message below reads "total elapsed time".
    print(f"总计用时: {end_time-start_time}")
121

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.