MetaGPT

mock.py
289 строк · 11.6 Кб
Перенос по словам
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
"""
4
@Time    : 2023/5/12 13:05
5
@Author  : alexanderwu
6
@File    : mock_markdown.py
7
"""
8
import json
9

10
from metagpt.actions import UserRequirement, WriteDesign, WritePRD, WriteTasks
11
from metagpt.schema import Message
12

13
# One-sentence user requirement (Chinese, kept verbatim); used as the content
# of MockMessages.req.
USER_REQUIREMENT = (
    "开发一个基于大语言模型与私有知识库的搜索引擎，"
    "希望可以基于大语言模型进行搜索总结"
)
14

15
# Detailed variant of the requirement: a numbered list of wanted capabilities
# followed by a numbered list of available resources (Chinese, kept verbatim).
# The code-viewer line counters that had leaked into the literal are removed.
DETAIL_REQUIREMENT = """需求：开发一个基于LLM（大语言模型）与私有知识库的搜索引擎，希望有几点能力
1. 用户可以在私有知识库进行搜索，再根据大语言模型进行总结，输出的结果包括了总结
2. 私有知识库可以实时更新，底层基于 ElasticSearch
3. 私有知识库支持pdf、word、txt等各种文件格式上传，上传后可以在服务端解析为文本，存储ES

资源：
1. 大语言模型已经有前置的抽象、部署，可以通过 `from metagpt.llm import LLM`，再使用`LLM().ask(prompt)`直接调用
2. Elastic已有[部署](http://192.168.50.82:9200/)，代码可以直接使用这个部署"""
23

24

25
# Sample product-requirements document in markdown: five `##` sections, each
# followed by a fenced python block. Used as the content of MockMessages.prd.
# Single-quote delimiters are required because the text itself contains a
# triple-double-quoted string. The code-viewer line counters that had leaked
# into the literal are removed.
PRD = '''## 原始需求
```python
"""
我们希望开发一个基于大语言模型与私有知识库的搜索引擎。该搜索引擎应当能根据用户输入的查询进行智能搜索，并基于大语言模型对搜索结果进行总结，以便用户能够快速获取他们所需要的信息。该搜索引擎应当能够处理大规模的数据，同时保持搜索结果的准确性和相关性。我们希望这个产品能够降低用户在查找、筛选和理解信息时的工作负担，提高他们的工作效率。
"""
```

## 产品目标
```python
[
    "提供高准确性、高相关性的搜索结果，满足用户的查询需求",
    "基于大语言模型对搜索结果进行智能总结，帮助用户快速获取所需信息",
    "处理大规模数据，保证搜索的速度和效率，提高用户的工作效率"
]
```

## 用户故事
```python
[
    "假设用户是一名研究员，他正在为一项关于全球气候变化的报告做研究。他输入了'全球气候变化的最新研究'，我们的搜索引擎快速返回了相关的文章、报告、数据集等。并且基于大语言模型对这些信息进行了智能总结，研究员可以快速了解到最新的研究趋势和发现。",
    "用户是一名学生，正在为即将到来的历史考试复习。他输入了'二战的主要战役'，搜索引擎返回了相关的资料，大语言模型总结出主要战役的时间、地点、结果等关键信息，帮助学生快速记忆。",
    "用户是一名企业家，他正在寻找关于最新的市场趋势信息。他输入了'2023年人工智能市场趋势'，搜索引擎返回了各种报告、新闻和分析文章。大语言模型对这些信息进行了总结，用户能够快速了解到市场的最新动态和趋势。"
]
```

## 竞品分析
```python
[
    "Google Search：Google搜索是市场上最主要的搜索引擎，它能够提供海量的搜索结果。但Google搜索并不提供搜索结果的总结功能，用户需要自己去阅读和理解搜索结果。",
    "Microsoft Bing：Bing搜索也能提供丰富的搜索结果，同样没有提供搜索结果的总结功能。",
    "Wolfram Alpha：Wolfram Alpha是一个基于知识库的计算型搜索引擎，能够针对某些特定类型的查询提供直接的答案和总结，但它的知识库覆盖范围有限，无法处理大规模的数据。"
]
```

## 开发需求池
```python
[
    ("开发基于大语言模型的智能总结功能", 5),
    ("开发搜索引擎核心算法，包括索引构建、查询处理、结果排序等", 7),
    ("设计和实现用户界面，包括查询输入、搜索结果展示、总结结果展示等", 3),
    ("构建和维护私有知识库，包括数据采集、清洗、更新等", 7),
    ("优化搜索引擎性能，包括搜索速度、准确性、相关性等", 6),
    ("开发用户反馈机制，包括反馈界面、反馈处理等", 2),
    ("开发安全防护机制，防止恶意查询和攻击", 3),
    ("集成大语言模型，包括模型选择、优化、更新等", 5),
    ("进行大规模的测试，包括功能测试、性能测试、压力测试等", 5),
    ("开发数据监控和日志系统，用于监控搜索引擎的运行状态和性能", 4)
]
```
'''
75

76
SYSTEM_DESIGN = """## Project name
77
```python
78
"smart_search_engine"
79
```
80

81
## Task list:
82
```python
83
[
84
    "smart_search_engine/__init__.py",
85
    "smart_search_engine/main.py",
86
    "smart_search_engine/search.py",
87
    "smart_search_engine/index.py",
88
    "smart_search_engine/ranking.py",
89
    "smart_search_engine/summary.py",
90
    "smart_search_engine/knowledge_base.py",
91
    "smart_search_engine/interface.py",
92
    "smart_search_engine/user_feedback.py",
93
    "smart_search_engine/security.py",
94
    "smart_search_engine/testing.py",
95
    "smart_search_engine/monitoring.py"
96
]
97
```
98

99
## Data structures and interfaces
100
```mermaid
101
classDiagram
102
    class Main {
103
        -SearchEngine search_engine
104
        +main() str
105
    }
106
    class SearchEngine {
107
        -Index index
108
        -Ranking ranking
109
        -Summary summary
110
        +search(query: str) str
111
    }
112
    class Index {
113
        -KnowledgeBase knowledge_base
114
        +create_index(data: dict)
115
        +query_index(query: str) list
116
    }
117
    class Ranking {
118
        +rank_results(results: list) list
119
    }
120
    class Summary {
121
        +summarize_results(results: list) str
122
    }
123
    class KnowledgeBase {
124
        +update(data: dict)
125
        +fetch_data(query: str) dict
126
    }
127
    Main --> SearchEngine
128
    SearchEngine --> Index
129
    SearchEngine --> Ranking
130
    SearchEngine --> Summary
131
    Index --> KnowledgeBase
132
```
133

134
## Program call flow
135
```mermaid
136
sequenceDiagram
137
    participant M as Main
138
    participant SE as SearchEngine
139
    participant I as Index
140
    participant R as Ranking
141
    participant S as Summary
142
    participant KB as KnowledgeBase
143
    M->>SE: search(query)
144
    SE->>I: query_index(query)
145
    I->>KB: fetch_data(query)
146
    KB-->>I: return data
147
    I-->>SE: return results
148
    SE->>R: rank_results(results)
149
    R-->>SE: return ranked_results
150
    SE->>S: summarize_results(ranked_results)
151
    S-->>SE: return summary
152
    SE-->>M: return summary
153
```
154
"""
155

156
# Task breakdown as a dict: a free-text dependency analysis plus the ordered
# list of files. MockMessages.json_tasks serializes it with json.dumps.
# The code-viewer line counters that had leaked into the string value and in
# between the list elements (a syntax error) are removed.
JSON_TASKS = {
    "Logic Analysis": """
    在这个项目中，所有的模块都依赖于“SearchEngine”类，这是主入口，其他的模块（Index、Ranking和Summary）都通过它交互。另外，"Index"类又依赖于"KnowledgeBase"类，因为它需要从知识库中获取数据。

- "main.py"包含"Main"类，是程序的入口点，它调用"SearchEngine"进行搜索操作，所以在其他任何模块之前，"SearchEngine"必须首先被定义。
- "search.py"定义了"SearchEngine"类，它依赖于"Index"、"Ranking"和"Summary"，因此，这些模块需要在"search.py"之前定义。
- "index.py"定义了"Index"类，它从"knowledge_base.py"获取数据来创建索引，所以"knowledge_base.py"需要在"index.py"之前定义。
- "ranking.py"和"summary.py"相对独立，只需确保在"search.py"之前定义。
- "knowledge_base.py"是独立的模块，可以优先开发。
- "interface.py"、"user_feedback.py"、"security.py"、"testing.py"和"monitoring.py"看起来像是功能辅助模块，可以在主要功能模块开发完成后并行开发。
    """,
    "Task list": [
        "smart_search_engine/knowledge_base.py",
        "smart_search_engine/index.py",
        "smart_search_engine/ranking.py",
        "smart_search_engine/summary.py",
        "smart_search_engine/search.py",
        "smart_search_engine/main.py",
        "smart_search_engine/interface.py",
        "smart_search_engine/user_feedback.py",
        "smart_search_engine/security.py",
        "smart_search_engine/testing.py",
        "smart_search_engine/monitoring.py",
    ],
}
181

182

183
TASKS = """## Logic Analysis
184

185
在这个项目中，所有的模块都依赖于“SearchEngine”类，这是主入口，其他的模块（Index、Ranking和Summary）都通过它交互。另外，"Index"类又依赖于"KnowledgeBase"类，因为它需要从知识库中获取数据。
186

187
- "main.py"包含"Main"类，是程序的入口点，它调用"SearchEngine"进行搜索操作，所以在其他任何模块之前，"SearchEngine"必须首先被定义。
188
- "search.py"定义了"SearchEngine"类，它依赖于"Index"、"Ranking"和"Summary"，因此，这些模块需要在"search.py"之前定义。
189
- "index.py"定义了"Index"类，它从"knowledge_base.py"获取数据来创建索引，所以"knowledge_base.py"需要在"index.py"之前定义。
190
- "ranking.py"和"summary.py"相对独立，只需确保在"search.py"之前定义。
191
- "knowledge_base.py"是独立的模块，可以优先开发。
192
- "interface.py"、"user_feedback.py"、"security.py"、"testing.py"和"monitoring.py"看起来像是功能辅助模块，可以在主要功能模块开发完成后并行开发。
193

194
## Task list
195

196
```python
197
task_list = [
198
    "smart_search_engine/knowledge_base.py",
199
    "smart_search_engine/index.py",
200
    "smart_search_engine/ranking.py",
201
    "smart_search_engine/summary.py",
202
    "smart_search_engine/search.py",
203
    "smart_search_engine/main.py",
204
    "smart_search_engine/interface.py",
205
    "smart_search_engine/user_feedback.py",
206
    "smart_search_engine/security.py",
207
    "smart_search_engine/testing.py",
208
    "smart_search_engine/monitoring.py",
209
]
210
```
211
这个任务列表首先定义了最基础的模块，然后是依赖这些模块的模块，最后是辅助模块。可以根据团队的能力和资源，同时开发多个任务，只要满足依赖关系。例如，在开发"search.py"之前，可以同时开发"knowledge_base.py"、"index.py"、"ranking.py"和"summary.py"。
212
"""
213

214

215
# Sample task-breakdown document for a Flask-based timer web app: required
# packages, a logic analysis given as a python string, and a python task list.
# Single-quote delimiters are required because the text itself contains a
# triple-double-quoted string. The code-viewer line counters that had leaked
# into the literal are removed.
TASKS_TOMATO_CLOCK = '''## Required Python third-party packages: Provided in requirements.txt format
```python
Flask==2.1.1
Jinja2==3.1.0
Bootstrap==5.3.0-alpha1
```

## Logic Analysis: Provided as a Python str, analyze the dependencies between the files, which work should be done first
```python
"""
1. Start by setting up the Flask app, config.py, and requirements.txt to create the basic structure of the web application.
2. Create the timer functionality using JavaScript and the Web Audio API in the timer.js file.
3. Develop the frontend templates (index.html and settings.html) using Jinja2 and integrate the timer functionality.
4. Add the necessary static files (main.css, main.js, and notification.mp3) for styling and interactivity.
5. Implement the ProgressBar class in main.js and integrate it with the Timer class in timer.js.
6. Write tests for the application in test_app.py.
"""
```

## Task list: Provided as Python list[str], each str is a file, the more at the beginning, the more it is a prerequisite dependency, should be done first
```python
task_list = [
    'app.py',
    'config.py',
    'requirements.txt',
    'static/js/timer.js',
    'templates/index.html',
    'templates/settings.html',
    'static/css/main.css',
    'static/js/main.js',
    'static/audio/notification.mp3',
    'static/js/progressbar.js',
    'tests/test_app.py'
]
```
'''
251

252
TASK = """smart_search_engine/knowledge_base.py"""
253

254
# Four small markdown snippets, each a `##` heading followed by a fenced
# python block. The headings differ in the spacing after `##` (one space,
# none, two) and the block bodies differ in quoting/assignment form.
# Presumably inputs for a markdown-parsing test (going by the name) — confirm
# against the consumer. The code-viewer line counters that had leaked into
# each snippet are removed.
STRS_FOR_PARSING = [
    """
## 1
```python
a
```
""",
    """
##2
```python
"a"
```
""",
    """
##  3
```python
a = "a"
```
""",
    """
## 4
```python
a =  'a'
```
""",
]
280

281

282
class MockMessages:
    """Pre-built ``Message`` fixtures wrapping the module-level mock documents.

    Each attribute is created once, when the class body executes at import
    time, and pairs one of the sample documents above with a role name and the
    action class passed as ``cause_by``.
    """

    # Initial user request.
    req = Message(role="User", content=USER_REQUIREMENT, cause_by=UserRequirement)
    # Product requirements document.
    prd = Message(role="Product Manager", content=PRD, cause_by=WritePRD)
    # System design document.
    system_design = Message(role="Architect", content=SYSTEM_DESIGN, cause_by=WriteDesign)
    # Task breakdown in markdown form.
    tasks = Message(role="Project Manager", content=TASKS, cause_by=WriteTasks)
    # Same kind of task breakdown, serialized from the JSON_TASKS dict;
    # ensure_ascii=False keeps the Chinese text unescaped in the content.
    json_tasks = Message(
        role="Project Manager", content=json.dumps(JSON_TASKS, ensure_ascii=False), cause_by=WriteTasks
    )
290
MetaGPT

Использование cookies