llm-applications
30 lines · 1.1 KB
import os

import numpy as np
import psycopg
from pgvector.psycopg import register_vector
def semantic_search(query, embedding_model, k):
    """Return the k documents nearest to *query* by embedding distance.

    Embeds the query with *embedding_model*, then runs a pgvector
    nearest-neighbor query (the ``<=>`` distance operator) against the
    ``document`` table.

    Args:
        query: Natural-language query string.
        embedding_model: Object exposing ``embed_query(str)`` returning a
            sequence of floats (e.g. a LangChain embeddings model).
        k: Number of rows to return.

    Returns:
        List of dicts with ``id``, ``text`` and ``source`` keys, ordered by
        increasing distance to the query embedding.

    Raises:
        KeyError: If ``DB_CONNECTION_STRING`` is not set in the environment.
    """
    embedding = np.array(embedding_model.embed_query(query))
    with psycopg.connect(os.environ["DB_CONNECTION_STRING"]) as conn:
        # Teach psycopg to adapt numpy arrays to/from the pgvector type.
        register_vector(conn)
        with conn.cursor() as cur:
            # Parameters are bound server-side, so `query`/`k` cannot inject SQL.
            cur.execute(
                "SELECT * FROM document ORDER BY embedding <=> %s LIMIT %s",
                (embedding, k),
            )
            rows = cur.fetchall()
    # NOTE(review): assumes the table's first three columns are
    # (id, text, source) — confirm against the schema.
    semantic_context = [{"id": row[0], "text": row[1], "source": row[2]} for row in rows]
    return semantic_context
20
21
def lexical_search(index, query, chunks, k):
    """Return the top-k chunks for *query* under a BM25-style lexical index.

    Args:
        index: Object exposing ``get_scores(tokens)`` returning one score per
            chunk (e.g. a rank_bm25 index), aligned with *chunks*.
        query: Natural-language query string; tokenized by lowercasing and
            whitespace splitting.
        chunks: Sequence where each item is ``(id, text, source)``.
        k: Number of results to return.

    Returns:
        List of dicts with ``id``, ``text``, ``source`` and ``score`` keys,
        sorted by descending score.
    """
    query_tokens = query.lower().split()  # preprocess query
    scores = index.get_scores(query_tokens)  # get best matching (BM) scores
    # Indices of the k highest-scoring chunks, best first.
    indices = sorted(range(len(scores)), key=lambda i: -scores[i])[:k]
    lexical_context = [
        {"id": chunks[i][0], "text": chunks[i][1], "source": chunks[i][2], "score": scores[i]}
        for i in indices
    ]
    return lexical_context
31