llm-applications
30 lines · 1.1 KB
import os

import numpy as np
import psycopg
from pgvector.psycopg import register_vector
def semantic_search(query, embedding_model, k):
    """Return the k documents nearest to *query* by embedding distance.

    Embeds the query with *embedding_model*, then runs a pgvector
    nearest-neighbor query (the ``<=>`` distance operator) against the
    ``document`` table.

    Args:
        query: Natural-language query string.
        embedding_model: Object exposing ``embed_query(str)`` returning a
            sequence of floats (e.g. a LangChain embeddings model).
        k: Number of rows to return.

    Returns:
        List of dicts with ``id``, ``text`` and ``source`` keys, ordered by
        increasing distance to the query embedding.

    Raises:
        KeyError: If ``DB_CONNECTION_STRING`` is not set in the environment.
    """
    embedding = np.array(embedding_model.embed_query(query))
    with psycopg.connect(os.environ["DB_CONNECTION_STRING"]) as conn:
        # Teach psycopg to adapt numpy arrays to/from the pgvector type.
        register_vector(conn)
        with conn.cursor() as cur:
            # Parameters are bound server-side, so `query`/`k` cannot inject SQL.
            cur.execute(
                "SELECT * FROM document ORDER BY embedding <=> %s LIMIT %s",
                (embedding, k),
            )
            rows = cur.fetchall()
    # NOTE(review): assumes the table's first three columns are
    # (id, text, source) — confirm against the schema.
    semantic_context = [{"id": row[0], "text": row[1], "source": row[2]} for row in rows]
    return semantic_context
20
21
def lexical_search(index, query, chunks, k):
    """Return the top-k chunks for *query* under a BM25-style lexical index.

    Args:
        index: Object exposing ``get_scores(tokens)`` returning one score per
            chunk (e.g. a rank_bm25 index), aligned with *chunks*.
        query: Natural-language query string; tokenized by lowercasing and
            whitespace splitting.
        chunks: Sequence where each item is ``(id, text, source)``.
        k: Number of results to return.

    Returns:
        List of dicts with ``id``, ``text``, ``source`` and ``score`` keys,
        sorted by descending score.
    """
    query_tokens = query.lower().split()  # preprocess query
    scores = index.get_scores(query_tokens)  # get best matching (BM) scores
    # Indices of the k highest-scoring chunks, best first.
    indices = sorted(range(len(scores)), key=lambda i: -scores[i])[:k]
    lexical_context = [
        {"id": chunks[i][0], "text": chunks[i][1], "source": chunks[i][2], "score": scores[i]}
        for i in indices
    ]
    return lexical_context
31