weaviate
159 строк · 4.6 Кб
1// _ _
2// __ _____ __ ___ ___ __ _| |_ ___
3// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
4// \ V V / __/ (_| |\ V /| | (_| | || __/
5// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
6//
7// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
8//
9// CONTACT: hello@weaviate.io
10//
11
12package modclip
13
14import (
15"context"
16"net/http"
17"os"
18"time"
19
20"github.com/weaviate/weaviate/usecases/modulecomponents/batch"
21
22"github.com/pkg/errors"
23"github.com/sirupsen/logrus"
24"github.com/weaviate/weaviate/entities/models"
25"github.com/weaviate/weaviate/entities/modulecapabilities"
26"github.com/weaviate/weaviate/entities/moduletools"
27"github.com/weaviate/weaviate/modules/multi2vec-clip/clients"
28"github.com/weaviate/weaviate/modules/multi2vec-clip/vectorizer"
29)
30
31func New() *ClipModule {
32return &ClipModule{}
33}
34
35type ClipModule struct {
36imageVectorizer imageVectorizer
37nearImageGraphqlProvider modulecapabilities.GraphQLArguments
38nearImageSearcher modulecapabilities.Searcher
39textVectorizer textVectorizer
40nearTextGraphqlProvider modulecapabilities.GraphQLArguments
41nearTextSearcher modulecapabilities.Searcher
42nearTextTransformer modulecapabilities.TextTransform
43metaClient metaClient
44logger logrus.FieldLogger
45}
46
// metaClient is the dependency ClipModule.MetaInfo delegates to.
type metaClient interface {
	// MetaInfo returns meta information as a generic key/value map.
	MetaInfo() (map[string]interface{}, error)
}
50
51type imageVectorizer interface {
52Object(ctx context.Context, obj *models.Object, cfg moduletools.ClassConfig) ([]float32, models.AdditionalProperties, error)
53VectorizeImage(ctx context.Context, id, image string, cfg moduletools.ClassConfig) ([]float32, error)
54}
55
56type textVectorizer interface {
57Texts(ctx context.Context, input []string,
58cfg moduletools.ClassConfig) ([]float32, error)
59}
60
61func (m *ClipModule) Name() string {
62return "multi2vec-clip"
63}
64
65func (m *ClipModule) Type() modulecapabilities.ModuleType {
66return modulecapabilities.Multi2Vec
67}
68
69func (m *ClipModule) Init(ctx context.Context,
70params moduletools.ModuleInitParams,
71) error {
72m.logger = params.GetLogger()
73if err := m.initVectorizer(ctx, params.GetConfig().ModuleHttpClientTimeout, params.GetLogger()); err != nil {
74return errors.Wrap(err, "init vectorizer")
75}
76
77if err := m.initNearImage(); err != nil {
78return errors.Wrap(err, "init near text")
79}
80
81return nil
82}
83
84func (m *ClipModule) InitExtension(modules []modulecapabilities.Module) error {
85for _, module := range modules {
86if module.Name() == m.Name() {
87continue
88}
89if arg, ok := module.(modulecapabilities.TextTransformers); ok {
90if arg != nil && arg.TextTransformers() != nil {
91m.nearTextTransformer = arg.TextTransformers()["nearText"]
92}
93}
94}
95
96if err := m.initNearText(); err != nil {
97return errors.Wrap(err, "init near text")
98}
99
100return nil
101}
102
103func (m *ClipModule) initVectorizer(ctx context.Context, timeout time.Duration,
104logger logrus.FieldLogger,
105) error {
106uri := os.Getenv("CLIP_INFERENCE_API")
107if uri == "" {
108return errors.Errorf("required variable CLIP_INFERENCE_API is not set")
109}
110
111client := clients.New(uri, timeout, logger)
112if err := client.WaitForStartup(ctx, 1*time.Second); err != nil {
113return errors.Wrap(err, "init remote vectorizer")
114}
115
116m.imageVectorizer = vectorizer.New(client)
117m.textVectorizer = vectorizer.New(client)
118m.metaClient = client
119
120return nil
121}
122
123func (m *ClipModule) RootHandler() http.Handler {
124// TODO: remove once this is a capability interface
125return nil
126}
127
128func (m *ClipModule) VectorizeObject(ctx context.Context,
129obj *models.Object, cfg moduletools.ClassConfig,
130) ([]float32, models.AdditionalProperties, error) {
131return m.imageVectorizer.Object(ctx, obj, cfg)
132}
133
134func (m *ClipModule) VectorizeBatch(ctx context.Context, objs []*models.Object, skipObject []bool, cfg moduletools.ClassConfig) ([][]float32, []models.AdditionalProperties, map[int]error) {
135return batch.VectorizeBatch(ctx, objs, skipObject, cfg, m.logger, m.imageVectorizer.Object)
136}
137
138func (m *ClipModule) MetaInfo() (map[string]interface{}, error) {
139return m.metaClient.MetaInfo()
140}
141
142func (m *ClipModule) VectorizeInput(ctx context.Context,
143input string, cfg moduletools.ClassConfig,
144) ([]float32, error) {
145return m.textVectorizer.Texts(ctx, []string{input}, cfg)
146}
147
148func (m *ClipModule) VectorizableProperties(cfg moduletools.ClassConfig) (bool, []string, error) {
149ichek := vectorizer.NewClassSettings(cfg)
150mediaProps, err := ichek.Properties()
151return false, mediaProps, err
152}
153
154// verify we implement the modules.Module interface
155var (
156_ = modulecapabilities.Module(New())
157_ = modulecapabilities.Vectorizer(New())
158_ = modulecapabilities.InputVectorizer(New())
159)
160