weaviate
149 строк · 5.0 Кб
1// _ _
2// __ _____ __ ___ ___ __ _| |_ ___
3// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
4// \ V V / __/ (_| |\ V /| | (_| | || __/
5// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
6//
7// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
8//
9// CONTACT: hello@weaviate.io
10//
11
12package modopenai
13
14import (
15"context"
16
17"github.com/weaviate/weaviate/modules/text2vec-openai/ent"
18
19"github.com/weaviate/weaviate/entities/models"
20"github.com/weaviate/weaviate/entities/modulecapabilities"
21"github.com/weaviate/weaviate/entities/moduletools"
22"github.com/weaviate/weaviate/entities/schema"
23)
24
25func (m *OpenAIModule) ClassConfigDefaults() map[string]interface{} {
26return map[string]interface{}{
27"vectorizeClassName": ent.DefaultVectorizeClassName,
28"baseURL": ent.DefaultBaseURL,
29"model": ent.DefaultOpenAIModel,
30}
31}
32
33func (m *OpenAIModule) PropertyConfigDefaults(
34dt *schema.DataType,
35) map[string]interface{} {
36return map[string]interface{}{
37"skip": !ent.DefaultPropertyIndexed,
38"vectorizePropertyName": ent.DefaultVectorizePropertyName,
39}
40}
41
42func (m *OpenAIModule) ValidateClass(ctx context.Context,
43class *models.Class, cfg moduletools.ClassConfig,
44) error {
45settings := ent.NewClassSettings(cfg)
46return settings.Validate(class)
47}
48
49var _ = modulecapabilities.ClassConfigurator(New())
50
51// type ConfigValidator struct {
52// logger logrus.FieldLogger
53// }
54
55// type ClassSettings interface {
56// VectorizeClassName() bool
57// VectorizePropertyName(propName string) bool
58// PropertyIndexed(propName string) bool
59// }
60
61// func NewConfigValidator(logger logrus.FieldLogger) *ConfigValidator {
62// return &ConfigValidator{logger: logger}
63// }
64
65// func (cv *ConfigValidator) Do(ctx context.Context, class *models.Class,
66// cfg moduletools.ClassConfig, settings ClassSettings) error {
67// // In text2vec-openai (as opposed to e.g. text2vec-contextionary) the
68// // assumption is that the models will be able to deal with any words, even
69// // previously unseen ones. Therefore we do not need to validate individual
70// // properties, but only the overall "index state"
71
72// if err := cv.validateIndexState(ctx, class, settings); err != nil {
73// return errors.Errorf("invalid combination of properties")
74// }
75
76// cv.checkForPossibilityOfDuplicateVectors(ctx, class, settings)
77
78// return nil
79// }
80
81// func (cv *ConfigValidator) validateIndexState(ctx context.Context,
82// class *models.Class, settings ClassSettings) error {
83// if settings.VectorizeClassName() {
84// // if the user chooses to vectorize the classname, vector-building will
85// // always be possible, no need to investigate further
86
87// return nil
88// }
89
90// // search if there is at least one indexed, string/text prop. If found pass
91// // validation
92// for _, prop := range class.Properties {
93// if len(prop.DataType) < 1 {
94// return errors.Errorf("property %s must have at least one datatype: "+
95// "got %v", prop.Name, prop.DataType)
96// }
97
98// if prop.DataType[0] != string(schema.DataTypeText) {
99// // we can only vectorize text-like props
100// continue
101// }
102
103// if settings.PropertyIndexed(prop.Name) {
104// // found at least one, this is a valid schema
105// return nil
106// }
107// }
108
109// return fmt.Errorf("invalid properties: didn't find a single property which is " +
110// "of type string or text and is not excluded from indexing. In addition the " +
111// "class name is excluded from vectorization as well, meaning that it cannot be " +
112// "used to determine the vector position. To fix this, set 'vectorizeClassName' " +
113// "to true if the class name is contextionary-valid. Alternatively add at least " +
114// "contextionary-valid text/string property which is not excluded from " +
115// "indexing.")
116// }
117
118// func (cv *ConfigValidator) checkForPossibilityOfDuplicateVectors(
119// ctx context.Context, class *models.Class, settings ClassSettings) {
120// if !settings.VectorizeClassName() {
121// // if the user chooses not to vectorize the class name, this means they must
122// // have chosen something else to vectorize, otherwise the validation would
123// // have error'd before we ever got here. We can skip further checking.
124
125// return
126// }
127
128// // search if there is at least one indexed, string/text prop. If found exit
129// for _, prop := range class.Properties {
130// // length check skipped, because validation has already passed
131// if prop.DataType[0] != string(schema.DataTypeText) {
132// // we can only vectorize text-like props
133// continue
134// }
135
136// if settings.PropertyIndexed(prop.Name) {
137// // found at least one
138// return
139// }
140// }
141
142// cv.logger.WithField("module", "text2vec-openai").
143// WithField("class", class.Class).
144// Warnf("text2vec-openai: Class %q does not have any properties "+
145// "indexed (or only non text-properties indexed) and the vector position is "+
146// "only determined by the class name. Each object will end up with the same "+
147// "vector which leads to a severe performance penalty on imports. Consider "+
148// "setting vectorIndexConfig.skip=true for this property", class.Class)
149// }
150