weaviate

Форк
0
149 строк · 5.0 Кб
1
//                           _       _
2
// __      _____  __ ___   ___  __ _| |_ ___
3
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
4
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
5
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
6
//
7
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
8
//
9
//  CONTACT: hello@weaviate.io
10
//
11

12
package modopenai
13

14
import (
15
	"context"
16

17
	"github.com/weaviate/weaviate/modules/text2vec-openai/ent"
18

19
	"github.com/weaviate/weaviate/entities/models"
20
	"github.com/weaviate/weaviate/entities/modulecapabilities"
21
	"github.com/weaviate/weaviate/entities/moduletools"
22
	"github.com/weaviate/weaviate/entities/schema"
23
)
24

25
func (m *OpenAIModule) ClassConfigDefaults() map[string]interface{} {
26
	return map[string]interface{}{
27
		"vectorizeClassName": ent.DefaultVectorizeClassName,
28
		"baseURL":            ent.DefaultBaseURL,
29
		"model":              ent.DefaultOpenAIModel,
30
	}
31
}
32

33
func (m *OpenAIModule) PropertyConfigDefaults(
34
	dt *schema.DataType,
35
) map[string]interface{} {
36
	return map[string]interface{}{
37
		"skip":                  !ent.DefaultPropertyIndexed,
38
		"vectorizePropertyName": ent.DefaultVectorizePropertyName,
39
	}
40
}
41

42
func (m *OpenAIModule) ValidateClass(ctx context.Context,
43
	class *models.Class, cfg moduletools.ClassConfig,
44
) error {
45
	settings := ent.NewClassSettings(cfg)
46
	return settings.Validate(class)
47
}
48

49
// Compile-time check: the value returned by New must satisfy
// modulecapabilities.ClassConfigurator.
var _ modulecapabilities.ClassConfigurator = New()
50

51
// type ConfigValidator struct {
52
// 	logger logrus.FieldLogger
53
// }
54

55
// type ClassSettings interface {
56
// 	VectorizeClassName() bool
57
// 	VectorizePropertyName(propName string) bool
58
// 	PropertyIndexed(propName string) bool
59
// }
60

61
// func NewConfigValidator(logger logrus.FieldLogger) *ConfigValidator {
62
// 	return &ConfigValidator{logger: logger}
63
// }
64

65
// func (cv *ConfigValidator) Do(ctx context.Context, class *models.Class,
66
// 	cfg moduletools.ClassConfig, settings ClassSettings) error {
67
// 	// In text2vec-openai (as opposed to e.g. text2vec-contextionary) the
68
// 	// assumption is that the models will be able to deal with any words, even
69
// 	// previously unseen ones. Therefore we do not need to validate individual
70
// 	// properties, but only the overall "index state"
71

72
// 	if err := cv.validateIndexState(ctx, class, settings); err != nil {
73
// 		return errors.Errorf("invalid combination of properties")
74
// 	}
75

76
// 	cv.checkForPossibilityOfDuplicateVectors(ctx, class, settings)
77

78
// 	return nil
79
// }
80

81
// func (cv *ConfigValidator) validateIndexState(ctx context.Context,
82
// 	class *models.Class, settings ClassSettings) error {
83
// 	if settings.VectorizeClassName() {
84
// 		// if the user chooses to vectorize the classname, vector-building will
85
// 		// always be possible, no need to investigate further
86

87
// 		return nil
88
// 	}
89

90
// 	// search if there is at least one indexed, string/text prop. If found pass
91
// 	// validation
92
// 	for _, prop := range class.Properties {
93
// 		if len(prop.DataType) < 1 {
94
// 			return errors.Errorf("property %s must have at least one datatype: "+
95
// 				"got %v", prop.Name, prop.DataType)
96
// 		}
97

98
// 		if prop.DataType[0] != string(schema.DataTypeText) {
99
// 			// we can only vectorize text-like props
100
// 			continue
101
// 		}
102

103
// 		if settings.PropertyIndexed(prop.Name) {
104
// 			// found at least one, this is a valid schema
105
// 			return nil
106
// 		}
107
// 	}
108

109
// 	return fmt.Errorf("invalid properties: didn't find a single property which is " +
110
// 		"of type string or text and is not excluded from indexing. In addition the " +
111
// 		"class name is excluded from vectorization as well, meaning that it cannot be " +
112
// 		"used to determine the vector position. To fix this, set 'vectorizeClassName' " +
113
// 		"to true if the class name is contextionary-valid. Alternatively add at least one " +
114
// 		"contextionary-valid text/string property which is not excluded from " +
115
// 		"indexing.")
116
// }
117

118
// func (cv *ConfigValidator) checkForPossibilityOfDuplicateVectors(
119
// 	ctx context.Context, class *models.Class, settings ClassSettings) {
120
// 	if !settings.VectorizeClassName() {
121
// 		// if the user chooses not to vectorize the class name, this means they must
122
// 		// have chosen something else to vectorize, otherwise the validation would
123
// 		// have error'd before we ever got here. We can skip further checking.
124

125
// 		return
126
// 	}
127

128
// 	// search if there is at least one indexed, string/text prop. If found exit
129
// 	for _, prop := range class.Properties {
130
// 		// length check skipped, because validation has already passed
131
// 		if prop.DataType[0] != string(schema.DataTypeText) {
132
// 			// we can only vectorize text-like props
133
// 			continue
134
// 		}
135

136
// 		if settings.PropertyIndexed(prop.Name) {
137
// 			// found at least one
138
// 			return
139
// 		}
140
// 	}
141

142
// 	cv.logger.WithField("module", "text2vec-openai").
143
// 		WithField("class", class.Class).
144
// 		Warnf("text2vec-openai: Class %q does not have any properties "+
145
// 			"indexed (or only non text-properties indexed) and the vector position is "+
146
// 			"only determined by the class name. Each object will end up with the same "+
147
// 			"vector which leads to a severe performance penalty on imports. Consider "+
148
// 			"setting vectorIndexConfig.skip=true for this property", class.Class)
149
// }
150

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.