weaviate
100 строк · 3.4 Кб
1// _ _
2// __ _____ __ ___ ___ __ _| |_ ___
3// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
4// \ V V / __/ (_| |\ V /| | (_| | || __/
5// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
6//
7// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
8//
9// CONTACT: hello@weaviate.io
10//
11
12package vectorizer13
14import (15"context"16"testing"17
18"github.com/stretchr/testify/assert"19"github.com/stretchr/testify/require"20)
21
22// as used in the nearText searcher
23func TestVectorizingTexts(t *testing.T) {24type testCase struct {25name string26input []string27expectedHuggingFaceModel string28huggingFaceModel string29huggingFaceEndpointURL string30}31
32tests := []testCase{33{34name: "single word",35input: []string{"hello"},36huggingFaceModel: "sentence-transformers/gtr-t5-xl",37expectedHuggingFaceModel: "sentence-transformers/gtr-t5-xl",38},39{40name: "multiple words",41input: []string{"hello world, this is me!"},42huggingFaceModel: "sentence-transformers/gtr-t5-xl",43expectedHuggingFaceModel: "sentence-transformers/gtr-t5-xl",44},45{46name: "multiple sentences (joined with a dot)",47input: []string{"this is sentence 1", "and here's number 2"},48huggingFaceModel: "sentence-transformers/gtr-t5-xl",49expectedHuggingFaceModel: "sentence-transformers/gtr-t5-xl",50},51{52name: "multiple sentences already containing a dot",53input: []string{"this is sentence 1.", "and here's number 2"},54huggingFaceModel: "sentence-transformers/gtr-t5-xl",55expectedHuggingFaceModel: "sentence-transformers/gtr-t5-xl",56},57{58name: "multiple sentences already containing a question mark",59input: []string{"this is sentence 1?", "and here's number 2"},60huggingFaceModel: "sentence-transformers/gtr-t5-xl",61expectedHuggingFaceModel: "sentence-transformers/gtr-t5-xl",62},63{64name: "multiple sentences already containing an exclamation mark",65input: []string{"this is sentence 1!", "and here's number 2"},66huggingFaceModel: "sentence-transformers/gtr-t5-xl",67expectedHuggingFaceModel: "sentence-transformers/gtr-t5-xl",68},69{70name: "multiple sentences already containing comma",71input: []string{"this is sentence 1,", "and here's number 2"},72huggingFaceModel: "sentence-transformers/gtr-t5-xl",73expectedHuggingFaceModel: "sentence-transformers/gtr-t5-xl",74},75{76name: "single word with inference url",77input: []string{"hello"},78huggingFaceEndpointURL: "http://url.cloud",79expectedHuggingFaceModel: "sentence-transformers/msmarco-bert-base-dot-v5",80},81}82
83for _, test := range tests {84t.Run(test.name, func(t *testing.T) {85client := &fakeClient{}86
87v := New(client)88
89settings := &fakeClassConfig{90model: test.huggingFaceModel,91endpointURL: test.huggingFaceEndpointURL,92}93vec, err := v.Texts(context.Background(), test.input, settings)94
95require.Nil(t, err)96assert.Equal(t, []float32{0.1, 1.1, 2.1, 3.1}, vec)97assert.Equal(t, client.lastConfig.Model, test.expectedHuggingFaceModel)98})99}100}
101