ollama

Форк
0
/
payload_common.go 
284 строки · 7.9 Кб
1
package llm
2

3
import (
4
	"compress/gzip"
5
	"errors"
6
	"fmt"
7
	"io"
8
	"io/fs"
9
	"log/slog"
10
	"os"
11
	"path/filepath"
12
	"runtime"
13
	"strings"
14

15
	"golang.org/x/exp/slices"
16
	"golang.org/x/sync/errgroup"
17

18
	"github.com/jmorganca/ollama/gpu"
19
)
20

21
// Library names may contain an optional variant separated by '_'
22
// For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2"
23
// Any library without a variant is the lowest common denominator
24
var availableDynLibs = map[string]string{}
25

26
// pathComponentCount is the number of "/"-separated components an embedded
// library payload path must have to be extracted; the expected layout is
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY.
const pathComponentCount = 7
27

28
// getDynLibs returns an ordered list of LLM libraries to try, starting with the best
29
func getDynLibs(gpuInfo gpu.GpuInfo) []string {
30
	// Short circuit if we know we're using the default built-in (darwin only)
31
	if gpuInfo.Library == "default" {
32
		return []string{"default"}
33
	}
34
	// TODO - temporary until we have multiple CPU variations for Darwin
35
	// Short circuit on darwin with metal only
36
	if len(availableDynLibs) == 1 {
37
		if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
38
			return []string{availableDynLibs["metal"]}
39
		}
40
	}
41

42
	exactMatch := ""
43
	dynLibs := []string{}
44
	altDynLibs := []string{}
45
	requested := gpuInfo.Library
46
	if gpuInfo.Variant != "" {
47
		requested += "_" + gpuInfo.Variant
48
	}
49
	// Try to find an exact match
50
	for cmp := range availableDynLibs {
51
		if requested == cmp {
52
			exactMatch = cmp
53
			dynLibs = []string{availableDynLibs[cmp]}
54
			break
55
		}
56
	}
57
	// Then for GPUs load alternates and sort the list for consistent load ordering
58
	if gpuInfo.Library != "cpu" {
59
		for cmp := range availableDynLibs {
60
			if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch {
61
				altDynLibs = append(altDynLibs, cmp)
62
			}
63
		}
64
		slices.Sort(altDynLibs)
65
		for _, altDynLib := range altDynLibs {
66
			dynLibs = append(dynLibs, availableDynLibs[altDynLib])
67
		}
68
	}
69

70
	// Load up the best CPU variant if not primary requested
71
	if gpuInfo.Library != "cpu" {
72
		variant := gpu.GetCPUVariant()
73
		// If no variant, then we fall back to default
74
		// If we have a variant, try that if we find an exact match
75
		// Attempting to run the wrong CPU instructions will panic the
76
		// process
77
		if variant != "" {
78
			for cmp := range availableDynLibs {
79
				if cmp == "cpu_"+variant {
80
					dynLibs = append(dynLibs, availableDynLibs[cmp])
81
					break
82
				}
83
			}
84
		} else {
85
			dynLibs = append(dynLibs, availableDynLibs["cpu"])
86
		}
87
	}
88

89
	// Finally, if we didn't find any matches, LCD CPU FTW
90
	if len(dynLibs) == 0 {
91
		dynLibs = []string{availableDynLibs["cpu"]}
92
	}
93
	slog.Debug(fmt.Sprintf("ordered list of LLM libraries to try %v", dynLibs))
94
	return dynLibs
95
}
96

97
func rocmDynLibPresent() bool {
98
	for dynLibName := range availableDynLibs {
99
		if strings.HasPrefix(dynLibName, "rocm") {
100
			return true
101
		}
102
	}
103
	return false
104
}
105

106
func nativeInit(workdir string) error {
107
	slog.Info("Extracting dynamic libraries...")
108
	if runtime.GOOS == "darwin" {
109
		err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
110
		if err != nil {
111
			if err == payloadMissing {
112
				// TODO perhaps consider this a hard failure on arm macs?
113
				slog.Info("ggml-meta.metal payload missing")
114
				return nil
115
			}
116
			return err
117
		}
118
		os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
119
	}
120

121
	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
122
	if err != nil {
123
		if err == payloadMissing {
124
			slog.Info(fmt.Sprintf("%s", payloadMissing))
125
			return nil
126
		}
127
		return err
128
	}
129
	for _, lib := range libs {
130
		// The last dir component is the variant name
131
		variant := filepath.Base(filepath.Dir(lib))
132
		availableDynLibs[variant] = lib
133
	}
134

135
	if err := verifyDriverAccess(); err != nil {
136
		return err
137
	}
138

139
	// Report which dynamic libraries we have loaded to assist troubleshooting
140
	variants := make([]string, len(availableDynLibs))
141
	i := 0
142
	for variant := range availableDynLibs {
143
		variants[i] = variant
144
		i++
145
	}
146
	slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
147
	slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
148

149
	return nil
150
}
151

152
func extractDynamicLibs(workDir, glob string) ([]string, error) {
153
	files, err := fs.Glob(libEmbed, glob)
154
	if err != nil || len(files) == 0 {
155
		return nil, payloadMissing
156
	}
157
	libs := []string{}
158

159
	// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
160
	// and tracking by version so we don't reexpand the files every time
161
	// Also maybe consider lazy loading only what is needed
162

163
	g := new(errgroup.Group)
164
	for _, file := range files {
165
		pathComps := strings.Split(file, "/")
166
		if len(pathComps) != pathComponentCount {
167
			slog.Error(fmt.Sprintf("unexpected payload components: %v", pathComps))
168
			continue
169
		}
170

171
		file := file
172
		g.Go(func() error {
173
			// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
174
			// Include the variant in the path to avoid conflicts between multiple server libs
175
			targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
176
			srcFile, err := libEmbed.Open(file)
177
			if err != nil {
178
				return fmt.Errorf("read payload %s: %v", file, err)
179
			}
180
			defer srcFile.Close()
181
			if err := os.MkdirAll(targetDir, 0o755); err != nil {
182
				return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
183
			}
184
			src := io.Reader(srcFile)
185
			filename := file
186
			if strings.HasSuffix(file, ".gz") {
187
				src, err = gzip.NewReader(src)
188
				if err != nil {
189
					return fmt.Errorf("decompress payload %s: %v", file, err)
190
				}
191
				filename = strings.TrimSuffix(filename, ".gz")
192
			}
193

194
			destFile := filepath.Join(targetDir, filepath.Base(filename))
195
			if strings.Contains(destFile, "server") {
196
				libs = append(libs, destFile)
197
			}
198

199
			_, err = os.Stat(destFile)
200
			switch {
201
			case errors.Is(err, os.ErrNotExist):
202
				destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
203
				if err != nil {
204
					return fmt.Errorf("write payload %s: %v", file, err)
205
				}
206
				defer destFile.Close()
207
				if _, err := io.Copy(destFile, src); err != nil {
208
					return fmt.Errorf("copy payload %s: %v", file, err)
209
				}
210
			case err != nil:
211
				return fmt.Errorf("stat payload %s: %v", file, err)
212
			}
213
			return nil
214
		})
215
	}
216
	return libs, g.Wait()
217
}
218

219
func extractPayloadFiles(workDir, glob string) error {
220
	files, err := fs.Glob(libEmbed, glob)
221
	if err != nil || len(files) == 0 {
222
		return payloadMissing
223
	}
224

225
	for _, file := range files {
226
		srcFile, err := libEmbed.Open(file)
227
		if err != nil {
228
			return fmt.Errorf("read payload %s: %v", file, err)
229
		}
230
		defer srcFile.Close()
231
		if err := os.MkdirAll(workDir, 0o755); err != nil {
232
			return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
233
		}
234
		src := io.Reader(srcFile)
235
		filename := file
236
		if strings.HasSuffix(file, ".gz") {
237
			src, err = gzip.NewReader(src)
238
			if err != nil {
239
				return fmt.Errorf("decompress payload %s: %v", file, err)
240
			}
241
			filename = strings.TrimSuffix(filename, ".gz")
242
		}
243

244
		destFile := filepath.Join(workDir, filepath.Base(filename))
245
		_, err = os.Stat(destFile)
246
		switch {
247
		case errors.Is(err, os.ErrNotExist):
248
			destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
249
			if err != nil {
250
				return fmt.Errorf("write payload %s: %v", file, err)
251
			}
252
			defer destFile.Close()
253
			if _, err := io.Copy(destFile, src); err != nil {
254
				return fmt.Errorf("copy payload %s: %v", file, err)
255
			}
256
		case err != nil:
257
			return fmt.Errorf("stat payload %s: %v", file, err)
258
		}
259
	}
260
	return nil
261
}
262

263
func verifyDriverAccess() error {
264
	if runtime.GOOS != "linux" {
265
		return nil
266
	}
267
	// Only check ROCm access if we have the dynamic lib loaded
268
	if rocmDynLibPresent() {
269
		// Verify we have permissions - either running as root, or we have group access to the driver
270
		fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
271
		if err != nil {
272
			if errors.Is(err, fs.ErrPermission) {
273
				return fmt.Errorf("Radeon card detected, but permissions not set up properly.  Either run ollama as root, or add you user account to the render group.")
274
			} else if errors.Is(err, fs.ErrNotExist) {
275
				// expected behavior without a radeon card
276
				return nil
277
			}
278

279
			return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
280
		}
281
		fd.Close()
282
	}
283
	return nil
284
}
285

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.