15
"golang.org/x/exp/slices"
16
"golang.org/x/sync/errgroup"
18
"github.com/jmorganca/ollama/gpu"
21
// availableDynLibs maps a library name to the on-disk path of the extracted
// library. nativeInit fills it in, using the name of the directory that
// contains each extracted library as the key.
//
// Library names may contain an optional variant separated by '_'
22
// For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2"
23
// Any library without a variant is the lowest common denominator
24
var availableDynLibs = map[string]string{}
26
// pathComponentCount is the number of "/"-separated components expected in an
// embedded library path of the form
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY.
// extractDynamicLibs logs an error for any path with a different count.
const pathComponentCount = 7
28
// getDynLibs returns an ordered list of LLM libraries to try, starting with the best
//
// The list is built from availableDynLibs in this order: an exact match for
// the requested library (plus variant), then, for non-CPU requests, the other
// variants of the same library in sorted order, then a CPU library as a
// fallback. If nothing matched, the plain "cpu" entry is used.
//
// NOTE(review): this listing appears to have lost lines. The declarations of
// exactMatch and dynLibs, the exact-match comparison, several closing braces
// and the final return are not visible here. Confirm against the full file.
29
func getDynLibs(gpuInfo gpu.GpuInfo) []string {
30
// Short circuit if we know we're using the default built-in (darwin only)
31
if gpuInfo.Library == "default" {
32
return []string{"default"}
34
// TODO - temporary until we have multiple CPU variations for Darwin
35
// Short circuit on darwin with metal only
36
if len(availableDynLibs) == 1 {
37
if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
38
return []string{availableDynLibs["metal"]}
44
altDynLibs := []string{}
45
// Build the key to look up: the library name, with "_<variant>" appended
// when the GPU info carries a variant.
requested := gpuInfo.Library
46
if gpuInfo.Variant != "" {
47
requested += "_" + gpuInfo.Variant
49
// Try to find an exact match
50
for cmp := range availableDynLibs {
53
// An exact match becomes the first (preferred) entry in the result.
dynLibs = []string{availableDynLibs[cmp]}
57
// Then for GPUs load alternates and sort the list for consistent load ordering
58
if gpuInfo.Library != "cpu" {
59
for cmp := range availableDynLibs {
60
// The text before the first '_' is the library name without its variant.
// Skip the exact match so it is not listed twice.
if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch {
61
altDynLibs = append(altDynLibs, cmp)
64
// Map iteration order is random, so sort the keys before resolving them to paths.
slices.Sort(altDynLibs)
65
for _, altDynLib := range altDynLibs {
66
dynLibs = append(dynLibs, availableDynLibs[altDynLib])
70
// Load up the best CPU variant if not primary requested
71
if gpuInfo.Library != "cpu" {
72
variant := gpu.GetCPUVariant()
73
// If no variant, then we fall back to default
74
// If we have a variant, try that if we find an exact match
75
// Attempting to run the wrong CPU instructions will panic the
78
for cmp := range availableDynLibs {
79
if cmp == "cpu_"+variant {
80
dynLibs = append(dynLibs, availableDynLibs[cmp])
85
// NOTE(review): the condition guarding this plain "cpu" fallback is not
// visible here; presumably it applies when no CPU variant was reported.
dynLibs = append(dynLibs, availableDynLibs["cpu"])
89
// Finally, if we didn't find any matches, LCD CPU FTW
// (LCD = lowest common denominator, i.e. the "cpu" library without a variant.)
90
if len(dynLibs) == 0 {
91
dynLibs = []string{availableDynLibs["cpu"]}
93
slog.Debug(fmt.Sprintf("ordered list of LLM libraries to try %v", dynLibs))
97
// rocmDynLibPresent scans the keys of availableDynLibs for a name that starts
// with "rocm".
//
// NOTE(review): the return statements are not visible in this listing;
// presumably it returns true on the first match and false otherwise.
func rocmDynLibPresent() bool {
98
for dynLibName := range availableDynLibs {
99
if strings.HasPrefix(dynLibName, "rocm") {
106
// nativeInit extracts the embedded payloads into workdir and records the
// extracted libraries in availableDynLibs.
//
// On darwin it first extracts llama.cpp/ggml-metal.metal and sets
// GGML_METAL_PATH_RESOURCES to workdir. It then extracts every library that
// matches llama.cpp/build/*/*/*/lib/*, checks driver access through
// verifyDriverAccess, and logs the library names that were found.
//
// NOTE(review): error-handling branches, closing braces, the declaration of
// the index i and the final return are not visible in this listing.
func nativeInit(workdir string) error {
107
slog.Info("Extracting dynamic libraries...")
108
if runtime.GOOS == "darwin" {
109
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
111
if err == payloadMissing {
112
// TODO perhaps consider this a hard failure on arm macs?
113
// NOTE(review): the message says "ggml-meta.metal" but the payload requested
// above is ggml-metal.metal; this looks like a typo in the log text.
slog.Info("ggml-meta.metal payload missing")
118
// Sets the Metal resource path to the directory the payload was extracted into.
os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
121
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
123
if err == payloadMissing {
124
slog.Info(fmt.Sprintf("%s", payloadMissing))
129
for _, lib := range libs {
130
// The last dir component is the variant name
131
variant := filepath.Base(filepath.Dir(lib))
132
availableDynLibs[variant] = lib
135
if err := verifyDriverAccess(); err != nil {
139
// Report which dynamic libraries we have loaded to assist troubleshooting
140
variants := make([]string, len(availableDynLibs))
142
for variant := range availableDynLibs {
143
variants[i] = variant
146
slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
147
slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
152
// extractDynamicLibs copies every embedded file in libEmbed that matches glob
// into workDir/<variant>/, stripping a ".gz" suffix from the file name and
// reading ".gz" payloads through gzip.
//
// It returns the destination paths whose name contains "server", together
// with the result of waiting on the errgroup. It returns payloadMissing when
// the glob fails or matches nothing.
//
// NOTE(review): this listing has lost lines. The declarations of libs and
// filename, the g.Go call, the switch header, several "if err != nil" guards
// and the closing braces are not visible. The single-value error returns
// below imply that the per-file body runs inside a func() error closure,
// presumably one passed to g.Go.
func extractDynamicLibs(workDir, glob string) ([]string, error) {
153
files, err := fs.Glob(libEmbed, glob)
154
if err != nil || len(files) == 0 {
155
return nil, payloadMissing
159
// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
160
// and tracking by version so we don't reexpand the files every time
161
// Also maybe consider lazy loading only what is needed
163
g := new(errgroup.Group)
164
for _, file := range files {
165
pathComps := strings.Split(file, "/")
166
if len(pathComps) != pathComponentCount {
167
slog.Error(fmt.Sprintf("unexpected payload components: %v", pathComps))
173
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
174
// Include the variant in the path to avoid conflicts between multiple server libs
175
// pathComponentCount-3 indexes the $VARIANT component of the layout above.
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
176
srcFile, err := libEmbed.Open(file)
178
return fmt.Errorf("read payload %s: %v", file, err)
180
defer srcFile.Close()
181
if err := os.MkdirAll(targetDir, 0o755); err != nil {
182
// NOTE(review): the directory being created is targetDir, but the message
// reports workDir.
return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
184
src := io.Reader(srcFile)
186
// Gzip-compressed payloads are decompressed on the fly and written without
// the ".gz" suffix.
if strings.HasSuffix(file, ".gz") {
187
src, err = gzip.NewReader(src)
189
return fmt.Errorf("decompress payload %s: %v", file, err)
191
filename = strings.TrimSuffix(filename, ".gz")
194
destFile := filepath.Join(targetDir, filepath.Base(filename))
195
// Only paths containing "server" are reported back to the caller.
// NOTE(review): if this body runs concurrently in g.Go closures, this append
// to the shared libs slice is unsynchronized. Confirm against the full file.
if strings.Contains(destFile, "server") {
196
libs = append(libs, destFile)
199
_, err = os.Stat(destFile)
201
// The payload is written only when the destination does not exist yet.
case errors.Is(err, os.ErrNotExist):
202
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
204
return fmt.Errorf("write payload %s: %v", file, err)
206
defer destFile.Close()
207
if _, err := io.Copy(destFile, src); err != nil {
208
return fmt.Errorf("copy payload %s: %v", file, err)
211
return fmt.Errorf("stat payload %s: %v", file, err)
216
return libs, g.Wait()
219
// extractPayloadFiles copies every embedded file in libEmbed that matches
// glob directly into workDir, stripping a ".gz" suffix from the file name and
// reading ".gz" payloads through gzip. A file is written only when the
// destination does not exist yet.
//
// It returns payloadMissing when the glob fails or matches nothing, and a
// formatted error naming the payload when a read, mkdir, decompress, write,
// copy or stat step fails.
//
// NOTE(review): this listing has lost lines. The declaration of filename, the
// switch header, several "if err != nil" guards, the closing braces and the
// final return are not visible.
func extractPayloadFiles(workDir, glob string) error {
220
files, err := fs.Glob(libEmbed, glob)
221
if err != nil || len(files) == 0 {
222
return payloadMissing
225
for _, file := range files {
226
srcFile, err := libEmbed.Open(file)
228
return fmt.Errorf("read payload %s: %v", file, err)
230
// NOTE(review): this defer appears to sit inside the loop, so the files
// would stay open until the function returns rather than per iteration.
defer srcFile.Close()
231
if err := os.MkdirAll(workDir, 0o755); err != nil {
232
return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
234
src := io.Reader(srcFile)
236
// Gzip-compressed payloads are decompressed on the fly and written without
// the ".gz" suffix.
if strings.HasSuffix(file, ".gz") {
237
src, err = gzip.NewReader(src)
239
return fmt.Errorf("decompress payload %s: %v", file, err)
241
filename = strings.TrimSuffix(filename, ".gz")
244
destFile := filepath.Join(workDir, filepath.Base(filename))
245
_, err = os.Stat(destFile)
247
// The payload is written only when the destination does not exist yet.
case errors.Is(err, os.ErrNotExist):
248
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
250
return fmt.Errorf("write payload %s: %v", file, err)
252
defer destFile.Close()
253
if _, err := io.Copy(destFile, src); err != nil {
254
return fmt.Errorf("copy payload %s: %v", file, err)
257
return fmt.Errorf("stat payload %s: %v", file, err)
263
// verifyDriverAccess checks that the process can open /dev/kfd for reading
// and writing when a ROCm library is present. It tests runtime.GOOS against
// "linux" first.
//
// A permission error is reported with a hint about running as root or
// joining the render group. A missing /dev/kfd is treated as expected. Any
// other open error is wrapped and returned.
//
// NOTE(review): this definition continues past the end of the visible
// listing, and the early returns and closing braces are missing. Presumably
// non-linux platforms and the "not exist" case return nil, and the opened
// file is closed. Confirm against the full file.
func verifyDriverAccess() error {
264
if runtime.GOOS != "linux" {
267
// Only check ROCm access if we have the dynamic lib loaded
268
if rocmDynLibPresent() {
269
// Verify we have permissions - either running as root, or we have group access to the driver
270
fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
272
if errors.Is(err, fs.ErrPermission) {
273
// NOTE(review): "add you user account" in the message below looks like a
// typo for "add your user account".
return fmt.Errorf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.")
274
} else if errors.Is(err, fs.ErrNotExist) {
275
// expected behavior without a radeon card
279
return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)