9
"github.com/jmorganca/ollama/format"
12
type containerGGUF struct {
28
func (c *containerGGUF) Name() string {
32
func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
33
binary.Read(rso, c.bo, &c.Version)
37
binary.Read(rso, c.bo, &c.V1)
39
binary.Read(rso, c.bo, &c.V2)
42
model := newGGUFModel(c)
43
if err := model.Decode(rso); err != nil {
51
ggufTypeUint8 uint32 = iota
73
// shape is the number of elements in each dimension
77
func (t tensor) blockSize() uint64 {
88
func (t tensor) typeSize() uint64 {
89
blockSize := t.blockSize()
97
return 2 + blockSize/2
99
return 2 + 2 + blockSize/2
101
return 2 + 4 + blockSize/2
103
return 2 + 2 + 4 + blockSize/2
107
return 4 + 4 + blockSize
109
return blockSize/16 + blockSize/4 + 2 + 2
111
return blockSize/8 + blockSize/4 + 12 + 2
113
return 2 + 2 + 12 + blockSize/2
115
return 2 + 2 + 12 + blockSize/8 + blockSize/2
117
return blockSize/2 + blockSize/4 + blockSize/16 + 2
119
return 2 + blockSize + 2*blockSize/16
121
return 2 + 2*blockSize/8
123
return 2 + 2*blockSize/8 + blockSize/32
125
return 2 + 3*blockSize/8
131
func (t tensor) parameters() uint64 {
132
return t.shape[0] * t.shape[1] * t.shape[2] * t.shape[3]
135
func (t tensor) size() uint64 {
136
return t.parameters() * t.typeSize() / t.blockSize()
139
type ggufModel struct {
148
func newGGUFModel(container *containerGGUF) *ggufModel {
150
containerGGUF: container,
155
func (llm *ggufModel) NumTensor() uint64 {
156
if llm.Version == 1 {
157
return uint64(llm.V1.NumTensor)
160
return llm.V2.NumTensor
163
func (llm *ggufModel) NumKV() uint64 {
164
if llm.Version == 1 {
165
return uint64(llm.V1.NumKV)
171
func (llm *ggufModel) ModelFamily() string {
172
if t, ok := llm.kv["general.architecture"].(string); ok {
179
func (llm *ggufModel) ModelType() string {
180
if llm.parameters > 0 {
181
return format.HumanNumber(llm.parameters)
187
func (llm *ggufModel) FileType() string {
188
if t, ok := llm.kv["general.file_type"].(uint32); ok {
195
func (llm *ggufModel) Decode(rso *readSeekOffset) error {
197
for i := 0; uint64(i) < llm.NumKV(); i++ {
198
k, err := llm.readString(rso)
203
vtype := llm.readU32(rso)
223
case ggufTypeFloat32:
225
case ggufTypeFloat64:
228
v = llm.readBool(rso)
230
s, err := llm.readString(rso)
237
a, err := llm.readArray(rso)
244
return fmt.Errorf("invalid type: %d", vtype)
251
for i := 0; uint64(i) < llm.NumTensor(); i++ {
252
name, err := llm.readString(rso)
257
// dims is the number of dimensions in the tensor
258
dims := llm.readU32(rso)
260
shape := [4]uint64{1, 1, 1, 1}
261
for i := 0; uint32(i) < dims; i++ {
262
shape[i] = llm.readU64(rso)
267
kind: llm.readU32(rso),
268
offset: llm.readU64(rso),
272
llm.tensors = append(llm.tensors, tensor)
273
llm.parameters += tensor.parameters()
276
alignment, ok := llm.kv["general.alignment"].(uint32)
281
rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
282
for _, tensor := range llm.tensors {
283
padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
284
rso.Seek(padded, io.SeekCurrent)
290
func (llm *ggufModel) NumLayers() uint32 {
291
value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
296
return value.(uint32)
299
func (llm *ggufModel) NumHead() uint32 {
300
value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
305
return value.(uint32)
308
func (llm *ggufModel) NumEmbed() uint32 {
309
value, exists := llm.kv[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
314
return value.(uint32)
317
func (llm *ggufModel) NumHeadKv() uint32 {
318
value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
323
return value.(uint32)
326
func (llm *ggufModel) NumCtx() uint32 {
327
value, exists := llm.kv[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
332
return value.(uint32)
335
func (llm *ggufModel) NumGQA() uint32 {
336
numHeadKv := llm.NumHeadKv()
341
return llm.NumHead() / numHeadKv
344
func (llm ggufModel) readU8(r io.Reader) uint8 {
346
binary.Read(r, llm.bo, &u8)
350
func (llm ggufModel) readI8(r io.Reader) int8 {
352
binary.Read(r, llm.bo, &i8)
356
func (llm ggufModel) readU16(r io.Reader) uint16 {
358
binary.Read(r, llm.bo, &u16)
362
func (llm ggufModel) readI16(r io.Reader) int16 {
364
binary.Read(r, llm.bo, &i16)
368
func (llm ggufModel) readU32(r io.Reader) uint32 {
370
binary.Read(r, llm.bo, &u32)
374
func (llm ggufModel) readI32(r io.Reader) int32 {
376
binary.Read(r, llm.bo, &i32)
380
func (llm ggufModel) readU64(r io.Reader) uint64 {
382
binary.Read(r, llm.bo, &u64)
386
func (llm ggufModel) readI64(r io.Reader) int64 {
388
binary.Read(r, llm.bo, &i64)
392
func (llm ggufModel) readF32(r io.Reader) float32 {
394
binary.Read(r, llm.bo, &f32)
398
func (llm ggufModel) readF64(r io.Reader) float64 {
400
binary.Read(r, llm.bo, &f64)
404
func (llm ggufModel) readBool(r io.Reader) bool {
406
binary.Read(r, llm.bo, &b)
410
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
411
var nameLength uint32
412
binary.Read(r, llm.bo, &nameLength)
415
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
419
// gguf v1 strings are null-terminated
420
b.Truncate(b.Len() - 1)
422
return b.String(), nil
425
func (llm ggufModel) readString(r io.Reader) (string, error) {
426
if llm.Version == 1 {
427
return llm.readStringV1(r)
430
var nameLength uint64
431
binary.Read(r, llm.bo, &nameLength)
434
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
438
return b.String(), nil
441
func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
442
atype := llm.readU32(r)
445
for i := 0; uint32(i) < n; i++ {
448
arr = append(arr, llm.readU8(r))
450
arr = append(arr, llm.readI8(r))
452
arr = append(arr, llm.readU16(r))
454
arr = append(arr, llm.readI16(r))
456
arr = append(arr, llm.readU32(r))
458
arr = append(arr, llm.readI32(r))
459
case ggufTypeFloat32:
460
arr = append(arr, llm.readF32(r))
462
arr = append(arr, llm.readBool(r))
464
s, err := llm.readStringV1(r)
471
return nil, fmt.Errorf("invalid array type: %d", atype)
478
func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
479
if llm.Version == 1 {
480
return llm.readArrayV1(r)
483
atype := llm.readU32(r)
486
for i := 0; uint64(i) < n; i++ {
489
arr = append(arr, llm.readU8(r))
491
arr = append(arr, llm.readI8(r))
493
arr = append(arr, llm.readU16(r))
495
arr = append(arr, llm.readI16(r))
497
arr = append(arr, llm.readU32(r))
499
arr = append(arr, llm.readI32(r))
501
arr = append(arr, llm.readU64(r))
503
arr = append(arr, llm.readI64(r))
504
case ggufTypeFloat32:
505
arr = append(arr, llm.readF32(r))
506
case ggufTypeFloat64:
507
arr = append(arr, llm.readF64(r))
509
arr = append(arr, llm.readBool(r))
511
s, err := llm.readString(r)
518
return nil, fmt.Errorf("invalid array type: %d", atype)