wandb
1package server2
import (
	"bytes"
	"encoding/binary"
	"errors"
	"log/slog"
)
8
// Header is the fixed-size prefix that precedes every payload in the stream.
//
// Tokenizer.Split decodes it with encoding/binary in little-endian order, so
// the order and widths of the fields below define the on-the-wire layout and
// must not be changed casually.
type Header struct {
	// Magic marks the start of a frame; Split compares it against 'W'.
	Magic byte
	// DataLength is the number of payload bytes that follow the header.
	DataLength uint32
}
13
14type Tokenizer struct {15header Header
16headerLength int17headerValid bool18}
19
20func (x *Tokenizer) Split(data []byte, _ bool) (advance int, token []byte, err error) {21if x.headerLength == 0 {22x.headerLength = binary.Size(x.header)23}24
25advance = 026
27if !x.headerValid {28if len(data) < x.headerLength {29return30}31buf := bytes.NewReader(data)32err := binary.Read(buf, binary.LittleEndian, &x.header)33if err != nil {34slog.Error("can't read token", "err", err)35return 0, nil, err36}37if x.header.Magic != uint8('W') {38slog.Error("Invalid magic byte in header")39}40x.headerValid = true41advance += x.headerLength42data = data[advance:]43}44
45if len(data) < int(x.header.DataLength) {46return47}48
49advance += int(x.header.DataLength)50token = data[:x.header.DataLength]51x.headerValid = false52return53}
54