// v — mirror of https://github.com/vlang/v
1// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module csv
5
// Once interfaces are further along, the idea is to have something similar to
// Go's io.Reader and bufio.Reader instead of reading the whole file into a string;
// this reader would then satisfy that interface. It is designed to be easily adapted.
// CommentIsDelimiterError is the error reported when a reader's comment byte
// is the same as its field delimiter.
struct CommentIsDelimiterError {
	Error
}

// msg returns the fixed, human-readable description of this error.
fn (e CommentIsDelimiterError) msg() string {
	return 'encoding.csv: comment cannot be the same as delimiter'
}
16
// InvalidDelimiterError is the error reported when the delimiter byte is not
// usable, or when a record cannot be split with it.
struct InvalidDelimiterError {
	Error
}

// msg returns the fixed, human-readable description of this error.
fn (e InvalidDelimiterError) msg() string {
	return 'encoding.csv: invalid delimiter'
}
24
// EndOfFileError is the error reported once all of the CSV data has been
// consumed and no further line can be read.
struct EndOfFileError {
	Error
}

// msg returns the fixed, human-readable description of this error.
fn (e EndOfFileError) msg() string {
	return 'encoding.csv: end of file'
}
32
// InvalidLineEndingError is the error reported when the data contains neither
// a `\n` nor a `\r` line terminator.
struct InvalidLineEndingError {
	Error
}

// msg returns the fixed, human-readable description of this error.
fn (e InvalidLineEndingError) msg() string {
	return 'encoding.csv: could not find any valid line endings'
}
40
// Reader keeps the state needed to parse CSV text that is held entirely
// in memory as a single string.
struct Reader {
	// not used yet
	// has_header bool
	// headings   []string
	data      string // the complete CSV text being parsed
	delimiter u8     // byte that separates the fields of a record
	comment   u8     // lines whose first byte equals this are treated as comments
mut:
	is_mac_pre_osx_le bool // true once the data is found to use lone `\r` line endings
	row_pos           int  // byte offset into `data` at which the next line starts
}
52
// ReaderConfig groups the optional settings for a CSV reader.
// Fields that are left out keep the defaults shown below.
@[params]
pub struct ReaderConfig {
pub:
	delimiter u8 = `,` // field separator, a comma unless overridden
	comment   u8 = `#` // comment marker, a hash sign unless overridden
}
59
// new_reader creates a Reader over the CSV text in `data`.
// `config` optionally overrides the field delimiter and the comment byte.
// The settings are only copied here; no validation happens at construction.
pub fn new_reader(data string, config ReaderConfig) &Reader {
	reader := &Reader{
		data: data
		delimiter: config.delimiter
		comment: config.comment
	}
	return reader
}
69
// read parses the next row of the CSV data.
// On success it returns one string per column of that row; any error raised
// while reading the record is passed on to the caller unchanged.
pub fn (mut r Reader) read() ![]string {
	return r.read_record()!
}
76
77// Once we have multi dimensional array
78// pub fn (mut r Reader) read_all() ?[][]string {
79// mut records := []string{}
80// for {
81// record := r.read_record() or {
82// if err.error == err_eof.error {
83// return records
84// } else {
85// return err
86// }
87// }
88// records << record
89// }
90// return records
91// }
// read_line returns the next physical line of `r.data` without its line
// terminator and moves `r.row_pos` to the byte just after that terminator.
//
// It returns EndOfFileError when `r.row_pos` is at or past the end of the
// data, and InvalidLineEndingError when the very first scan finds neither a
// `\n` nor a `\r`. When only `\r` is found on that first scan, the reader
// switches to lone-`\r` (pre-OSX Mac) line endings for all following lines.
fn (mut r Reader) read_line() !string {
	start := r.row_pos
	// nothing left to consume
	if start >= r.data.len {
		return &EndOfFileError{}
	}
	terminator := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
	mut end := r.data.index_after(terminator, start)
	if end == -1 {
		if start != 0 {
			// the final line has no trailing terminator
			end = r.data.len
		} else {
			// first line: the data may use pre-OSX Mac (lone `\r`) line endings
			end = r.data.index_after('\r', start)
			if end == -1 {
				// no valid line endings found
				return &InvalidLineEndingError{}
			}
			r.is_mac_pre_osx_le = true
		}
	}
	r.row_pos = end + 1
	line := r.data[start..end]
	// normalize Windows line endings: drop the `\r` left in front of the `\n`
	if !r.is_mac_pre_osx_le && line.ends_with('\r') {
		return line[..line.len - 1]
	}
	return line
}
122
// read_record parses one logical CSV record and returns its fields.
//
// Blank lines and lines starting with the comment byte are skipped. A field
// that starts with `"` is treated as quoted: it may contain the delimiter,
// a doubled `""` stands for one literal quote, and it may span several
// physical lines, which are joined with `\n`. While such a field is still
// open, blank lines and comment lines are kept as part of its content.
//
// Errors: CommentIsDelimiterError when comment and delimiter are the same
// byte; InvalidDelimiterError when the delimiter is rejected by `valid_delim`
// or when a leading quoted field is not followed by the delimiter; any error
// from `read_line` is propagated.
fn (mut r Reader) read_record() ![]string {
	if r.delimiter == r.comment {
		return &CommentIsDelimiterError{}
	}
	if !valid_delim(r.delimiter) {
		return &InvalidDelimiterError{}
	}
	mut fetch_next := true // another physical line has to be pulled in
	mut in_multiline := false // currently inside a quoted field spanning lines
	mut rest := '' // text of the record that is not yet split into fields
	mut record := []string{}
	mut last_delim := -1 // position of the most recent delimiter found in unquoted text
	for {
		if fetch_next {
			raw := r.read_line()!
			if raw.len <= 0 {
				// blank line: only meaningful inside an open quoted field
				if in_multiline {
					rest += '\n'
				}
				continue
			}
			if raw[0] == r.comment {
				// comment line: kept verbatim only inside an open quoted field
				if in_multiline {
					rest += '\n' + raw
				}
				continue
			}
			if in_multiline {
				rest += '\n'
			}
			rest += raw
			fetch_next = false
			in_multiline = false
		}
		if rest.len == 0 || rest[0] != `"` {
			// unquoted field: runs up to the next delimiter or the end of the line
			pos := rest.index(r.delimiter.ascii_str()) or {
				// last field of the record
				record << rest
				break
			}
			last_delim = pos
			record << rest[..pos]
			rest = rest[pos + 1..]
			continue
		}
		// quoted field: look for the closing quote, stepping over `""` pairs
		mut closed := false
		mut has_escaped_quotes := false
		mut content_len := 0 // length of the field content between the quotes
		mut k := 1
		for k < rest.len {
			if rest[k] == `"` {
				if k == rest.len - 1 || rest[k + 1] != `"` {
					closed = true
					content_len = k - 1
					break
				}
				has_escaped_quotes = true
				k++
			}
			k++
		}
		if !closed {
			// the field continues on the next physical line
			fetch_next = true
			in_multiline = true
			continue
		}
		// drop the opening quote; the closing quote now sits at index `content_len`
		rest = rest[1..]
		value := if has_escaped_quotes {
			rest[..content_len].replace('""', '"')
		} else {
			rest[..content_len]
		}
		if content_len + 1 == rest.len {
			// the closing quote ends the line: last field of the record
			record << value
			break
		}
		if rest[content_len + 1] == r.delimiter {
			record << value
			rest = if content_len + 2 == rest.len { '' } else { rest[content_len + 2..] }
			continue
		}
		// the closing quote is followed by something other than the delimiter
		if last_delim <= -1 && record.len == 0 {
			return &InvalidDelimiterError{}
		}
	}
	return record
}
213
// valid_delim reports whether `b` may be used as a field delimiter.
// The NUL byte, the double quote, `\r` and `\n` are rejected.
fn valid_delim(b u8) bool {
	is_reserved := b == 0 || b == `"` || b == `\r` || b == `\n`
	return !is_reserved
}
217