v

Зеркало из https://github.com/vlang/v
Форк
0
/
reader.v 
216 строк · 4.5 Кб
1
// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2
// Use of this source code is governed by an MIT license
3
// that can be found in the LICENSE file.
4
module csv
5

6
// Once interfaces are further along, the idea is to offer something similar to Go's
// io.Reader and bufio.Reader instead of reading the whole file into a string; this
// reader would then satisfy that interface. It was designed to be easy to adapt.
9
// CommentIsDelimiterError is returned by `read_record` when the reader's comment
// byte and delimiter byte are the same value.
struct CommentIsDelimiterError {
	Error
}

// msg returns the fixed description of this error.
fn (e CommentIsDelimiterError) msg() string {
	return 'encoding.csv: comment cannot be the same as delimiter'
}
16

17
// InvalidDelimiterError is returned by `read_record` when the configured delimiter
// is rejected by `valid_delim`, or when the first field of a record is quoted and
// its closing quote is not followed by the delimiter.
struct InvalidDelimiterError {
	Error
}

// msg returns the fixed description of this error.
fn (e InvalidDelimiterError) msg() string {
	return 'encoding.csv: invalid delimiter'
}
24

25
// EndOfFileError is returned by `read_line` once the read position has reached
// the end of the data.
struct EndOfFileError {
	Error
}

// msg returns the fixed description of this error.
fn (e EndOfFileError) msg() string {
	return 'encoding.csv: end of file'
}
32

33
// InvalidLineEndingError is returned by `read_line` when, reading from the very
// start of the data, neither a `\n` nor a `\r` can be found.
struct InvalidLineEndingError {
	Error
}

// msg returns the fixed description of this error.
fn (e InvalidLineEndingError) msg() string {
	return 'encoding.csv: could not find any valid line endings'
}
40

41
// Reader parses CSV text that is held entirely in memory, one record per call.
struct Reader {
	// not used yet
	// has_header        bool
	// headings          []string
	data      string // the complete CSV input
	delimiter u8     // byte that separates the fields of a record
	comment   u8     // a line whose first byte equals this one is skipped
mut:
	is_mac_pre_osx_le bool // set once the data is found to use lone `\r` line endings
	row_pos           int  // offset into `data` at which the next line starts
}
52

53
// ReaderConfig carries the optional settings accepted by `new_reader`.
@[params]
pub struct ReaderConfig {
pub:
	delimiter u8 = `,` // byte that separates the fields of a record
	comment   u8 = `#` // a line whose first byte equals this one is skipped
}
59

60
// new_reader creates a Reader over the CSV text in `data`.
// The delimiter and comment bytes are taken from `config`, which defaults to
// `,` and `#` respectively.
pub fn new_reader(data string, config ReaderConfig) &Reader {
	reader := &Reader{
		data:      data
		delimiter: config.delimiter
		comment:   config.comment
	}
	return reader
}
69

70
// read returns the next record of the CSV data as an array with one string per column.
// Any error produced while reading the record is propagated to the caller.
pub fn (mut r Reader) read() ![]string {
	return r.read_record()!
}
76

77
// Once we have multi dimensional array
78
// pub fn (mut r Reader) read_all() ?[][]string {
79
// 	mut records := []string{}
80
// 	for {
81
// 		record := r.read_record() or {
82
// 			if err.error == err_eof.error {
83
// 				return records
84
// 			} else {
85
// 				return err
86
// 			}
87
// 		}
88
// 		records << record
89
// 	}
90
// 	return records
91
// }
92
// read_line returns the next physical line of `r.data` without its line terminator
// and moves `r.row_pos` to the start of the following line.
//
// Lines are split on `\n`; a trailing `\r` left over from Windows line endings is
// removed. If no `\n` exists when reading from offset 0, the data is probed for lone
// `\r` terminators and, when one is found, the reader switches to splitting on `\r`.
//
// Errors:
// - EndOfFileError when the read position is at or past the end of the data.
// - InvalidLineEndingError when reading from offset 0 and neither `\n` nor `\r` is
//   present (this includes single-line data without a terminator).
fn (mut r Reader) read_line() !string {
	if r.row_pos >= r.data.len {
		// nothing left to consume
		return &EndOfFileError{}
	}
	terminator := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
	mut end := r.data.index_after(terminator, r.row_pos)
	if end == -1 && r.row_pos != 0 {
		// the final line has no terminator: it runs to the end of the data
		end = r.data.len
	} else if end == -1 {
		// first line and no terminator found yet: probe for lone `\r` line endings
		end = r.data.index_after('\r', r.row_pos)
		if end == -1 {
			return &InvalidLineEndingError{}
		}
		r.is_mac_pre_osx_le = true
	}
	start := r.row_pos
	r.row_pos = end + 1
	raw := r.data[start..end]
	// splitting `\r\n` on `\n` leaves a trailing `\r` behind; drop it
	if !r.is_mac_pre_osx_le && raw.len > 0 && raw[raw.len - 1] == `\r` {
		return raw[..raw.len - 1]
	}
	return raw
}
122

123
// read_record parses the next record and returns its fields.
//
// Outside of a quoted field, empty lines and lines whose first byte equals
// `r.comment` are skipped. A field that starts with `"` runs until the matching
// closing quote; `""` inside it stands for a literal `"`, and the field may span
// several physical lines, which are joined with `\n`. While such a field is still
// open, every following line is treated as field content, even when it is empty or
// starts with the comment byte.
//
// Errors:
// - CommentIsDelimiterError when the delimiter and comment bytes are equal.
// - InvalidDelimiterError when the delimiter is rejected by `valid_delim`, or when
//   the first field is quoted and its closing quote is followed by a byte other
//   than the delimiter.
// - any error returned by `read_line` (for example end of file).
fn (mut r Reader) read_record() ![]string {
	if r.delimiter == r.comment {
		return &CommentIsDelimiterError{}
	}
	if !valid_delim(r.delimiter) {
		return &InvalidDelimiterError{}
	}
	// the delimiter does not change during the call, so build its string form once
	delim := r.delimiter.ascii_str()
	mut need_read := true
	mut keep_raw := false
	mut line := ''
	mut fields := []string{}
	for {
		if need_read {
			l := r.read_line()!
			if keep_raw {
				// A quoted field is still open: the line is field content, whatever it
				// looks like. It must be appended AND re-scanned, because it may hold
				// the closing quote even if it starts with the comment byte.
				line += '\n' + l
			} else if l.len == 0 || l[0] == r.comment {
				// blank and comment lines between records are ignored
				continue
			} else {
				line += l
			}
			need_read = false
			keep_raw = false
		}
		if line.len == 0 || line[0] != `"` {
			// unquoted field: it ends at the next delimiter or at the end of the line
			pos := line.index(delim) or {
				fields << line
				break
			}
			fields << line[..pos]
			line = line[pos + 1..]
			continue
		}
		// quoted field: look for the closing quote, stepping over `""` pairs
		mut closing := -1
		mut has_double_quotes := false
		mut n := 1
		for n < line.len {
			if line[n] == `"` {
				if n == line.len - 1 || line[n + 1] != `"` {
					closing = n
					break
				}
				has_double_quotes = true
				n++
			}
			n++
		}
		if closing == -1 {
			// the field continues on the next physical line
			need_read = true
			keep_raw = true
			continue
		}
		content := line[1..closing]
		rest := line[closing + 1..]
		if rest.len == 0 || rest[0] == r.delimiter {
			fields << if has_double_quotes { content.replace('""', '"') } else { content }
			if rest.len == 0 {
				// the closing quote was the last byte of the record
				break
			}
			line = rest[1..]
			continue
		}
		// The closing quote is followed by something other than the delimiter.
		if fields.len == 0 {
			return &InvalidDelimiterError{}
		}
		// Existing lenient behavior for later fields: drop the opening quote and let
		// the remainder be parsed as unquoted text on the next iteration.
		line = line[1..]
	}
	return fields
}
213

214
// valid_delim reports whether `b` may be used as a field delimiter.
// The NUL byte, the double quote, `\r` and `\n` are rejected.
fn valid_delim(b u8) bool {
	return match b {
		0, `"`, `\r`, `\n` { false }
		else { true }
	}
}
217

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.