podman

Форк
0
546 строк · 11.8 Кб
1
// Copyright 2015 Huan Du. All rights reserved.
2
// Licensed under the MIT license that can be found in the LICENSE file.
3

4
package xstrings
5

6
import (
7
	"unicode"
8
	"unicode/utf8"
9
)
10

11
type runeRangeMap struct {
12
	FromLo rune // Lower bound of range map.
13
	FromHi rune // An inclusive higher bound of range map.
14
	ToLo   rune
15
	ToHi   rune
16
}
17

18
type runeDict struct {
19
	Dict [unicode.MaxASCII + 1]rune
20
}
21

22
type runeMap map[rune]rune
23

24
// Translator can translate string with pre-compiled from and to patterns.
25
// If a from/to pattern pair needs to be used more than once, it's recommended
26
// to create a Translator and reuse it.
27
type Translator struct {
28
	quickDict  *runeDict       // A quick dictionary to look up rune by index. Only available for latin runes.
29
	runeMap    runeMap         // Rune map for translation.
30
	ranges     []*runeRangeMap // Ranges of runes.
31
	mappedRune rune            // If mappedRune >= 0, all matched runes are translated to the mappedRune.
32
	reverted   bool            // If to pattern is empty, all matched characters will be deleted.
33
	hasPattern bool
34
}
35

36
// NewTranslator creates new Translator through a from/to pattern pair.
37
func NewTranslator(from, to string) *Translator {
38
	tr := &Translator{}
39

40
	if from == "" {
41
		return tr
42
	}
43

44
	reverted := from[0] == '^'
45
	deletion := len(to) == 0
46

47
	if reverted {
48
		from = from[1:]
49
	}
50

51
	var fromStart, fromEnd, fromRangeStep rune
52
	var toStart, toEnd, toRangeStep rune
53
	var fromRangeSize, toRangeSize rune
54
	var singleRunes []rune
55

56
	// Update the to rune range.
57
	updateRange := func() {
58
		// No more rune to read in the to rune pattern.
59
		if toEnd == utf8.RuneError {
60
			return
61
		}
62

63
		if toRangeStep == 0 {
64
			to, toStart, toEnd, toRangeStep = nextRuneRange(to, toEnd)
65
			return
66
		}
67

68
		// Current range is not empty. Consume 1 rune from start.
69
		if toStart != toEnd {
70
			toStart += toRangeStep
71
			return
72
		}
73

74
		// No more rune. Repeat the last rune.
75
		if to == "" {
76
			toEnd = utf8.RuneError
77
			return
78
		}
79

80
		// Both start and end are used. Read two more runes from the to pattern.
81
		to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError)
82
	}
83

84
	if deletion {
85
		toStart = utf8.RuneError
86
		toEnd = utf8.RuneError
87
	} else {
88
		// If from pattern is reverted, only the last rune in the to pattern will be used.
89
		if reverted {
90
			var size int
91

92
			for len(to) > 0 {
93
				toStart, size = utf8.DecodeRuneInString(to)
94
				to = to[size:]
95
			}
96

97
			toEnd = utf8.RuneError
98
		} else {
99
			to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError)
100
		}
101
	}
102

103
	fromEnd = utf8.RuneError
104

105
	for len(from) > 0 {
106
		from, fromStart, fromEnd, fromRangeStep = nextRuneRange(from, fromEnd)
107

108
		// fromStart is a single character. Just map it with a rune in the to pattern.
109
		if fromRangeStep == 0 {
110
			singleRunes = tr.addRune(fromStart, toStart, singleRunes)
111
			updateRange()
112
			continue
113
		}
114

115
		for toEnd != utf8.RuneError && fromStart != fromEnd {
116
			// If mapped rune is a single character instead of a range, simply shift first
117
			// rune in the range.
118
			if toRangeStep == 0 {
119
				singleRunes = tr.addRune(fromStart, toStart, singleRunes)
120
				updateRange()
121
				fromStart += fromRangeStep
122
				continue
123
			}
124

125
			fromRangeSize = (fromEnd - fromStart) * fromRangeStep
126
			toRangeSize = (toEnd - toStart) * toRangeStep
127

128
			// Not enough runes in the to pattern. Need to read more.
129
			if fromRangeSize > toRangeSize {
130
				fromStart, toStart = tr.addRuneRange(fromStart, fromStart+toRangeSize*fromRangeStep, toStart, toEnd, singleRunes)
131
				fromStart += fromRangeStep
132
				updateRange()
133

134
				// Edge case: If fromRangeSize == toRangeSize + 1, the last fromStart value needs be considered
135
				// as a single rune.
136
				if fromStart == fromEnd {
137
					singleRunes = tr.addRune(fromStart, toStart, singleRunes)
138
					updateRange()
139
				}
140

141
				continue
142
			}
143

144
			fromStart, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart+fromRangeSize*toRangeStep, singleRunes)
145
			updateRange()
146
			break
147
		}
148

149
		if fromStart == fromEnd {
150
			fromEnd = utf8.RuneError
151
			continue
152
		}
153

154
		_, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart, singleRunes)
155
		fromEnd = utf8.RuneError
156
	}
157

158
	if fromEnd != utf8.RuneError {
159
		tr.addRune(fromEnd, toStart, singleRunes)
160
	}
161

162
	tr.reverted = reverted
163
	tr.mappedRune = -1
164
	tr.hasPattern = true
165

166
	// Translate RuneError only if in deletion or reverted mode.
167
	if deletion || reverted {
168
		tr.mappedRune = toStart
169
	}
170

171
	return tr
172
}
173

174
func (tr *Translator) addRune(from, to rune, singleRunes []rune) []rune {
175
	if from <= unicode.MaxASCII {
176
		if tr.quickDict == nil {
177
			tr.quickDict = &runeDict{}
178
		}
179

180
		tr.quickDict.Dict[from] = to
181
	} else {
182
		if tr.runeMap == nil {
183
			tr.runeMap = make(runeMap)
184
		}
185

186
		tr.runeMap[from] = to
187
	}
188

189
	singleRunes = append(singleRunes, from)
190
	return singleRunes
191
}
192

193
func (tr *Translator) addRuneRange(fromLo, fromHi, toLo, toHi rune, singleRunes []rune) (rune, rune) {
194
	var r rune
195
	var rrm *runeRangeMap
196

197
	if fromLo < fromHi {
198
		rrm = &runeRangeMap{
199
			FromLo: fromLo,
200
			FromHi: fromHi,
201
			ToLo:   toLo,
202
			ToHi:   toHi,
203
		}
204
	} else {
205
		rrm = &runeRangeMap{
206
			FromLo: fromHi,
207
			FromHi: fromLo,
208
			ToLo:   toHi,
209
			ToHi:   toLo,
210
		}
211
	}
212

213
	// If there is any single rune conflicts with this rune range, clear single rune record.
214
	for _, r = range singleRunes {
215
		if rrm.FromLo <= r && r <= rrm.FromHi {
216
			if r <= unicode.MaxASCII {
217
				tr.quickDict.Dict[r] = 0
218
			} else {
219
				delete(tr.runeMap, r)
220
			}
221
		}
222
	}
223

224
	tr.ranges = append(tr.ranges, rrm)
225
	return fromHi, toHi
226
}
227

228
func nextRuneRange(str string, last rune) (remaining string, start, end rune, rangeStep rune) {
229
	var r rune
230
	var size int
231

232
	remaining = str
233
	escaping := false
234
	isRange := false
235

236
	for len(remaining) > 0 {
237
		r, size = utf8.DecodeRuneInString(remaining)
238
		remaining = remaining[size:]
239

240
		// Parse special characters.
241
		if !escaping {
242
			if r == '\\' {
243
				escaping = true
244
				continue
245
			}
246

247
			if r == '-' {
248
				// Ignore slash at beginning of string.
249
				if last == utf8.RuneError {
250
					continue
251
				}
252

253
				start = last
254
				isRange = true
255
				continue
256
			}
257
		}
258

259
		escaping = false
260

261
		if last != utf8.RuneError {
262
			// This is a range which start and end are the same.
263
			// Considier it as a normal character.
264
			if isRange && last == r {
265
				isRange = false
266
				continue
267
			}
268

269
			start = last
270
			end = r
271

272
			if isRange {
273
				if start < end {
274
					rangeStep = 1
275
				} else {
276
					rangeStep = -1
277
				}
278
			}
279

280
			return
281
		}
282

283
		last = r
284
	}
285

286
	start = last
287
	end = utf8.RuneError
288
	return
289
}
290

291
// Translate str with a from/to pattern pair.
292
//
293
// See comment in Translate function for usage and samples.
294
func (tr *Translator) Translate(str string) string {
295
	if !tr.hasPattern || str == "" {
296
		return str
297
	}
298

299
	var r rune
300
	var size int
301
	var needTr bool
302

303
	orig := str
304

305
	var output *stringBuilder
306

307
	for len(str) > 0 {
308
		r, size = utf8.DecodeRuneInString(str)
309
		r, needTr = tr.TranslateRune(r)
310

311
		if needTr && output == nil {
312
			output = allocBuffer(orig, str)
313
		}
314

315
		if r != utf8.RuneError && output != nil {
316
			output.WriteRune(r)
317
		}
318

319
		str = str[size:]
320
	}
321

322
	// No character is translated.
323
	if output == nil {
324
		return orig
325
	}
326

327
	return output.String()
328
}
329

330
// TranslateRune return translated rune and true if r matches the from pattern.
331
// If r doesn't match the pattern, original r is returned and translated is false.
332
func (tr *Translator) TranslateRune(r rune) (result rune, translated bool) {
333
	switch {
334
	case tr.quickDict != nil:
335
		if r <= unicode.MaxASCII {
336
			result = tr.quickDict.Dict[r]
337

338
			if result != 0 {
339
				translated = true
340

341
				if tr.mappedRune >= 0 {
342
					result = tr.mappedRune
343
				}
344

345
				break
346
			}
347
		}
348

349
		fallthrough
350

351
	case tr.runeMap != nil:
352
		var ok bool
353

354
		if result, ok = tr.runeMap[r]; ok {
355
			translated = true
356

357
			if tr.mappedRune >= 0 {
358
				result = tr.mappedRune
359
			}
360

361
			break
362
		}
363

364
		fallthrough
365

366
	default:
367
		var rrm *runeRangeMap
368
		ranges := tr.ranges
369

370
		for i := len(ranges) - 1; i >= 0; i-- {
371
			rrm = ranges[i]
372

373
			if rrm.FromLo <= r && r <= rrm.FromHi {
374
				translated = true
375

376
				if tr.mappedRune >= 0 {
377
					result = tr.mappedRune
378
					break
379
				}
380

381
				if rrm.ToLo < rrm.ToHi {
382
					result = rrm.ToLo + r - rrm.FromLo
383
				} else if rrm.ToLo > rrm.ToHi {
384
					// ToHi can be smaller than ToLo if range is from higher to lower.
385
					result = rrm.ToLo - r + rrm.FromLo
386
				} else {
387
					result = rrm.ToLo
388
				}
389

390
				break
391
			}
392
		}
393
	}
394

395
	if tr.reverted {
396
		if !translated {
397
			result = tr.mappedRune
398
		}
399

400
		translated = !translated
401
	}
402

403
	if !translated {
404
		result = r
405
	}
406

407
	return
408
}
409

410
// HasPattern returns true if Translator has one pattern at least.
411
func (tr *Translator) HasPattern() bool {
412
	return tr.hasPattern
413
}
414

415
// Translate str with the characters defined in from replaced by characters defined in to.
416
//
417
// From and to are patterns representing a set of characters. Pattern is defined as following.
418
//
419
//     * Special characters
420
//       * '-' means a range of runes, e.g.
421
//         * "a-z" means all characters from 'a' to 'z' inclusive;
422
//         * "z-a" means all characters from 'z' to 'a' inclusive.
423
//       * '^' as first character means a set of all runes excepted listed, e.g.
424
//         * "^a-z" means all characters except 'a' to 'z' inclusive.
425
//       * '\' escapes special characters.
426
//     * Normal character represents itself, e.g. "abc" is a set including 'a', 'b' and 'c'.
427
//
428
// Translate will try to find a 1:1 mapping from from to to.
429
// If to is smaller than from, last rune in to will be used to map "out of range" characters in from.
430
//
431
// Note that '^' only works in the from pattern. It will be considered as a normal character in the to pattern.
432
//
433
// If the to pattern is an empty string, Translate works exactly the same as Delete.
434
//
435
// Samples:
436
//     Translate("hello", "aeiou", "12345")    => "h2ll4"
437
//     Translate("hello", "a-z", "A-Z")        => "HELLO"
438
//     Translate("hello", "z-a", "a-z")        => "svool"
439
//     Translate("hello", "aeiou", "*")        => "h*ll*"
440
//     Translate("hello", "^l", "*")           => "**ll*"
441
//     Translate("hello ^ world", `\^lo`, "*") => "he*** * w*r*d"
442
func Translate(str, from, to string) string {
443
	tr := NewTranslator(from, to)
444
	return tr.Translate(str)
445
}
446

447
// Delete runes in str matching the pattern.
448
// Pattern is defined in Translate function.
449
//
450
// Samples:
451
//     Delete("hello", "aeiou") => "hll"
452
//     Delete("hello", "a-k")   => "llo"
453
//     Delete("hello", "^a-k")  => "he"
454
func Delete(str, pattern string) string {
455
	tr := NewTranslator(pattern, "")
456
	return tr.Translate(str)
457
}
458

459
// Count how many runes in str match the pattern.
460
// Pattern is defined in Translate function.
461
//
462
// Samples:
463
//     Count("hello", "aeiou") => 3
464
//     Count("hello", "a-k")   => 3
465
//     Count("hello", "^a-k")  => 2
466
func Count(str, pattern string) int {
467
	if pattern == "" || str == "" {
468
		return 0
469
	}
470

471
	var r rune
472
	var size int
473
	var matched bool
474

475
	tr := NewTranslator(pattern, "")
476
	cnt := 0
477

478
	for len(str) > 0 {
479
		r, size = utf8.DecodeRuneInString(str)
480
		str = str[size:]
481

482
		if _, matched = tr.TranslateRune(r); matched {
483
			cnt++
484
		}
485
	}
486

487
	return cnt
488
}
489

490
// Squeeze deletes adjacent repeated runes in str.
491
// If pattern is not empty, only runes matching the pattern will be squeezed.
492
//
493
// Samples:
494
//     Squeeze("hello", "")             => "helo"
495
//     Squeeze("hello", "m-z")          => "hello"
496
//     Squeeze("hello   world", " ")    => "hello world"
497
func Squeeze(str, pattern string) string {
498
	var last, r rune
499
	var size int
500
	var skipSqueeze, matched bool
501
	var tr *Translator
502
	var output *stringBuilder
503

504
	orig := str
505
	last = -1
506

507
	if len(pattern) > 0 {
508
		tr = NewTranslator(pattern, "")
509
	}
510

511
	for len(str) > 0 {
512
		r, size = utf8.DecodeRuneInString(str)
513

514
		// Need to squeeze the str.
515
		if last == r && !skipSqueeze {
516
			if tr != nil {
517
				if _, matched = tr.TranslateRune(r); !matched {
518
					skipSqueeze = true
519
				}
520
			}
521

522
			if output == nil {
523
				output = allocBuffer(orig, str)
524
			}
525

526
			if skipSqueeze {
527
				output.WriteRune(r)
528
			}
529
		} else {
530
			if output != nil {
531
				output.WriteRune(r)
532
			}
533

534
			last = r
535
			skipSqueeze = false
536
		}
537

538
		str = str[size:]
539
	}
540

541
	if output == nil {
542
		return orig
543
	}
544

545
	return output.String()
546
}
547

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.