podman
1// Copyright 2015 Huan Du. All rights reserved.
2// Licensed under the MIT license that can be found in the LICENSE file.
3
4package xstrings5
6import (7"unicode"8"unicode/utf8"9)
10
11type runeRangeMap struct {12FromLo rune // Lower bound of range map.13FromHi rune // An inclusive higher bound of range map.14ToLo rune15ToHi rune16}
17
18type runeDict struct {19Dict [unicode.MaxASCII + 1]rune20}
21
22type runeMap map[rune]rune23
24// Translator can translate string with pre-compiled from and to patterns.
25// If a from/to pattern pair needs to be used more than once, it's recommended
26// to create a Translator and reuse it.
27type Translator struct {28quickDict *runeDict // A quick dictionary to look up rune by index. Only available for latin runes.29runeMap runeMap // Rune map for translation.30ranges []*runeRangeMap // Ranges of runes.31mappedRune rune // If mappedRune >= 0, all matched runes are translated to the mappedRune.32reverted bool // If to pattern is empty, all matched characters will be deleted.33hasPattern bool34}
35
36// NewTranslator creates new Translator through a from/to pattern pair.
37func NewTranslator(from, to string) *Translator {38tr := &Translator{}39
40if from == "" {41return tr42}43
44reverted := from[0] == '^'45deletion := len(to) == 046
47if reverted {48from = from[1:]49}50
51var fromStart, fromEnd, fromRangeStep rune52var toStart, toEnd, toRangeStep rune53var fromRangeSize, toRangeSize rune54var singleRunes []rune55
56// Update the to rune range.57updateRange := func() {58// No more rune to read in the to rune pattern.59if toEnd == utf8.RuneError {60return61}62
63if toRangeStep == 0 {64to, toStart, toEnd, toRangeStep = nextRuneRange(to, toEnd)65return66}67
68// Current range is not empty. Consume 1 rune from start.69if toStart != toEnd {70toStart += toRangeStep71return72}73
74// No more rune. Repeat the last rune.75if to == "" {76toEnd = utf8.RuneError77return78}79
80// Both start and end are used. Read two more runes from the to pattern.81to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError)82}83
84if deletion {85toStart = utf8.RuneError86toEnd = utf8.RuneError87} else {88// If from pattern is reverted, only the last rune in the to pattern will be used.89if reverted {90var size int91
92for len(to) > 0 {93toStart, size = utf8.DecodeRuneInString(to)94to = to[size:]95}96
97toEnd = utf8.RuneError98} else {99to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError)100}101}102
103fromEnd = utf8.RuneError104
105for len(from) > 0 {106from, fromStart, fromEnd, fromRangeStep = nextRuneRange(from, fromEnd)107
108// fromStart is a single character. Just map it with a rune in the to pattern.109if fromRangeStep == 0 {110singleRunes = tr.addRune(fromStart, toStart, singleRunes)111updateRange()112continue113}114
115for toEnd != utf8.RuneError && fromStart != fromEnd {116// If mapped rune is a single character instead of a range, simply shift first117// rune in the range.118if toRangeStep == 0 {119singleRunes = tr.addRune(fromStart, toStart, singleRunes)120updateRange()121fromStart += fromRangeStep122continue123}124
125fromRangeSize = (fromEnd - fromStart) * fromRangeStep126toRangeSize = (toEnd - toStart) * toRangeStep127
128// Not enough runes in the to pattern. Need to read more.129if fromRangeSize > toRangeSize {130fromStart, toStart = tr.addRuneRange(fromStart, fromStart+toRangeSize*fromRangeStep, toStart, toEnd, singleRunes)131fromStart += fromRangeStep132updateRange()133
134// Edge case: If fromRangeSize == toRangeSize + 1, the last fromStart value needs be considered135// as a single rune.136if fromStart == fromEnd {137singleRunes = tr.addRune(fromStart, toStart, singleRunes)138updateRange()139}140
141continue142}143
144fromStart, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart+fromRangeSize*toRangeStep, singleRunes)145updateRange()146break147}148
149if fromStart == fromEnd {150fromEnd = utf8.RuneError151continue152}153
154_, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart, singleRunes)155fromEnd = utf8.RuneError156}157
158if fromEnd != utf8.RuneError {159tr.addRune(fromEnd, toStart, singleRunes)160}161
162tr.reverted = reverted163tr.mappedRune = -1164tr.hasPattern = true165
166// Translate RuneError only if in deletion or reverted mode.167if deletion || reverted {168tr.mappedRune = toStart169}170
171return tr172}
173
174func (tr *Translator) addRune(from, to rune, singleRunes []rune) []rune {175if from <= unicode.MaxASCII {176if tr.quickDict == nil {177tr.quickDict = &runeDict{}178}179
180tr.quickDict.Dict[from] = to181} else {182if tr.runeMap == nil {183tr.runeMap = make(runeMap)184}185
186tr.runeMap[from] = to187}188
189singleRunes = append(singleRunes, from)190return singleRunes191}
192
193func (tr *Translator) addRuneRange(fromLo, fromHi, toLo, toHi rune, singleRunes []rune) (rune, rune) {194var r rune195var rrm *runeRangeMap196
197if fromLo < fromHi {198rrm = &runeRangeMap{199FromLo: fromLo,200FromHi: fromHi,201ToLo: toLo,202ToHi: toHi,203}204} else {205rrm = &runeRangeMap{206FromLo: fromHi,207FromHi: fromLo,208ToLo: toHi,209ToHi: toLo,210}211}212
213// If there is any single rune conflicts with this rune range, clear single rune record.214for _, r = range singleRunes {215if rrm.FromLo <= r && r <= rrm.FromHi {216if r <= unicode.MaxASCII {217tr.quickDict.Dict[r] = 0218} else {219delete(tr.runeMap, r)220}221}222}223
224tr.ranges = append(tr.ranges, rrm)225return fromHi, toHi226}
227
228func nextRuneRange(str string, last rune) (remaining string, start, end rune, rangeStep rune) {229var r rune230var size int231
232remaining = str233escaping := false234isRange := false235
236for len(remaining) > 0 {237r, size = utf8.DecodeRuneInString(remaining)238remaining = remaining[size:]239
240// Parse special characters.241if !escaping {242if r == '\\' {243escaping = true244continue245}246
247if r == '-' {248// Ignore slash at beginning of string.249if last == utf8.RuneError {250continue251}252
253start = last254isRange = true255continue256}257}258
259escaping = false260
261if last != utf8.RuneError {262// This is a range which start and end are the same.263// Considier it as a normal character.264if isRange && last == r {265isRange = false266continue267}268
269start = last270end = r271
272if isRange {273if start < end {274rangeStep = 1275} else {276rangeStep = -1277}278}279
280return281}282
283last = r284}285
286start = last287end = utf8.RuneError288return289}
290
291// Translate str with a from/to pattern pair.
292//
293// See comment in Translate function for usage and samples.
294func (tr *Translator) Translate(str string) string {295if !tr.hasPattern || str == "" {296return str297}298
299var r rune300var size int301var needTr bool302
303orig := str304
305var output *stringBuilder306
307for len(str) > 0 {308r, size = utf8.DecodeRuneInString(str)309r, needTr = tr.TranslateRune(r)310
311if needTr && output == nil {312output = allocBuffer(orig, str)313}314
315if r != utf8.RuneError && output != nil {316output.WriteRune(r)317}318
319str = str[size:]320}321
322// No character is translated.323if output == nil {324return orig325}326
327return output.String()328}
329
330// TranslateRune return translated rune and true if r matches the from pattern.
331// If r doesn't match the pattern, original r is returned and translated is false.
332func (tr *Translator) TranslateRune(r rune) (result rune, translated bool) {333switch {334case tr.quickDict != nil:335if r <= unicode.MaxASCII {336result = tr.quickDict.Dict[r]337
338if result != 0 {339translated = true340
341if tr.mappedRune >= 0 {342result = tr.mappedRune343}344
345break346}347}348
349fallthrough350
351case tr.runeMap != nil:352var ok bool353
354if result, ok = tr.runeMap[r]; ok {355translated = true356
357if tr.mappedRune >= 0 {358result = tr.mappedRune359}360
361break362}363
364fallthrough365
366default:367var rrm *runeRangeMap368ranges := tr.ranges369
370for i := len(ranges) - 1; i >= 0; i-- {371rrm = ranges[i]372
373if rrm.FromLo <= r && r <= rrm.FromHi {374translated = true375
376if tr.mappedRune >= 0 {377result = tr.mappedRune378break379}380
381if rrm.ToLo < rrm.ToHi {382result = rrm.ToLo + r - rrm.FromLo383} else if rrm.ToLo > rrm.ToHi {384// ToHi can be smaller than ToLo if range is from higher to lower.385result = rrm.ToLo - r + rrm.FromLo386} else {387result = rrm.ToLo388}389
390break391}392}393}394
395if tr.reverted {396if !translated {397result = tr.mappedRune398}399
400translated = !translated401}402
403if !translated {404result = r405}406
407return408}
409
410// HasPattern returns true if Translator has one pattern at least.
411func (tr *Translator) HasPattern() bool {412return tr.hasPattern413}
414
415// Translate str with the characters defined in from replaced by characters defined in to.
416//
417// From and to are patterns representing a set of characters. Pattern is defined as following.
418//
419// * Special characters
420// * '-' means a range of runes, e.g.
421// * "a-z" means all characters from 'a' to 'z' inclusive;
422// * "z-a" means all characters from 'z' to 'a' inclusive.
423// * '^' as first character means a set of all runes excepted listed, e.g.
424// * "^a-z" means all characters except 'a' to 'z' inclusive.
425// * '\' escapes special characters.
426// * Normal character represents itself, e.g. "abc" is a set including 'a', 'b' and 'c'.
427//
428// Translate will try to find a 1:1 mapping from from to to.
429// If to is smaller than from, last rune in to will be used to map "out of range" characters in from.
430//
431// Note that '^' only works in the from pattern. It will be considered as a normal character in the to pattern.
432//
433// If the to pattern is an empty string, Translate works exactly the same as Delete.
434//
435// Samples:
436// Translate("hello", "aeiou", "12345") => "h2ll4"
437// Translate("hello", "a-z", "A-Z") => "HELLO"
438// Translate("hello", "z-a", "a-z") => "svool"
439// Translate("hello", "aeiou", "*") => "h*ll*"
440// Translate("hello", "^l", "*") => "**ll*"
441// Translate("hello ^ world", `\^lo`, "*") => "he*** * w*r*d"
442func Translate(str, from, to string) string {443tr := NewTranslator(from, to)444return tr.Translate(str)445}
446
447// Delete runes in str matching the pattern.
448// Pattern is defined in Translate function.
449//
450// Samples:
451// Delete("hello", "aeiou") => "hll"
452// Delete("hello", "a-k") => "llo"
453// Delete("hello", "^a-k") => "he"
454func Delete(str, pattern string) string {455tr := NewTranslator(pattern, "")456return tr.Translate(str)457}
458
459// Count how many runes in str match the pattern.
460// Pattern is defined in Translate function.
461//
462// Samples:
463// Count("hello", "aeiou") => 3
464// Count("hello", "a-k") => 3
465// Count("hello", "^a-k") => 2
466func Count(str, pattern string) int {467if pattern == "" || str == "" {468return 0469}470
471var r rune472var size int473var matched bool474
475tr := NewTranslator(pattern, "")476cnt := 0477
478for len(str) > 0 {479r, size = utf8.DecodeRuneInString(str)480str = str[size:]481
482if _, matched = tr.TranslateRune(r); matched {483cnt++484}485}486
487return cnt488}
489
490// Squeeze deletes adjacent repeated runes in str.
491// If pattern is not empty, only runes matching the pattern will be squeezed.
492//
493// Samples:
494// Squeeze("hello", "") => "helo"
495// Squeeze("hello", "m-z") => "hello"
496// Squeeze("hello world", " ") => "hello world"
497func Squeeze(str, pattern string) string {498var last, r rune499var size int500var skipSqueeze, matched bool501var tr *Translator502var output *stringBuilder503
504orig := str505last = -1506
507if len(pattern) > 0 {508tr = NewTranslator(pattern, "")509}510
511for len(str) > 0 {512r, size = utf8.DecodeRuneInString(str)513
514// Need to squeeze the str.515if last == r && !skipSqueeze {516if tr != nil {517if _, matched = tr.TranslateRune(r); !matched {518skipSqueeze = true519}520}521
522if output == nil {523output = allocBuffer(orig, str)524}525
526if skipSqueeze {527output.WriteRune(r)528}529} else {530if output != nil {531output.WriteRune(r)532}533
534last = r535skipSqueeze = false536}537
538str = str[size:]539}540
541if output == nil {542return orig543}544
545return output.String()546}
547