1
// Copyright 2015 Huan Du. All rights reserved.
2
// Licensed under the MIT license that can be found in the LICENSE file.
11
// Len returns the number of runes in str, as counted by
// utf8.RuneCountInString, rather than its length in bytes.
12
func Len(str string) int {
13
return utf8.RuneCountInString(str)
16
// WordCount returns the number of words in str.
18
// A word is defined as a locale dependent string containing alphabetic characters,
19
// which may also contain, but not start with, `'` and `-` characters.
20
func WordCount(str string) int {
27
// Decode the rune at the front of str; size is its encoded width in bytes.
r, size = utf8.DecodeRuneInString(str)
36
// An apostrophe or hyphen only matches while inWord is already set, which
// mirrors the rule that a word may contain but not start with them.
case inWord && (r == '\'' || r == '-'):
49
// minCJKCharacter is the threshold used by isAlphabet's quick non-CJK check:
// letters below it skip the CJK range tests. Its value, '\u3400', is also the
// lower bound of the rare CJK range tested there.
const minCJKCharacter = '\u3400'
51
// isAlphabet checks that r is a letter but not a CJK character.
52
func isAlphabet(r rune) bool {
53
// Runes that unicode.IsLetter rejects are dealt with before any range check.
if !unicode.IsLetter(r) {
58
// Quick check for non-CJK characters: every range tested below starts at
// or above minCJKCharacter.
59
case r < minCJKCharacter:
62
// Common CJK characters (U+4E00 through U+9FCC, inclusive).
63
case r >= '\u4E00' && r <= '\u9FCC':
66
// Rare CJK characters (U+3400 through U+4D85, inclusive).
67
case r >= '\u3400' && r <= '\u4D85':
70
// Rare and historic CJK characters (U+20000 through U+2B81D, inclusive).
71
case r >= '\U00020000' && r <= '\U0002B81D':
78
// Width returns the width of str when rendered in a monospaced font.
79
// Multi-byte characters are usually twice the width of single-byte characters.
81
// The algorithm comes from `mb_strwidth` in PHP.
82
// https://www.php.net/manual/en/function.mb-strwidth.php
83
func Width(str string) int {
88
// Decode the rune at the front of str; size is its encoded width in bytes.
r, size = utf8.DecodeRuneInString(str)
96
// RuneWidth returns the width of the character r when rendered in a
// monospaced font.
97
// Multi-byte characters are usually twice the width of single-byte characters.
99
// The algorithm comes from `mb_strwidth` in PHP.
100
// https://www.php.net/manual/en/function.mb-strwidth.php
101
func RuneWidth(r rune) int {
103
// utf8.RuneError and runes below U+0020 share one case. It is tested
// first, so utf8.RuneError never reaches the range cases below.
case r == utf8.RuneError || r < '\x20':
106
// U+0020 up to, but not including, U+2000.
case '\x20' <= r && r < '\u2000':
109
// U+2000 up to, but not including, U+FF61.
case '\u2000' <= r && r < '\uFF61':
112
// U+FF61 up to, but not including, U+FFA0.
// NOTE(review): presumably the halfwidth punctuation/katakana forms — confirm.
case '\uFF61' <= r && r < '\uFFA0':