1//go:generate go run maketables.go > tables.go 2 3package confusables 4 5import ( 6 "bytes" 7 8 "golang.org/x/text/unicode/norm" 9) 10 11// TODO: document casefolding approaches 12// (suggest to force casefold strings; explain how to catch paypal - pAypal) 13// TODO: DOC you might want to store the Skeleton and check against it later 14// TODO: implement xidmodifications.txt restricted characters 15 16type lookupFunc func(rune) (string) 17 18func lookupReplacement(r rune) string { 19 return confusablesMap[r] 20} 21 22func lookupReplacementTweaked(r rune) string { 23 if replacement, ok := tweaksMap[r]; ok { 24 return replacement 25 } 26 return confusablesMap[r] 27} 28 29func skeletonBase(s string, lookup lookupFunc) string { 30 31 // 1. Converting X to NFD format 32 s = norm.NFD.String(s) 33 34 // 2. Successively mapping each source character in X to the target string 35 // according to the specified data table 36 var buf bytes.Buffer 37 changed := false // fast path: if this remains false, keep s intact 38 prevPos := 0 39 var replacement string 40 for i, r := range s { 41 if changed && replacement == "" { 42 buf.WriteString(s[prevPos:i]) 43 } 44 prevPos = i 45 replacement = lookup(r) 46 if replacement != "" { 47 if !changed { 48 changed = true 49 // first replacement: copy over the previously unmodified text 50 buf.WriteString(s[:i]) 51 } 52 buf.WriteString(replacement) 53 } 54 } 55 if changed && replacement == "" { 56 buf.WriteString(s[prevPos:]) // loop-and-a-half 57 } 58 if changed { 59 s = buf.String() 60 } 61 62 // 3. Reapplying NFD 63 s = norm.NFD.String(s) 64 65 return s 66} 67 68// Skeleton converts a string to its "skeleton" form 69// as described in http://www.unicode.org/reports/tr39/#Confusable_Detection 70func Skeleton(s string) string { 71 return skeletonBase(s, lookupReplacement) 72} 73 74// SkeletonTweaked is like Skeleton, but it implements some custom overrides 75// to the confusables table (currently it removes the m -> rn mapping): 76func SkeletonTweaked(s string) string { 77 return skeletonBase(s, lookupReplacementTweaked) 78} 79 80func Confusable(x, y string) bool { 81 return Skeleton(x) == Skeleton(y) 82} 83