1//go:generate go run maketables.go > tables.go
2
3package confusables
4
5import (
6	"bytes"
7
8	"golang.org/x/text/unicode/norm"
9)
10
11// TODO: document casefolding approaches
12// (suggest to force casefold strings; explain how to catch paypal - pAypal)
// TODO: document that callers may want to store the Skeleton and check against it later
14// TODO: implement xidmodifications.txt restricted characters
15
// lookupFunc maps a single rune to its replacement string. The callers in
// this file treat an empty result as "no replacement for this rune".
type lookupFunc func(rune) string
17
18func lookupReplacement(r rune) string {
19	return confusablesMap[r]
20}
21
22func lookupReplacementTweaked(r rune) string {
23	if replacement, ok := tweaksMap[r]; ok {
24		return replacement
25	}
26	return confusablesMap[r]
27}
28
29func skeletonBase(s string, lookup lookupFunc) string {
30
31	// 1. Converting X to NFD format
32	s = norm.NFD.String(s)
33
34	// 2. Successively mapping each source character in X to the target string
35	// according to the specified data table
36	var buf bytes.Buffer
37	changed := false // fast path: if this remains false, keep s intact
38	prevPos := 0
39	var replacement string
40	for i, r := range s {
41		if changed && replacement == "" {
42			buf.WriteString(s[prevPos:i])
43		}
44		prevPos = i
45		replacement = lookup(r)
46		if replacement != "" {
47			if !changed {
48				changed = true
49				// first replacement: copy over the previously unmodified text
50				buf.WriteString(s[:i])
51			}
52			buf.WriteString(replacement)
53		}
54	}
55	if changed && replacement == "" {
56		buf.WriteString(s[prevPos:]) // loop-and-a-half
57	}
58	if changed {
59		s = buf.String()
60	}
61
62	// 3. Reapplying NFD
63	s = norm.NFD.String(s)
64
65	return s
66}
67
68// Skeleton converts a string to its "skeleton" form
69// as described in http://www.unicode.org/reports/tr39/#Confusable_Detection
70func Skeleton(s string) string {
71	return skeletonBase(s, lookupReplacement)
72}
73
74// SkeletonTweaked is like Skeleton, but it implements some custom overrides
75// to the confusables table (currently it removes the m -> rn mapping):
76func SkeletonTweaked(s string) string {
77	return skeletonBase(s, lookupReplacementTweaked)
78}
79
80func Confusable(x, y string) bool {
81	return Skeleton(x) == Skeleton(y)
82}
83