1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package edn
6
7import (
8	"bytes"
9	"unicode/utf8"
10)
11
// Constants shared by the ASCII fast-path case-folding helpers in this file.
const (
	// kelvin is U+212A, the Kelvin sign.
	kelvin = '\u212a'

	// smallLongEss is U+017F, the Latin small letter long s.
	smallLongEss = '\u017f'

	// caseMask clears bit 0x20 when ANDed with a byte. For an ASCII
	// letter this yields its upper-case form, so two ASCII letters
	// are equal ignoring case when their masked values are equal.
	caseMask = ^byte(0x20)
)
17
18// foldFunc returns one of four different case folding equivalence
19// functions, from most general (and slow) to fastest:
20//
21// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
22// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
23// 3) asciiEqualFold, no special, but includes non-letters (including _)
24// 4) simpleLetterEqualFold, no specials, no non-letters.
25//
26// The letters S and K are special because they map to 3 runes, not just 2:
27//  * S maps to s and to U+017F 'ſ' Latin small letter long s
28//  * k maps to K and to U+212A 'K' Kelvin sign
29// See https://play.golang.org/p/tTxjOc0OGo
30//
31// The returned function is specialized for matching against s and
32// should only be given s. It's not curried for performance reasons.
33func foldFunc(s []byte) func(s, t []byte) bool {
34	nonLetter := false
35	special := false // special letter
36	for _, b := range s {
37		if b >= utf8.RuneSelf {
38			return bytes.EqualFold
39		}
40		upper := b & caseMask
41		if upper < 'A' || upper > 'Z' {
42			nonLetter = true
43		} else if upper == 'K' || upper == 'S' {
44			// See above for why these letters are special.
45			special = true
46		}
47	}
48	if special {
49		return equalFoldRight
50	}
51	if nonLetter {
52		return asciiEqualFold
53	}
54	return simpleLetterEqualFold
55}
56
57// equalFoldRight is a specialization of bytes.EqualFold when s is
58// known to be all ASCII (including punctuation), but contains an 's',
59// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
60// See comments on foldFunc.
61func equalFoldRight(s, t []byte) bool {
62	for _, sb := range s {
63		if len(t) == 0 {
64			return false
65		}
66		tb := t[0]
67		if tb < utf8.RuneSelf {
68			if sb != tb {
69				sbUpper := sb & caseMask
70				if 'A' <= sbUpper && sbUpper <= 'Z' {
71					if sbUpper != tb&caseMask {
72						return false
73					}
74				} else {
75					return false
76				}
77			}
78			t = t[1:]
79			continue
80		}
81		// sb is ASCII and t is not. t must be either kelvin
82		// sign or long s; sb must be s, S, k, or K.
83		tr, size := utf8.DecodeRune(t)
84		switch sb {
85		case 's', 'S':
86			if tr != smallLongEss {
87				return false
88			}
89		case 'k', 'K':
90			if tr != kelvin {
91				return false
92			}
93		default:
94			return false
95		}
96		t = t[size:]
97
98	}
99	if len(t) > 0 {
100		return false
101	}
102	return true
103}
104
105// asciiEqualFold is a specialization of bytes.EqualFold for use when
106// s is all ASCII (but may contain non-letters) and contains no
107// special-folding letters.
108// See comments on foldFunc.
109func asciiEqualFold(s, t []byte) bool {
110	if len(s) != len(t) {
111		return false
112	}
113	for i, sb := range s {
114		tb := t[i]
115		if sb == tb {
116			continue
117		}
118		if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
119			if sb&caseMask != tb&caseMask {
120				return false
121			}
122		} else {
123			return false
124		}
125	}
126	return true
127}
128
129// simpleLetterEqualFold is a specialization of bytes.EqualFold for
130// use when s is all ASCII letters (no underscores, etc) and also
131// doesn't contain 'k', 'K', 's', or 'S'.
132// See comments on foldFunc.
133func simpleLetterEqualFold(s, t []byte) bool {
134	if len(s) != len(t) {
135		return false
136	}
137	for i, b := range s {
138		if b&caseMask != t[i]&caseMask {
139			return false
140		}
141	}
142	return true
143}
144