1// Copyright 2016 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package fields
16
17// This file was copied from https://go.googlesource.com/go/+/go1.7.3/src/encoding/json/fold.go.
18// Only the license and package were changed.
19
20import (
21	"bytes"
22	"unicode/utf8"
23)
24
// Constants used by the ASCII fast-path comparison functions in this file.
const (
	// caseMask clears bit 0x20 of a byte. For an ASCII letter this yields
	// its upper-case form, so two letters that differ only in case compare
	// equal once both are masked.
	caseMask     = ^byte(0x20) // Mask to ignore case in ASCII.
	// kelvin is U+212A (Kelvin sign), which folds to 'k' and 'K'.
	kelvin       = '\u212a'
	// smallLongEss is U+017F (Latin small letter long s), which folds to
	// 's' and 'S'.
	smallLongEss = '\u017f'
)
30
31// foldFunc returns one of four different case folding equivalence
32// functions, from most general (and slow) to fastest:
33//
34// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
35// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
36// 3) asciiEqualFold, no special, but includes non-letters (including _)
37// 4) simpleLetterEqualFold, no specials, no non-letters.
38//
39// The letters S and K are special because they map to 3 runes, not just 2:
40//  * S maps to s and to U+017F 'ſ' Latin small letter long s
41//  * k maps to K and to U+212A 'K' Kelvin sign
42// See https://play.golang.org/p/tTxjOc0OGo
43//
44// The returned function is specialized for matching against s and
45// should only be given s. It's not curried for performance reasons.
46func foldFunc(s []byte) func(s, t []byte) bool {
47	nonLetter := false
48	special := false // special letter
49	for _, b := range s {
50		if b >= utf8.RuneSelf {
51			return bytes.EqualFold
52		}
53		upper := b & caseMask
54		if upper < 'A' || upper > 'Z' {
55			nonLetter = true
56		} else if upper == 'K' || upper == 'S' {
57			// See above for why these letters are special.
58			special = true
59		}
60	}
61	if special {
62		return equalFoldRight
63	}
64	if nonLetter {
65		return asciiEqualFold
66	}
67	return simpleLetterEqualFold
68}
69
70// equalFoldRight is a specialization of bytes.EqualFold when s is
71// known to be all ASCII (including punctuation), but contains an 's',
72// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
73// See comments on foldFunc.
74func equalFoldRight(s, t []byte) bool {
75	for _, sb := range s {
76		if len(t) == 0 {
77			return false
78		}
79		tb := t[0]
80		if tb < utf8.RuneSelf {
81			if sb != tb {
82				sbUpper := sb & caseMask
83				if 'A' <= sbUpper && sbUpper <= 'Z' {
84					if sbUpper != tb&caseMask {
85						return false
86					}
87				} else {
88					return false
89				}
90			}
91			t = t[1:]
92			continue
93		}
94		// sb is ASCII and t is not. t must be either kelvin
95		// sign or long s; sb must be s, S, k, or K.
96		tr, size := utf8.DecodeRune(t)
97		switch sb {
98		case 's', 'S':
99			if tr != smallLongEss {
100				return false
101			}
102		case 'k', 'K':
103			if tr != kelvin {
104				return false
105			}
106		default:
107			return false
108		}
109		t = t[size:]
110
111	}
112	if len(t) > 0 {
113		return false
114	}
115	return true
116}
117
118// asciiEqualFold is a specialization of bytes.EqualFold for use when
119// s is all ASCII (but may contain non-letters) and contains no
120// special-folding letters.
121// See comments on foldFunc.
122func asciiEqualFold(s, t []byte) bool {
123	if len(s) != len(t) {
124		return false
125	}
126	for i, sb := range s {
127		tb := t[i]
128		if sb == tb {
129			continue
130		}
131		if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
132			if sb&caseMask != tb&caseMask {
133				return false
134			}
135		} else {
136			return false
137		}
138	}
139	return true
140}
141
142// simpleLetterEqualFold is a specialization of bytes.EqualFold for
143// use when s is all ASCII letters (no underscores, etc) and also
144// doesn't contain 'k', 'K', 's', or 'S'.
145// See comments on foldFunc.
146func simpleLetterEqualFold(s, t []byte) bool {
147	if len(s) != len(t) {
148		return false
149	}
150	for i, b := range s {
151		if b&caseMask != t[i]&caseMask {
152			return false
153		}
154	}
155	return true
156}
157