1// Copyright (c) 2012-2014 Jeremy Latt
2// Copyright (c) 2014-2015 Edmund Huber
3// Copyright (c) 2016-2017 Daniel Oaks <daniel@danieloaks.net>
4// released under the MIT license
5
6package irc
7
8import (
9	"fmt"
10	"regexp"
11	"strings"
12
13	"github.com/ergochat/confusables"
14	"golang.org/x/text/cases"
15	"golang.org/x/text/secure/precis"
16	"golang.org/x/text/unicode/norm"
17	"golang.org/x/text/width"
18
19	"github.com/ergochat/ergo/irc/utils"
20)
21
// precisUTF8MappingToken is the literal token "rfc8265".
// NOTE(review): presumably the name of the UTF-8 mapping advertised to
// clients; confirm at the use site.
const precisUTF8MappingToken = "rfc8265"

const (
	// protocolBreakingNameCharacters lists characters that can never appear
	// in a nick/account name because they have a meaning in the protocol:
	//
	//	' '  cannot be used (parameter separator)
	//	','  is used as a list separator
	//	'*'  is used in mask matching
	//	'?'  is used in mask matching
	//	'.'  denotes a server name
	//	'!'  separates nickname from username
	//	'@'  separates username from hostname
	//	':'  introduces a trailing parameter
	protocolBreakingNameCharacters = " ,*?.!@:"

	// disfavoredNameCharacters lists characters that turned out to be
	// problematic (#1436). They are disallowed in new nicks/account names,
	// while previously registered names containing them remain allowed.
	disfavoredNameCharacters = `<>'";#`
)
40
// permissiveCharsRegex revives the old ergonomadic nickname regex: in
// permissive mode an identifier may consist of arbitrary letters, numbers,
// punctuation, and symbols (and nothing else; the empty string matches too).
var permissiveCharsRegex = regexp.MustCompile(`^[\pL\pN\pP\pS]*$`)
46
// Casemapping selects how identifiers are casefolded, validated, and checked
// for confusables.
type Casemapping uint

const (
	// CasemappingPRECIS ("precis") is the default and the zero value.
	// Casefolding/validation: PRECIS plus ircd restrictions (like no *).
	// Confusables detection: standard skeleton algorithm.
	CasemappingPRECIS = Casemapping(iota)

	// CasemappingASCII ("ascii") is the traditional ircd behavior.
	// Casefolding/validation: names must be pure ASCII and follow ircd
	// restrictions; folding is ASCII lowercasing.
	// Confusables detection: none.
	CasemappingASCII

	// CasemappingPermissive ("permissive") is an insecure mode.
	// Casefolding/validation: arbitrary Unicode that follows ircd
	// restrictions, with Unicode casefolding.
	// Confusables detection: standard skeleton algorithm (which may be
	// ineffective over the larger set of permitted identifiers).
	CasemappingPermissive
)
64
65// XXX this is a global variable without explicit synchronization.
66// it gets set during the initial Server.applyConfig and cannot be changed by rehash:
67// this happens-before all IRC connections and all casefolding operations.
68var globalCasemappingSetting Casemapping = CasemappingPRECIS
69
70// XXX analogous unsynchronized global variable controlling utf8 validation
71// if this is off, you get the traditional IRC behavior (relaying any valid RFC1459
72// octets) and invalid utf8 messages are silently dropped for websocket clients only.
73// if this is on, invalid utf8 inputs get a FAIL reply.
74var globalUtf8EnforcementSetting bool
75
76// Each pass of PRECIS casefolding is a composition of idempotent operations,
77// but not idempotent itself. Therefore, the spec says "do it four times and hope
78// it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,
79// which provides this functionality, but unfortunately it's not exposed publicly.
80func iterateFolding(profile *precis.Profile, oldStr string) (str string, err error) {
81	str = oldStr
82	// follow the stabilizing rules laid out here:
83	// https://tools.ietf.org/html/draft-ietf-precis-7564bis-10.html#section-7
84	for i := 0; i < 4; i++ {
85		str, err = profile.CompareKey(str)
86		if err != nil {
87			return "", err
88		}
89		if oldStr == str {
90			break
91		}
92		oldStr = str
93	}
94	if oldStr != str {
95		return "", errCouldNotStabilize
96	}
97	return str, nil
98}
99
100// Casefold returns a casefolded string, without doing any name or channel character checks.
101func Casefold(str string) (string, error) {
102	return casefoldWithSetting(str, globalCasemappingSetting)
103}
104
105func casefoldWithSetting(str string, setting Casemapping) (string, error) {
106	switch setting {
107	default:
108		return iterateFolding(precis.UsernameCaseMapped, str)
109	case CasemappingASCII:
110		return foldASCII(str)
111	case CasemappingPermissive:
112		return foldPermissive(str)
113	}
114}
115
116// CasefoldChannel returns a casefolded version of a channel name.
117func CasefoldChannel(name string) (string, error) {
118	if len(name) == 0 {
119		return "", errStringIsEmpty
120	}
121
122	// don't casefold the preceding #'s
123	var start int
124	for start = 0; start < len(name) && name[start] == '#'; start += 1 {
125	}
126
127	if start == 0 {
128		// no preceding #'s
129		return "", errInvalidCharacter
130	}
131
132	lowered, err := Casefold(name[start:])
133	if err != nil {
134		return "", err
135	}
136
137	// space can't be used
138	// , is used as a separator
139	// * is used in mask matching
140	// ? is used in mask matching
141	if strings.ContainsAny(lowered, " ,*?") {
142		return "", errInvalidCharacter
143	}
144
145	return name[:start] + lowered, err
146}
147
148// CasefoldName returns a casefolded version of a nick/user name.
149func CasefoldName(name string) (string, error) {
150	lowered, err := Casefold(name)
151
152	if err != nil {
153		return "", err
154	} else if len(lowered) == 0 {
155		return "", errStringIsEmpty
156	}
157
158	// # is a channel prefix
159	// ~&@%+ are channel membership prefixes
160	// - I feel like disallowing
161	if strings.ContainsAny(lowered, protocolBreakingNameCharacters) || strings.ContainsAny(string(lowered[0]), "#~&@%+-") {
162		return "", errInvalidCharacter
163	}
164
165	return lowered, err
166}
167
// isIdent reports whether name is a valid ident, using a mix of Insp and
// Chary's ident restrictions: it must be non-empty, its first byte must be an
// ASCII letter or digit, and every later byte must be an ASCII letter, a
// digit, or one of [ \ ] ^ _ { | } - . `
func isIdent(name string) bool {
	if name == "" {
		return false
	}

	for pos, c := range []byte(name) {
		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
			continue // alphanumerics are fine anywhere
		case pos == 0:
			return false // first char must be alnum
		}

		switch c {
		case '-', '.', '_', '`', '^', '|', '\\', '[', ']', '{', '}':
			// allowed punctuation
		default:
			return false // anything else is disallowed
		}
	}

	return true
}
193
194// Skeleton produces a canonicalized identifier that tries to catch
195// homoglyphic / confusable identifiers. It's a tweaked version of the TR39
196// skeleton algorithm. We apply the skeleton algorithm first and only then casefold,
197// because casefolding first would lose some information about visual confusability.
198// This has the weird consequence that the skeleton is not a function of the
199// casefolded identifier --- therefore it must always be computed
200// from the original (unfolded) identifier and stored/tracked separately from the
201// casefolded identifier.
202func Skeleton(name string) (string, error) {
203	switch globalCasemappingSetting {
204	default:
205		return realSkeleton(name)
206	case CasemappingASCII:
207		// identity function is fine because we independently case-normalize in Casefold
208		return name, nil
209	}
210}
211
212func realSkeleton(name string) (string, error) {
213	// XXX the confusables table includes some, but not all, fullwidth->standard
214	// mappings for latin characters. do a pass of explicit width folding,
215	// same as PRECIS:
216	name = width.Fold.String(name)
217
218	name = confusables.SkeletonTweaked(name)
219
220	// internationalized lowercasing for skeletons; this is much more lenient than
221	// Casefold. In particular, skeletons are expected to mix scripts (which may
222	// violate the bidi rule). We also don't care if they contain runes
223	// that are disallowed by PRECIS, because every identifier must independently
224	// pass PRECIS --- we are just further canonicalizing the skeleton.
225	return cases.Fold().String(name), nil
226}
227
228// maps a nickmask fragment to an expanded, casefolded wildcard:
229// Shivaram@good-fortune -> *!shivaram@good-fortune
230// EDMUND -> edmund!*@*
231func CanonicalizeMaskWildcard(userhost string) (expanded string, err error) {
232	userhost = strings.TrimSpace(userhost)
233	var nick, user, host string
234	bangIndex := strings.IndexByte(userhost, '!')
235	strudelIndex := strings.IndexByte(userhost, '@')
236
237	if bangIndex != -1 && bangIndex < strudelIndex {
238		nick = userhost[:bangIndex]
239		user = userhost[bangIndex+1 : strudelIndex]
240		host = userhost[strudelIndex+1:]
241	} else if bangIndex != -1 && strudelIndex == -1 {
242		nick = userhost[:bangIndex]
243		user = userhost[bangIndex+1:]
244	} else if bangIndex != -1 && strudelIndex < bangIndex {
245		// @ before !, fail
246		return "", errNicknameInvalid
247	} else if bangIndex == -1 && strudelIndex != -1 {
248		user = userhost[:strudelIndex]
249		host = userhost[strudelIndex+1:]
250	} else if bangIndex == -1 && strudelIndex == -1 {
251		nick = userhost
252	} else {
253		// shouldn't be possible
254		return "", errInvalidParams
255	}
256
257	if nick == "" {
258		nick = "*"
259	}
260	if nick != "*" {
261		// XXX wildcards are not accepted with most unicode nicks,
262		// because the * character breaks casefolding
263		nick, err = Casefold(nick)
264		if err != nil {
265			return "", err
266		}
267	}
268	if user == "" {
269		user = "*"
270	}
271	if user != "*" {
272		user = strings.ToLower(user)
273	}
274	if host == "" {
275		host = "*"
276	}
277	if host != "*" {
278		host = strings.ToLower(host)
279	}
280	expanded = fmt.Sprintf("%s!%s@%s", nick, user, host)
281	if utils.SafeErrorParam(expanded) != expanded {
282		err = errInvalidCharacter
283	}
284	return
285}
286
287func foldASCII(str string) (result string, err error) {
288	if !IsPrintableASCII(str) {
289		return "", errInvalidCharacter
290	}
291	return strings.ToLower(str), nil
292}
293
// IsPrintableASCII reports whether every byte of str lies in the range
// ' ' through '~'. The empty string is considered printable.
//
// Space is allowed here because it's technically printable; it is rejected
// later by CasefoldName/CasefoldChannel.
func IsPrintableASCII(str string) bool {
	for _, b := range []byte(str) {
		printable := ' ' <= b && b <= '~'
		if !printable {
			return false
		}
	}
	return true
}
305
306func foldPermissive(str string) (result string, err error) {
307	if !permissiveCharsRegex.MatchString(str) {
308		return "", errInvalidCharacter
309	}
310	// YOLO
311	str = norm.NFD.String(str)
312	str = cases.Fold().String(str)
313	str = norm.NFD.String(str)
314	return str, nil
315}
316
// NUHToNick reduces a nick!user@host string to its nick, e.g.
// `alice!~u@host` to `alice`. Input without a '!' is returned unchanged.
func NUHToNick(nuh string) (nick string) {
	bang := strings.IndexByte(nuh, '!')
	if bang < 0 {
		return nuh
	}
	return nuh[:bang]
}
324