1// Copyright (c) 2012-2014 Jeremy Latt 2// Copyright (c) 2014-2015 Edmund Huber 3// Copyright (c) 2016-2017 Daniel Oaks <daniel@danieloaks.net> 4// released under the MIT license 5 6package irc 7 8import ( 9 "fmt" 10 "regexp" 11 "strings" 12 13 "github.com/ergochat/confusables" 14 "golang.org/x/text/cases" 15 "golang.org/x/text/secure/precis" 16 "golang.org/x/text/unicode/norm" 17 "golang.org/x/text/width" 18 19 "github.com/ergochat/ergo/irc/utils" 20) 21 22const ( 23 precisUTF8MappingToken = "rfc8265" 24 25 // space can't be used 26 // , is used as a separator 27 // * is used in mask matching 28 // ? is used in mask matching 29 // . denotes a server name 30 // ! separates nickname from username 31 // @ separates username from hostname 32 // : means trailing 33 protocolBreakingNameCharacters = " ,*?.!@:" 34 35 // #1436: we discovered that these characters are problematic, 36 // so we're disallowing them in new nicks/account names, but allowing 37 // previously registered names 38 disfavoredNameCharacters = `<>'";#` 39) 40 41var ( 42 // reviving the old ergonomadic nickname regex: 43 // in permissive mode, allow arbitrary letters, numbers, punctuation, and symbols 44 permissiveCharsRegex = regexp.MustCompile(`^[\pL\pN\pP\pS]*$`) 45) 46 47type Casemapping uint 48 49const ( 50 // "precis" is the default / zero value: 51 // casefolding/validation: PRECIS + ircd restrictions (like no *) 52 // confusables detection: standard skeleton algorithm 53 CasemappingPRECIS Casemapping = iota 54 // "ascii" is the traditional ircd behavior: 55 // casefolding/validation: must be pure ASCII and follow ircd restrictions, ASCII lowercasing 56 // confusables detection: none 57 CasemappingASCII 58 // "permissive" is an insecure mode: 59 // casefolding/validation: arbitrary unicodes that follow ircd restrictions, unicode casefolding 60 // confusables detection: standard skeleton algorithm (which may be ineffective 61 // over the larger set of permitted identifiers) 62 CasemappingPermissive 63) 64 65// XXX this is a global variable without explicit synchronization. 66// it gets set during the initial Server.applyConfig and cannot be changed by rehash: 67// this happens-before all IRC connections and all casefolding operations. 68var globalCasemappingSetting Casemapping = CasemappingPRECIS 69 70// XXX analogous unsynchronized global variable controlling utf8 validation 71// if this is off, you get the traditional IRC behavior (relaying any valid RFC1459 72// octets) and invalid utf8 messages are silently dropped for websocket clients only. 73// if this is on, invalid utf8 inputs get a FAIL reply. 74var globalUtf8EnforcementSetting bool 75 76// Each pass of PRECIS casefolding is a composition of idempotent operations, 77// but not idempotent itself. Therefore, the spec says "do it four times and hope 78// it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option, 79// which provides this functionality, but unfortunately it's not exposed publicly. 80func iterateFolding(profile *precis.Profile, oldStr string) (str string, err error) { 81 str = oldStr 82 // follow the stabilizing rules laid out here: 83 // https://tools.ietf.org/html/draft-ietf-precis-7564bis-10.html#section-7 84 for i := 0; i < 4; i++ { 85 str, err = profile.CompareKey(str) 86 if err != nil { 87 return "", err 88 } 89 if oldStr == str { 90 break 91 } 92 oldStr = str 93 } 94 if oldStr != str { 95 return "", errCouldNotStabilize 96 } 97 return str, nil 98} 99 100// Casefold returns a casefolded string, without doing any name or channel character checks. 101func Casefold(str string) (string, error) { 102 return casefoldWithSetting(str, globalCasemappingSetting) 103} 104 105func casefoldWithSetting(str string, setting Casemapping) (string, error) { 106 switch setting { 107 default: 108 return iterateFolding(precis.UsernameCaseMapped, str) 109 case CasemappingASCII: 110 return foldASCII(str) 111 case CasemappingPermissive: 112 return foldPermissive(str) 113 } 114} 115 116// CasefoldChannel returns a casefolded version of a channel name. 117func CasefoldChannel(name string) (string, error) { 118 if len(name) == 0 { 119 return "", errStringIsEmpty 120 } 121 122 // don't casefold the preceding #'s 123 var start int 124 for start = 0; start < len(name) && name[start] == '#'; start += 1 { 125 } 126 127 if start == 0 { 128 // no preceding #'s 129 return "", errInvalidCharacter 130 } 131 132 lowered, err := Casefold(name[start:]) 133 if err != nil { 134 return "", err 135 } 136 137 // space can't be used 138 // , is used as a separator 139 // * is used in mask matching 140 // ? is used in mask matching 141 if strings.ContainsAny(lowered, " ,*?") { 142 return "", errInvalidCharacter 143 } 144 145 return name[:start] + lowered, err 146} 147 148// CasefoldName returns a casefolded version of a nick/user name. 149func CasefoldName(name string) (string, error) { 150 lowered, err := Casefold(name) 151 152 if err != nil { 153 return "", err 154 } else if len(lowered) == 0 { 155 return "", errStringIsEmpty 156 } 157 158 // # is a channel prefix 159 // ~&@%+ are channel membership prefixes 160 // - I feel like disallowing 161 if strings.ContainsAny(lowered, protocolBreakingNameCharacters) || strings.ContainsAny(string(lowered[0]), "#~&@%+-") { 162 return "", errInvalidCharacter 163 } 164 165 return lowered, err 166} 167 168// returns true if the given name is a valid ident, using a mix of Insp and 169// Chary's ident restrictions. 170func isIdent(name string) bool { 171 if len(name) < 1 { 172 return false 173 } 174 175 for i := 0; i < len(name); i++ { 176 chr := name[i] 177 if (chr >= 'a' && chr <= 'z') || (chr >= 'A' && chr <= 'Z') || (chr >= '0' && chr <= '9') { 178 continue // alphanumerics 179 } 180 if i == 0 { 181 return false // first char must be alnum 182 } 183 switch chr { 184 case '[', '\\', ']', '^', '_', '{', '|', '}', '-', '.', '`': 185 continue // allowed chars 186 default: 187 return false // disallowed chars 188 } 189 } 190 191 return true 192} 193 194// Skeleton produces a canonicalized identifier that tries to catch 195// homoglyphic / confusable identifiers. It's a tweaked version of the TR39 196// skeleton algorithm. We apply the skeleton algorithm first and only then casefold, 197// because casefolding first would lose some information about visual confusability. 198// This has the weird consequence that the skeleton is not a function of the 199// casefolded identifier --- therefore it must always be computed 200// from the original (unfolded) identifier and stored/tracked separately from the 201// casefolded identifier. 202func Skeleton(name string) (string, error) { 203 switch globalCasemappingSetting { 204 default: 205 return realSkeleton(name) 206 case CasemappingASCII: 207 // identity function is fine because we independently case-normalize in Casefold 208 return name, nil 209 } 210} 211 212func realSkeleton(name string) (string, error) { 213 // XXX the confusables table includes some, but not all, fullwidth->standard 214 // mappings for latin characters. do a pass of explicit width folding, 215 // same as PRECIS: 216 name = width.Fold.String(name) 217 218 name = confusables.SkeletonTweaked(name) 219 220 // internationalized lowercasing for skeletons; this is much more lenient than 221 // Casefold. In particular, skeletons are expected to mix scripts (which may 222 // violate the bidi rule). We also don't care if they contain runes 223 // that are disallowed by PRECIS, because every identifier must independently 224 // pass PRECIS --- we are just further canonicalizing the skeleton. 225 return cases.Fold().String(name), nil 226} 227 228// maps a nickmask fragment to an expanded, casefolded wildcard: 229// Shivaram@good-fortune -> *!shivaram@good-fortune 230// EDMUND -> edmund!*@* 231func CanonicalizeMaskWildcard(userhost string) (expanded string, err error) { 232 userhost = strings.TrimSpace(userhost) 233 var nick, user, host string 234 bangIndex := strings.IndexByte(userhost, '!') 235 strudelIndex := strings.IndexByte(userhost, '@') 236 237 if bangIndex != -1 && bangIndex < strudelIndex { 238 nick = userhost[:bangIndex] 239 user = userhost[bangIndex+1 : strudelIndex] 240 host = userhost[strudelIndex+1:] 241 } else if bangIndex != -1 && strudelIndex == -1 { 242 nick = userhost[:bangIndex] 243 user = userhost[bangIndex+1:] 244 } else if bangIndex != -1 && strudelIndex < bangIndex { 245 // @ before !, fail 246 return "", errNicknameInvalid 247 } else if bangIndex == -1 && strudelIndex != -1 { 248 user = userhost[:strudelIndex] 249 host = userhost[strudelIndex+1:] 250 } else if bangIndex == -1 && strudelIndex == -1 { 251 nick = userhost 252 } else { 253 // shouldn't be possible 254 return "", errInvalidParams 255 } 256 257 if nick == "" { 258 nick = "*" 259 } 260 if nick != "*" { 261 // XXX wildcards are not accepted with most unicode nicks, 262 // because the * character breaks casefolding 263 nick, err = Casefold(nick) 264 if err != nil { 265 return "", err 266 } 267 } 268 if user == "" { 269 user = "*" 270 } 271 if user != "*" { 272 user = strings.ToLower(user) 273 } 274 if host == "" { 275 host = "*" 276 } 277 if host != "*" { 278 host = strings.ToLower(host) 279 } 280 expanded = fmt.Sprintf("%s!%s@%s", nick, user, host) 281 if utils.SafeErrorParam(expanded) != expanded { 282 err = errInvalidCharacter 283 } 284 return 285} 286 287func foldASCII(str string) (result string, err error) { 288 if !IsPrintableASCII(str) { 289 return "", errInvalidCharacter 290 } 291 return strings.ToLower(str), nil 292} 293 294func IsPrintableASCII(str string) bool { 295 for i := 0; i < len(str); i++ { 296 // allow space here because it's technically printable; 297 // it will be disallowed later by CasefoldName/CasefoldChannel 298 chr := str[i] 299 if chr < ' ' || chr > '~' { 300 return false 301 } 302 } 303 return true 304} 305 306func foldPermissive(str string) (result string, err error) { 307 if !permissiveCharsRegex.MatchString(str) { 308 return "", errInvalidCharacter 309 } 310 // YOLO 311 str = norm.NFD.String(str) 312 str = cases.Fold().String(str) 313 str = norm.NFD.String(str) 314 return str, nil 315} 316 317// Reduce, e.g., `alice!~u@host` to `alice` 318func NUHToNick(nuh string) (nick string) { 319 if idx := strings.IndexByte(nuh, '!'); idx != -1 { 320 return nuh[0:idx] 321 } 322 return nuh 323} 324