1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "builtin/intl/LanguageTag.h"
8 
9 #include "mozilla/intl/Locale.h"
10 #include "mozilla/Span.h"
11 
12 #include "builtin/intl/StringAsciiChars.h"
13 #include "gc/Tracer.h"
14 #include "js/CharacterEncoding.h"
15 #include "js/TracingAPI.h"
16 #include "vm/JSContext.h"
17 
18 namespace js {
19 namespace intl {
20 
ParseLocale(JSContext * cx,HandleLinearString str,mozilla::intl::Locale & result)21 [[nodiscard]] bool ParseLocale(JSContext* cx, HandleLinearString str,
22                                mozilla::intl::Locale& result) {
23   if (StringIsAscii(str)) {
24     intl::StringAsciiChars chars(str);
25     if (!chars.init(cx)) {
26       return false;
27     }
28 
29     if (mozilla::intl::LocaleParser::TryParse(chars, result).isOk()) {
30       return true;
31     }
32   }
33 
34   if (UniqueChars localeChars = QuoteString(cx, str, '"')) {
35     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
36                               JSMSG_INVALID_LANGUAGE_TAG, localeChars.get());
37   }
38   return false;
39 }
40 
ParseStandaloneLanguageTag(HandleLinearString str,mozilla::intl::LanguageSubtag & result)41 bool ParseStandaloneLanguageTag(HandleLinearString str,
42                                 mozilla::intl::LanguageSubtag& result) {
43   // Tell the analysis the |IsStructurallyValidLanguageTag| function can't GC.
44   JS::AutoSuppressGCAnalysis nogc;
45 
46   if (str->hasLatin1Chars()) {
47     if (!mozilla::intl::IsStructurallyValidLanguageTag<Latin1Char>(
48             str->latin1Range(nogc))) {
49       return false;
50     }
51     result.Set<Latin1Char>(str->latin1Range(nogc));
52   } else {
53     if (!mozilla::intl::IsStructurallyValidLanguageTag<char16_t>(
54             str->twoByteRange(nogc))) {
55       return false;
56     }
57     result.Set<char16_t>(str->twoByteRange(nogc));
58   }
59   return true;
60 }
61 
ParseStandaloneScriptTag(HandleLinearString str,mozilla::intl::ScriptSubtag & result)62 bool ParseStandaloneScriptTag(HandleLinearString str,
63                               mozilla::intl::ScriptSubtag& result) {
64   // Tell the analysis the |IsStructurallyValidScriptTag| function can't GC.
65   JS::AutoSuppressGCAnalysis nogc;
66 
67   if (str->hasLatin1Chars()) {
68     if (!mozilla::intl::IsStructurallyValidScriptTag<Latin1Char>(
69             str->latin1Range(nogc))) {
70       return false;
71     }
72     result.Set<Latin1Char>(str->latin1Range(nogc));
73   } else {
74     if (!mozilla::intl::IsStructurallyValidScriptTag<char16_t>(
75             str->twoByteRange(nogc))) {
76       return false;
77     }
78     result.Set<char16_t>(str->twoByteRange(nogc));
79   }
80   return true;
81 }
82 
ParseStandaloneRegionTag(HandleLinearString str,mozilla::intl::RegionSubtag & result)83 bool ParseStandaloneRegionTag(HandleLinearString str,
84                               mozilla::intl::RegionSubtag& result) {
85   // Tell the analysis the |IsStructurallyValidRegionTag| function can't GC.
86   JS::AutoSuppressGCAnalysis nogc;
87 
88   if (str->hasLatin1Chars()) {
89     if (!mozilla::intl::IsStructurallyValidRegionTag<Latin1Char>(
90             str->latin1Range(nogc))) {
91       return false;
92     }
93     result.Set<Latin1Char>(str->latin1Range(nogc));
94   } else {
95     if (!mozilla::intl::IsStructurallyValidRegionTag<char16_t>(
96             str->twoByteRange(nogc))) {
97       return false;
98     }
99     result.Set<char16_t>(str->twoByteRange(nogc));
100   }
101   return true;
102 }
103 
104 template <typename CharT>
IsAsciiLowercaseAlpha(mozilla::Span<const CharT> span)105 static bool IsAsciiLowercaseAlpha(mozilla::Span<const CharT> span) {
106   // Tell the analysis the |std::all_of| function can't GC.
107   JS::AutoSuppressGCAnalysis nogc;
108 
109   const CharT* ptr = span.data();
110   size_t length = span.size();
111   return std::all_of(ptr, ptr + length, mozilla::IsAsciiLowercaseAlpha<CharT>);
112 }
113 
IsAsciiLowercaseAlpha(JSLinearString * str)114 static bool IsAsciiLowercaseAlpha(JSLinearString* str) {
115   JS::AutoCheckCannotGC nogc;
116   if (str->hasLatin1Chars()) {
117     return IsAsciiLowercaseAlpha<Latin1Char>(str->latin1Range(nogc));
118   }
119   return IsAsciiLowercaseAlpha<char16_t>(str->twoByteRange(nogc));
120 }
121 
122 template <typename CharT>
IsAsciiAlpha(mozilla::Span<const CharT> span)123 static bool IsAsciiAlpha(mozilla::Span<const CharT> span) {
124   // Tell the analysis the |std::all_of| function can't GC.
125   JS::AutoSuppressGCAnalysis nogc;
126 
127   const CharT* ptr = span.data();
128   size_t length = span.size();
129   return std::all_of(ptr, ptr + length, mozilla::IsAsciiAlpha<CharT>);
130 }
131 
IsAsciiAlpha(JSLinearString * str)132 static bool IsAsciiAlpha(JSLinearString* str) {
133   JS::AutoCheckCannotGC nogc;
134   if (str->hasLatin1Chars()) {
135     return IsAsciiAlpha<Latin1Char>(str->latin1Range(nogc));
136   }
137   return IsAsciiAlpha<char16_t>(str->twoByteRange(nogc));
138 }
139 
ParseStandaloneISO639LanguageTag(JSContext * cx,HandleLinearString str)140 JS::Result<JSString*> ParseStandaloneISO639LanguageTag(JSContext* cx,
141                                                        HandleLinearString str) {
142   // ISO-639 language codes contain either two or three characters.
143   size_t length = str->length();
144   if (length != 2 && length != 3) {
145     return nullptr;
146   }
147 
148   // We can directly the return the input below if it's in the correct case.
149   bool isLowerCase = IsAsciiLowercaseAlpha(str);
150   if (!isLowerCase) {
151     // Must be an ASCII alpha string.
152     if (!IsAsciiAlpha(str)) {
153       return nullptr;
154     }
155   }
156 
157   mozilla::intl::LanguageSubtag languageTag;
158   if (str->hasLatin1Chars()) {
159     JS::AutoCheckCannotGC nogc;
160     languageTag.Set<Latin1Char>(str->latin1Range(nogc));
161   } else {
162     JS::AutoCheckCannotGC nogc;
163     languageTag.Set<char16_t>(str->twoByteRange(nogc));
164   }
165 
166   if (!isLowerCase) {
167     // The language subtag is canonicalized to lower case.
168     languageTag.ToLowerCase();
169   }
170 
171   // Reject the input if the canonical tag contains more than just a single
172   // language subtag.
173   if (mozilla::intl::Locale::ComplexLanguageMapping(languageTag)) {
174     return nullptr;
175   }
176 
177   // Take care to replace deprecated subtags with their preferred values.
178   JSString* result;
179   if (mozilla::intl::Locale::LanguageMapping(languageTag) || !isLowerCase) {
180     result = NewStringCopy<CanGC>(cx, languageTag.Span());
181   } else {
182     result = str;
183   }
184   if (!result) {
185     return cx->alreadyReportedOOM();
186   }
187   return result;
188 }
189 
trace(JSTracer * trc)190 void js::intl::UnicodeExtensionKeyword::trace(JSTracer* trc) {
191   TraceRoot(trc, &type_, "UnicodeExtensionKeyword::type");
192 }
193 
194 }  // namespace intl
195 }  // namespace js
196