1 /*
2  * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /*
25  * @test
26  * @bug 8204938 8242010
27  * @summary Checks the IANA language subtag registry data update
28  *          with Locale.LanguageRange parse method.
29  * @run main LSRDataTest
30  */
31 import java.io.IOException;
32 import java.nio.charset.Charset;
33 import java.nio.file.Files;
34 import java.nio.file.Paths;
35 import java.nio.file.Path;
36 import java.util.ArrayList;
37 import java.util.HashMap;
38 import java.util.List;
39 import java.util.Map;
40 import java.util.Locale;
41 import java.util.Locale.LanguageRange;
42 import java.util.stream.Collectors;
43 import java.util.stream.Stream;
44 
45 import static java.util.Locale.LanguageRange.MAX_WEIGHT;
46 import static java.util.Locale.LanguageRange.MIN_WEIGHT;
47 
/**
 * Checks {@link LanguageRange#parse(String)} against an independent
 * expansion of the same range strings that is driven directly by the IANA
 * language subtag registry file shipped in the {@code make} folder.
 *
 * <p>The registry is read once into three lookup tables (single language
 * equivalents, multiple language equivalents, and region/variant
 * equivalents). {@link #parse(String)} then builds the priority list this
 * test expects, which is compared element by element with the list returned
 * by the JDK.
 */
public class LSRDataTest {

    private static final char HYPHEN = '-';

    // tag -> its only equivalent tag (stored in both directions)
    private static final Map<String, String> singleLangEquivMap = new HashMap<>();
    // tag -> all of its equivalent tags, for tags having more than one
    private static final Map<String, List<String>> multiLangEquivsMap = new HashMap<>();
    // "-subtag" -> "-equivalent" (stored in both directions); note that
    // every key and value carries a leading hyphen
    private static final Map<String, String> regionVariantEquivMap = new HashMap<>();

    // path to the lsr file from the make folder, this test relies on the
    // relative path to the file in the make folder, considering
    // test and make will always exist in the same jdk layout
    private static final String LSR_FILE_PATH = System.getProperty("test.src", ".")
                + "/../../../../../make/data/lsrdata/language-subtag-registry.txt";

    /**
     * Loads the registry data and verifies three range strings: a fixed
     * weighted sample, every language tag that has an equivalent, and every
     * region/variant subtag that has an equivalent.
     *
     * @param args unused
     * @throws IOException if the registry file cannot be resolved or read
     * @throws RuntimeException if the JDK result differs from the expected one
     */
    public static void main(String[] args) throws IOException {

        loadLSRData(Paths.get(LSR_FILE_PATH).toRealPath());

        // checking the tags with weight
        verifyParse("Accept-Language: aam, adp, aue, bcg, cqu, ema,"
                + " en-gb-oed, gti, koj, kwq, kxe, lii, lmm, mtm, ngv,"
                + " oyb, phr, pub, suj, taj;q=0.9, yug;q=0.5, gfx;q=0.4");

        // checking all language ranges
        verifyParse(generateLangRanges());

        // checking all region/variant ranges
        verifyParse(generateRegionRanges());
    }

    /**
     * Parses {@code ranges} with both this test's reference implementation
     * and the JDK, and fails if the two priority lists differ.
     */
    private static void verifyParse(String ranges) {
        List<LanguageRange> expected = parse(ranges);
        List<LanguageRange> actual = LanguageRange.parse(ranges);
        // argument order matters: it decides which side is reported as
        // "Expected" and which as "Actual" in the failure message
        checkEquality(expected, actual);
    }

    // generate range string containing all equiv language tags
    private static String generateLangRanges() {
        return Stream.concat(singleLangEquivMap.keySet().stream(),
                        multiLangEquivsMap.keySet().stream())
                .collect(Collectors.joining(","));
    }

    // generate range string containing all equiv region tags; the map keys
    // already start with a hyphen, so "en" + key forms a complete tag
    private static String generateRegionRanges() {
        return regionVariantEquivMap.keySet().stream()
                .map(r -> "en".concat(r)).collect(Collectors.joining(", "));
    }

    /**
     * Reads the registry file and fills the equivalence maps.
     *
     * <p>Lines are lower-cased, the value of the Type, Tag/Subtag,
     * Preferred-Value and Prefix fields is remembered, and a record is
     * processed whenever the {@code %%} separator (or the end of the file)
     * is reached.
     *
     * @param path location of the registry file
     * @throws IOException if the file cannot be read
     */
    private static void loadLSRData(Path path) throws IOException {
        String type = null;
        String tag = null;
        String preferred = null;
        String prefix = null;

        for (String line : Files.readAllLines(path, Charset.forName("UTF-8"))) {
            line = line.toLowerCase(Locale.ROOT);
            // the field value starts right after the first space
            int index = line.indexOf(' ') + 1;
            if (line.startsWith("type:")) {
                type = line.substring(index);
            } else if (line.startsWith("tag:") || line.startsWith("subtag:")) {
                tag = line.substring(index);
            } else if (line.startsWith("preferred-value:")) {
                preferred = line.substring(index);
            } else if (line.startsWith("prefix:")) {
                prefix = line.substring(index);
            } else if (line.equals("%%")) {
                processDataAndGenerateMaps(type, tag, preferred, prefix);
                type = null;
                tag = null;
                preferred = null;
                prefix = null;
            }
        }

        // Last entry
        processDataAndGenerateMaps(type, tag, preferred, prefix);
    }

    /**
     * Records one registry entry in the equivalence maps. Entries lacking a
     * type, a tag or a preferred value are ignored.
     *
     * @param type      value of the Type field
     * @param tag       value of the Tag or Subtag field
     * @param preferred value of the Preferred-Value field
     * @param prefix    value of the Prefix field, may be {@code null}
     * @throws RuntimeException if a region/variant preferred value has
     *         already been recorded, a case this test does not handle
     */
    private static void processDataAndGenerateMaps(String type,
            String tag,
            String preferred,
            String prefix) {

        if (type == null || tag == null || preferred == null) {
            return;
        }

        if (type.equals("extlang") && prefix != null) {
            tag = prefix + "-" + tag;
        }

        if (type.equals("region") || type.equals("variant")) {
            String tPref = HYPHEN + preferred;
            String tTag = HYPHEN + tag;
            // the map is keyed by the hyphen-prefixed form, so the lookup
            // has to use that form as well to detect a repeated subtag
            if (regionVariantEquivMap.containsKey(tPref)) {
                throw new RuntimeException("New case, need implementation."
                        + " A region/variant subtag \"" + preferred
                        + "\" is registered for more than one subtags.");
            }
            regionVariantEquivMap.put(tPref, tTag);
            regionVariantEquivMap.put(tTag, tPref);
            return;
        }

        // language, extlang, grandfathered, and redundant
        boolean inSingle = singleLangEquivMap.containsKey(preferred);
        boolean inMulti = multiLangEquivsMap.containsKey(preferred);

        if (!inSingle && !inMulti) {
            // new entry add it into single equiv map
            singleLangEquivMap.put(preferred, tag);
            singleLangEquivMap.put(tag, preferred);
        } else if (inSingle && !inMulti) {
            String value = singleLangEquivMap.get(preferred);
            List<String> subtags = List.of(preferred, value, tag);
            // remove from single equiv map before adding to multi equiv
            singleLangEquivMap.keySet().removeAll(subtags);
            addEntriesToMultiEquivsMap(subtags);
        } else if (inMulti && !inSingle) {
            // work on a copy so the list still stored in the map is not
            // modified while the replacement entries are being built
            List<String> subtags = new ArrayList<>(multiLangEquivsMap.get(preferred));
            // should use the order preferred, subtags, tag to keep the
            // expected order same as the JDK API in multi equivalent maps
            subtags.add(0, preferred);
            subtags.add(tag);
            addEntriesToMultiEquivsMap(subtags);
        }
    }

    // Add entries into the multi equivalent map from the given subtags
    private static void addEntriesToMultiEquivsMap(List<String> subtags) {
        // for each subtag within the given subtags, add an entry in multi
        // equivalent language map with subtag as the key and the value
        // as the list of all subtags excluding the one which is getting
        // traversed
        subtags.forEach(subtag -> multiLangEquivsMap.put(subtag, subtags.stream()
                .filter(t -> !t.equals(subtag))
                .collect(Collectors.toList())));
    }

    /**
     * Reference implementation that builds the priority list expected from
     * {@link LanguageRange#parse(String)}.
     *
     * <p>Spaces are removed, the text is lower-cased and an optional
     * {@code accept-language:} prefix is dropped. Each comma separated range
     * is inserted in descending order of weight, immediately followed by its
     * region/variant equivalent and its language equivalents taken from the
     * loaded maps. Ranges already present in the list are not added again.
     *
     * @param ranges the range string to expand
     * @return the expected priority list
     * @throws IllegalArgumentException if a weight is not a number or lies
     *         outside {@code MIN_WEIGHT}..{@code MAX_WEIGHT}
     */
    private static List<LanguageRange> parse(String ranges) {
        ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);
        if (ranges.startsWith("accept-language:")) {
            ranges = ranges.substring(16);
        }
        String[] langRanges = ranges.split(",");
        List<LanguageRange> priorityList = new ArrayList<>(langRanges.length);
        for (String range : langRanges) {
            int wIndex = range.indexOf(";q=");
            String tag;
            double weight;
            if (wIndex == -1) {
                tag = range;
                weight = MAX_WEIGHT;
            } else {
                tag = range.substring(0, wIndex);
                String weightText = range.substring(wIndex + 3);
                try {
                    weight = Double.parseDouble(weightText);
                } catch (NumberFormatException ex) {
                    // report the text that failed to parse and keep the cause
                    throw new IllegalArgumentException("weight=\"" + weightText
                            + "\" for language range \"" + tag + "\", should be"
                            + " represented as a double", ex);
                }

                if (weight < MIN_WEIGHT || weight > MAX_WEIGHT) {
                    throw new IllegalArgumentException("weight=" + weight
                            + " for language range \"" + tag
                            + "\", must be between " + MIN_WEIGHT
                            + " and " + MAX_WEIGHT + ".");
                }
            }

            LanguageRange entry = new LanguageRange(tag, weight);
            if (priorityList.contains(entry)) {
                continue;
            }

            // find the index in the list to add the current range at the
            // correct index sorted by the descending order of weight;
            // append at the end when no lighter range exists yet
            int index = priorityList.size();
            for (int i = 0; i < priorityList.size(); i++) {
                if (priorityList.get(i).getWeight() < weight) {
                    index = i;
                    break;
                }
            }
            priorityList.add(index, entry);

            // every equivalent is inserted directly behind the range itself,
            // so a later equivalent ends up in front of an earlier one
            addIfAbsent(priorityList, index + 1,
                    getEquivalentForRegionAndVariant(tag), weight);

            for (String equiv : getEquivalentsForLanguage(tag)) {
                addIfAbsent(priorityList, index + 1, equiv, weight);
                addIfAbsent(priorityList, index + 1,
                        getEquivalentForRegionAndVariant(equiv), weight);
            }
        }
        return priorityList;
    }

    /**
     * Inserts a range for {@code tag} at {@code index} unless {@code tag} is
     * {@code null} or an equal range is already in the list.
     */
    private static void addIfAbsent(List<LanguageRange> priorityList, int index,
            String tag, double weight) {
        if (tag == null) {
            return;
        }
        LanguageRange equivRange = new LanguageRange(tag, weight);
        if (!priorityList.contains(equivRange)) {
            priorityList.add(index, equivRange);
        }
    }

    /**
     * A faster alternative approach to String.replaceFirst(), if the given
     * string is a literal String, not a regex.
     *
     * @return {@code range} with the first occurrence of {@code substr}
     *         replaced, or {@code range} itself if there is no occurrence
     */
    private static String replaceFirstSubStringMatch(String range,
            String substr, String replacement) {
        int pos = range.indexOf(substr);
        if (pos == -1) {
            return range;
        } else {
            return range.substring(0, pos) + replacement
                    + range.substring(pos + substr.length());
        }
    }

    /**
     * Looks up the language equivalents of {@code range}, trying the whole
     * range first and then progressively shorter prefixes obtained by
     * dropping the last subtag.
     *
     * @return the equivalent ranges for the first matching prefix, or an
     *         empty list if no prefix has an equivalent
     */
    private static List<String> getEquivalentsForLanguage(String range) {
        String r = range;

        while (!r.isEmpty()) {
            if (singleLangEquivMap.containsKey(r)) {
                String equiv = singleLangEquivMap.get(r);
                // Return immediately for performance if the first matching
                // subtag is found.
                return List.of(replaceFirstSubStringMatch(range, r, equiv));
            } else if (multiLangEquivsMap.containsKey(r)) {
                List<String> equivs = multiLangEquivsMap.get(r);
                List<String> result = new ArrayList<>(equivs.size());
                for (String equiv : equivs) {
                    result.add(replaceFirstSubStringMatch(range, r, equiv));
                }
                return result;
            }

            // Truncate the last subtag simply.
            int index = r.lastIndexOf(HYPHEN);
            if (index == -1) {
                break;
            }
            r = r.substring(0, index);
        }

        return List.of();
    }

    /**
     * Looks for a region or variant subtag of {@code range} that has an
     * equivalent and returns the range with that subtag replaced.
     *
     * @return the equivalent range, or {@code null} if there is none
     */
    private static String getEquivalentForRegionAndVariant(String range) {
        int extensionKeyIndex = getExtensionKeyIndex(range);

        for (Map.Entry<String, String> equiv : regionVariantEquivMap.entrySet()) {
            String subtag = equiv.getKey();
            int index = range.indexOf(subtag);
            if (index == -1) {
                continue;
            }

            // Check if the matching text is a valid region or variant.
            if (extensionKeyIndex != Integer.MIN_VALUE
                    && index > extensionKeyIndex) {
                continue;
            }

            // the match must cover a complete subtag, i.e. end at the end
            // of the range or be followed by another hyphen
            int len = index + subtag.length();
            if (range.length() == len || range.charAt(len) == HYPHEN) {
                return replaceFirstSubStringMatch(range, subtag, equiv.getValue());
            }
        }

        return null;
    }

    /**
     * Finds the first one-character subtag that is enclosed by hyphens.
     *
     * @return the index of the hyphen in front of that subtag, or
     *         {@code Integer.MIN_VALUE} if there is no such subtag
     */
    private static int getExtensionKeyIndex(String s) {
        // position of the most recent hyphen, MIN_VALUE while none was seen
        int index = Integer.MIN_VALUE;
        for (int i = 1; i < s.length(); i++) {
            if (s.charAt(i) == HYPHEN) {
                if (index != Integer.MIN_VALUE && i - index == 2) {
                    return index;
                }
                index = i;
            }
        }
        return Integer.MIN_VALUE;
    }

    /**
     * Compares two priority lists by size and then element by element on
     * range and weight.
     *
     * @param expected the list built by this test
     * @param actual   the list returned by the JDK
     * @throws RuntimeException describing the first difference found
     */
    private static void checkEquality(List<LanguageRange> expected,
            List<LanguageRange> actual) {

        int expectedSize = expected.size();
        int actualSize = actual.size();

        if (expectedSize != actualSize) {
            throw new RuntimeException("[FAILED: Size of the priority list"
                    + " does not match, Expected size=" + expectedSize
                    + ", Actual size=" + actualSize + "]");
        }

        for (int i = 0; i < expectedSize; i++) {
            LanguageRange lr1 = expected.get(i);
            LanguageRange lr2 = actual.get(i);

            if (!lr1.getRange().equals(lr2.getRange())
                    || lr1.getWeight() != lr2.getWeight()) {
                throw new RuntimeException("[FAILED: Ranges at index "
                        + i + " do not match Expected: range=" + lr1.getRange()
                        + ", weight=" + lr1.getWeight() + ", Actual: range="
                        + lr2.getRange() + ", weight=" + lr2.getWeight() + "]");
            }
        }
    }
}
379