/*
 * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 8204938 8242010
 * @summary Checks the IANA language subtag registry data update
 *          with Locale.LanguageRange parse method.
 * @run main LSRDataTest
 */
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Locale;
import java.util.Locale.LanguageRange;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.Locale.LanguageRange.MAX_WEIGHT;
import static java.util.Locale.LanguageRange.MIN_WEIGHT;

/**
 * Builds equivalence maps from the language subtag registry file, uses them
 * to compute the priority list expected for a set of language ranges, and
 * compares that list with the one returned by
 * {@link LanguageRange#parse(String)}.
 */
public class LSRDataTest {

    private static final char HYPHEN = '-';

    // tag <-> preferred value, for tags that have exactly one equivalent
    private static final Map<String, String> singleLangEquivMap = new HashMap<>();
    // tag -> all its equivalents, for tags that have more than one equivalent
    private static final Map<String, List<String>> multiLangEquivsMap = new HashMap<>();
    // "-subtag" <-> "-preferred" for region and variant subtags
    private static final Map<String, String> regionVariantEquivMap = new HashMap<>();

    // path to the lsr file from the make folder, this test relies on the
    // relative path to the file in the make folder, considering
    // test and make will always exist in the same jdk layout
    private static final String LSR_FILE_PATH = System.getProperty("test.src", ".")
            + "/../../../../../make/data/lsrdata/language-subtag-registry.txt";

    /**
     * Loads the registry data and runs the three comparisons: a fixed list of
     * weighted tags, every language tag that has an equivalent, and every
     * region/variant subtag that has an equivalent.
     *
     * @param args unused
     * @throws IOException if the registry file cannot be resolved or read
     * @throws RuntimeException if the two priority lists differ
     */
    public static void main(String[] args) throws IOException {

        loadLSRData(Paths.get(LSR_FILE_PATH).toRealPath());

        // checking the tags with weight
        String ranges = "Accept-Language: aam, adp, aue, bcg, cqu, ema,"
                + " en-gb-oed, gti, koj, kwq, kxe, lii, lmm, mtm, ngv,"
                + " oyb, phr, pub, suj, taj;q=0.9, yug;q=0.5, gfx;q=0.4";
        List<LanguageRange> expected = parse(ranges);
        List<LanguageRange> actual = LanguageRange.parse(ranges);
        // 'expected' is this test's own model, 'actual' is the API result;
        // pass them in signature order so failure messages are labelled
        // correctly.
        checkEquality(expected, actual);

        // checking all language ranges
        ranges = generateLangRanges();
        expected = parse(ranges);
        actual = LanguageRange.parse(ranges);
        checkEquality(expected, actual);

        // checking all region/variant ranges
        ranges = generateRegionRanges();
        expected = parse(ranges);
        actual = LanguageRange.parse(ranges);
        checkEquality(expected, actual);
    }

    /**
     * Generates a range string containing all equiv language tags.
     *
     * @return the keys of both language maps joined with ","
     */
    private static String generateLangRanges() {
        return Stream.concat(singleLangEquivMap.keySet().stream(),
                        multiLangEquivsMap.keySet().stream())
                .collect(Collectors.joining(","));
    }

    /**
     * Generates a range string containing all equiv region tags.
     *
     * @return each key of the region/variant map prefixed with "en",
     *         joined with ", "
     */
    private static String generateRegionRanges() {
        return regionVariantEquivMap.keySet().stream()
                .map(r -> "en".concat(r))
                .collect(Collectors.joining(", "));
    }

    /**
     * Loads LSR data from the file. Lines are lower-cased, the fields
     * {@code type}, {@code tag}/{@code subtag}, {@code preferred-value} and
     * {@code prefix} are collected, and each record (terminated by a
     * {@code %%} line, or by end of file) is handed to
     * {@link #processDataAndGenerateMaps}.
     *
     * @param path location of the registry file
     * @throws IOException if the file cannot be read
     */
    private static void loadLSRData(Path path) throws IOException {
        String type = null;
        String tag = null;
        String preferred = null;
        String prefix = null;

        for (String line : Files.readAllLines(path, Charset.forName("UTF-8"))) {
            line = line.toLowerCase(Locale.ROOT);
            // the field value starts right after the first space
            int index = line.indexOf(' ') + 1;
            if (line.startsWith("type:")) {
                type = line.substring(index);
            } else if (line.startsWith("tag:") || line.startsWith("subtag:")) {
                tag = line.substring(index);
            } else if (line.startsWith("preferred-value:")) {
                preferred = line.substring(index);
            } else if (line.startsWith("prefix:")) {
                prefix = line.substring(index);
            } else if (line.equals("%%")) {
                processDataAndGenerateMaps(type, tag, preferred, prefix);
                type = null;
                tag = null;
                preferred = null;
                prefix = null;
            }
        }

        // Last entry
        processDataAndGenerateMaps(type, tag, preferred, prefix);
    }

    /**
     * Records one registry entry in the equivalence maps. Entries without a
     * type, a tag or a preferred value are ignored.
     *
     * @param type      value of the record's {@code type} field, may be null
     * @param tag       value of the {@code tag}/{@code subtag} field, may be null
     * @param preferred value of the {@code preferred-value} field, may be null
     * @param prefix    value of the {@code prefix} field, may be null
     * @throws RuntimeException if the region/variant duplicate guard fires
     */
    private static void processDataAndGenerateMaps(String type,
                                                   String tag,
                                                   String preferred,
                                                   String prefix) {

        if (type == null || tag == null || preferred == null) {
            return;
        }

        if (type.equals("extlang") && prefix != null) {
            tag = prefix + "-" + tag;
        }

        if (type.equals("region") || type.equals("variant")) {
            // NOTE(review): keys of regionVariantEquivMap are stored with a
            // leading HYPHEN while 'preferred' is looked up here without one,
            // so this guard looks like it can never fire - confirm the
            // intended behaviour before tightening it.
            if (!regionVariantEquivMap.containsKey(preferred)) {
                String tPref = HYPHEN + preferred;
                String tTag = HYPHEN + tag;
                regionVariantEquivMap.put(tPref, tTag);
                regionVariantEquivMap.put(tTag, tPref);
            } else {
                throw new RuntimeException("New case, need implementation."
                        + " A region/variant subtag \"" + preferred
                        + "\" is registered for more than one subtags.");
            }
        } else { // language, extlang, grandfathered, and redundant
            boolean inSingle = singleLangEquivMap.containsKey(preferred);
            boolean inMulti = multiLangEquivsMap.containsKey(preferred);

            if (!inSingle && !inMulti) {
                // new entry add it into single equiv map
                singleLangEquivMap.put(preferred, tag);
                singleLangEquivMap.put(tag, preferred);
            } else if (inSingle && !inMulti) {
                String value = singleLangEquivMap.get(preferred);
                List<String> subtags = List.of(preferred, value, tag);
                // remove from single equiv map before adding to multi equiv
                singleLangEquivMap.keySet().removeAll(subtags);
                addEntriesToMultiEquivsMap(subtags);
            } else if (inMulti && !inSingle) {
                // Work on a copy: the stored list comes from
                // Collectors.toList(), which does not promise mutability, and
                // every affected entry is rewritten below anyway.
                List<String> subtags = new ArrayList<>(multiLangEquivsMap.get(preferred));
                // should use the order preferred, subtags, tag to keep the
                // expected order same as the JDK API in multi equivalent maps
                subtags.add(0, preferred);
                subtags.add(tag);
                addEntriesToMultiEquivsMap(subtags);
            }
        }
    }

    /**
     * Adds entries into the multi equivalent map from the given subtags: for
     * each subtag, the key is the subtag and the value is the list of all
     * given subtags excluding the ones equal to that key.
     *
     * @param subtags the group of mutually equivalent tags
     */
    private static void addEntriesToMultiEquivsMap(List<String> subtags) {
        subtags.forEach(subtag -> multiLangEquivsMap.put(subtag, subtags.stream()
                .filter(t -> !t.equals(subtag))
                .collect(Collectors.toList())));
    }

    /**
     * Computes the expected priority list for the given ranges using the
     * equivalence maps built from the registry file. Spaces are removed, the
     * text is lower-cased and an optional "accept-language:" prefix is
     * dropped. Ranges are ordered by descending weight and each range is
     * followed by its equivalents.
     *
     * @param ranges comma separated language ranges, optionally weighted
     *               with ";q="
     * @return the expected priority list
     * @throws IllegalArgumentException if a weight is not a number or lies
     *         outside MIN_WEIGHT..MAX_WEIGHT
     */
    private static List<LanguageRange> parse(String ranges) {
        ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);
        if (ranges.startsWith("accept-language:")) {
            ranges = ranges.substring(16);
        }
        String[] langRanges = ranges.split(",");
        List<LanguageRange> priorityList = new ArrayList<>(langRanges.length);
        for (String range : langRanges) {
            int wIndex = range.indexOf(";q=");
            String tag;
            double weight;
            if (wIndex == -1) {
                tag = range;
                weight = MAX_WEIGHT;
            } else {
                tag = range.substring(0, wIndex);
                String weightText = range.substring(wIndex + 3);
                try {
                    weight = Double.parseDouble(weightText);
                } catch (NumberFormatException ex) {
                    // report the text that failed to parse and keep the cause
                    throw new IllegalArgumentException("weight=\"" + weightText
                            + "\" for language range \"" + tag + "\", should be"
                            + " represented as a double", ex);
                }

                if (weight < MIN_WEIGHT || weight > MAX_WEIGHT) {
                    throw new IllegalArgumentException("weight=" + weight
                            + " for language range \"" + tag
                            + "\", must be between " + MIN_WEIGHT
                            + " and " + MAX_WEIGHT + ".");
                }
            }

            LanguageRange entry = new LanguageRange(tag, weight);
            if (priorityList.contains(entry)) {
                continue;
            }

            // find the index in the list to add the current range at the
            // correct index sorted by the descending order of weight;
            // append when no lighter entry exists
            int index = priorityList.size();
            for (int i = 0; i < priorityList.size(); i++) {
                if (priorityList.get(i).getWeight() < weight) {
                    index = i;
                    break;
                }
            }
            priorityList.add(index, entry);

            // equivalents are always inserted directly after the range itself
            String equivalent = getEquivalentForRegionAndVariant(tag);
            if (equivalent != null) {
                insertIfAbsent(priorityList, index + 1, equivalent, weight);
            }

            List<String> equivalents = getEquivalentsForLanguage(tag);
            if (equivalents != null) {
                for (String equiv : equivalents) {
                    insertIfAbsent(priorityList, index + 1, equiv, weight);

                    equivalent = getEquivalentForRegionAndVariant(equiv);
                    if (equivalent != null) {
                        insertIfAbsent(priorityList, index + 1, equivalent, weight);
                    }
                }
            }
        }
        return priorityList;
    }

    /**
     * Inserts a range with the given weight at {@code position} unless an
     * equal range is already present in the list.
     */
    private static void insertIfAbsent(List<LanguageRange> priorityList,
                                       int position, String range, double weight) {
        LanguageRange candidate = new LanguageRange(range, weight);
        if (!priorityList.contains(candidate)) {
            priorityList.add(position, candidate);
        }
    }

    /**
     * A faster alternative approach to String.replaceFirst(), if the given
     * string is a literal String, not a regex.
     *
     * @param range       the text to search
     * @param substr      the literal text to look for
     * @param replacement the text put in place of the first match
     * @return {@code range} with the first occurrence of {@code substr}
     *         replaced, or {@code range} itself when there is no occurrence
     */
    private static String replaceFirstSubStringMatch(String range,
            String substr, String replacement) {
        int pos = range.indexOf(substr);
        if (pos == -1) {
            return range;
        } else {
            return range.substring(0, pos) + replacement
                    + range.substring(pos + substr.length());
        }
    }

    /**
     * Looks up language equivalents for the range, trying the whole range
     * first and then progressively shorter prefixes obtained by dropping the
     * last subtag.
     *
     * @param range the language range to look up
     * @return the range with the matched prefix replaced by each equivalent,
     *         or {@code null} when no prefix has an equivalent
     */
    private static List<String> getEquivalentsForLanguage(String range) {
        String r = range;

        while (r.length() > 0) {
            if (singleLangEquivMap.containsKey(r)) {
                String equiv = singleLangEquivMap.get(r);
                // Return immediately for performance if the first matching
                // subtag is found.
                return List.of(replaceFirstSubStringMatch(range, r, equiv));
            } else if (multiLangEquivsMap.containsKey(r)) {
                List<String> equivs = multiLangEquivsMap.get(r);
                List<String> result = new ArrayList<>(equivs.size());
                for (String equiv : equivs) {
                    result.add(replaceFirstSubStringMatch(range, r, equiv));
                }
                return result;
            }

            // Truncate the last subtag simply.
            int index = r.lastIndexOf(HYPHEN);
            if (index == -1) {
                break;
            }
            r = r.substring(0, index);
        }

        return null;
    }

    /**
     * Looks for a region or variant subtag of the range that has an
     * equivalent. A match is ignored when it lies after the extension key
     * position reported by {@link #getExtentionKeyIndex}, and must end at the
     * end of the range or be followed by a hyphen.
     *
     * @param range the language range to look up
     * @return the range with the first matching subtag replaced by its
     *         equivalent, or {@code null} when nothing matches
     */
    private static String getEquivalentForRegionAndVariant(String range) {
        int extensionKeyIndex = getExtentionKeyIndex(range);

        for (String subtag : regionVariantEquivMap.keySet()) {
            int index = range.indexOf(subtag);
            if (index == -1) {
                continue;
            }

            // Check if the matching text is a valid region or variant.
            if (extensionKeyIndex != Integer.MIN_VALUE
                    && index > extensionKeyIndex) {
                continue;
            }

            int len = index + subtag.length();
            if (range.length() == len || range.charAt(len) == HYPHEN) {
                return replaceFirstSubStringMatch(range, subtag,
                        regionVariantEquivMap.get(subtag));
            }
        }

        return null;
    }

    /**
     * Finds the first pair of hyphens that enclose exactly one character,
     * scanning from index 1.
     *
     * @param s the language range to scan
     * @return the index of the first hyphen of that pair, or
     *         {@code Integer.MIN_VALUE} when there is no such pair
     */
    private static int getExtentionKeyIndex(String s) {
        char[] c = s.toCharArray();
        int index = Integer.MIN_VALUE;
        for (int i = 1; i < c.length; i++) {
            if (c[i] == HYPHEN) {
                if (i - index == 2) {
                    return index;
                } else {
                    index = i;
                }
            }
        }
        return Integer.MIN_VALUE;
    }

    /**
     * Verifies that both lists have the same size and hold the same range and
     * weight at every index.
     *
     * @param expected the list computed by this test
     * @param actual   the list produced by the API under test
     * @throws RuntimeException on the first difference found
     */
    private static void checkEquality(List<LanguageRange> expected,
                                      List<LanguageRange> actual) {

        int expectedSize = expected.size();
        int actualSize = actual.size();

        if (expectedSize != actualSize) {
            throw new RuntimeException("[FAILED: Size of the priority list"
                    + " does not match, Expected size=" + expectedSize
                    + ", Actual size=" + actualSize + "]");
        }

        for (int i = 0; i < expectedSize; i++) {
            LanguageRange lr1 = expected.get(i);
            LanguageRange lr2 = actual.get(i);

            if (!lr1.getRange().equals(lr2.getRange())
                    || lr1.getWeight() != lr2.getWeight()) {
                throw new RuntimeException("[FAILED: Ranges at index "
                        + i + " do not match Expected: range=" + lr1.getRange()
                        + ", weight=" + lr1.getWeight() + ", Actual: range="
                        + lr2.getRange() + ", weight=" + lr2.getWeight() + "]");
            }
        }
    }
}