IntlObject.cpp::removeUnicodeLocaleExtension() should not touch locales that end...
[WebKit-https.git] / Source / JavaScriptCore / runtime / IntlObject.cpp
1 /*
2  * Copyright (C) 2015 Andy VanWagoner (thetalecrafter@gmail.com)
3  * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com)
4  * Copyright (C) 2016 Apple Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
19  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
25  * THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include "config.h"
29 #include "IntlObject.h"
30
31 #if ENABLE(INTL)
32
33 #include "Error.h"
34 #include "FunctionPrototype.h"
35 #include "IntlCollator.h"
36 #include "IntlCollatorConstructor.h"
37 #include "IntlCollatorPrototype.h"
38 #include "IntlDateTimeFormat.h"
39 #include "IntlDateTimeFormatConstructor.h"
40 #include "IntlDateTimeFormatPrototype.h"
41 #include "IntlNumberFormat.h"
42 #include "IntlNumberFormatConstructor.h"
43 #include "IntlNumberFormatPrototype.h"
44 #include "IntlPluralRules.h"
45 #include "IntlPluralRulesConstructor.h"
46 #include "IntlPluralRulesPrototype.h"
47 #include "JSCInlines.h"
48 #include "JSCJSValueInlines.h"
49 #include "Lookup.h"
50 #include "ObjectPrototype.h"
51 #include "Options.h"
52 #include <unicode/uloc.h>
53 #include <unicode/unumsys.h>
54 #include <wtf/Assertions.h>
55 #include <wtf/Language.h>
56 #include <wtf/NeverDestroyed.h>
57 #include <wtf/text/StringBuilder.h>
58
59 namespace JSC {
60
61 STATIC_ASSERT_IS_TRIVIALLY_DESTRUCTIBLE(IntlObject);
62
63 static EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(ExecState*);
64
65 }
66
67 namespace JSC {
68
69 struct MatcherResult {
70     String locale;
71     String extension;
72     size_t extensionIndex;
73 };
74
// Class metadata for IntlObject: the reported class name is "Object", the parent
// class info is Base::s_info, and the method table is generated for IntlObject.
75 const ClassInfo IntlObject::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlObject) };
76
// Forwards the VM and structure to JSNonFinalObject and does nothing else;
// property setup happens in finishCreation(), which create() calls afterwards.
77 IntlObject::IntlObject(VM& vm, Structure* structure)
78     : JSNonFinalObject(vm, structure)
79 {
80 }
81
82 IntlObject* IntlObject::create(VM& vm, JSGlobalObject* globalObject, Structure* structure)
83 {
84     IntlObject* object = new (NotNull, allocateCell<IntlObject>(vm.heap)) IntlObject(vm, structure);
85     object->finishCreation(vm, globalObject);
86     return object;
87 }
88
89 void IntlObject::finishCreation(VM& vm, JSGlobalObject* globalObject)
90 {
91     Base::finishCreation(vm);
92     ASSERT(inherits(vm, info()));
93
94     // Set up Collator.
95     IntlCollatorPrototype* collatorPrototype = IntlCollatorPrototype::create(vm, globalObject, IntlCollatorPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
96     Structure* collatorStructure = IntlCollator::createStructure(vm, globalObject, collatorPrototype);
97     IntlCollatorConstructor* collatorConstructor = IntlCollatorConstructor::create(vm, IntlCollatorConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), collatorPrototype, collatorStructure);
98
99     // Set up NumberFormat.
100     IntlNumberFormatPrototype* numberFormatPrototype = IntlNumberFormatPrototype::create(vm, globalObject, IntlNumberFormatPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
101     Structure* numberFormatStructure = IntlNumberFormat::createStructure(vm, globalObject, numberFormatPrototype);
102     IntlNumberFormatConstructor* numberFormatConstructor = IntlNumberFormatConstructor::create(vm, IntlNumberFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), numberFormatPrototype, numberFormatStructure);
103
104     // Set up DateTimeFormat.
105     IntlDateTimeFormatPrototype* dateTimeFormatPrototype = IntlDateTimeFormatPrototype::create(vm, globalObject, IntlDateTimeFormatPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
106     Structure* dateTimeFormatStructure = IntlDateTimeFormat::createStructure(vm, globalObject, dateTimeFormatPrototype);
107     IntlDateTimeFormatConstructor* dateTimeFormatConstructor = IntlDateTimeFormatConstructor::create(vm, IntlDateTimeFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), dateTimeFormatPrototype, dateTimeFormatStructure);
108
109     // Set up PluralRules.
110     IntlPluralRulesPrototype* pluralRulesPrototype = IntlPluralRulesPrototype::create(vm, globalObject, IntlPluralRulesPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
111     Structure* pluralRulesStructure = IntlPluralRules::createStructure(vm, globalObject, pluralRulesPrototype);
112     IntlPluralRulesConstructor* pluralRulesConstructor = IntlPluralRulesConstructor::create(vm, IntlPluralRulesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), pluralRulesPrototype, pluralRulesStructure);
113
114     // Constructor Properties of the Intl Object
115     // https://tc39.github.io/ecma402/#sec-constructor-properties-of-the-intl-object
116     putDirectWithoutTransition(vm, vm.propertyNames->Collator, collatorConstructor, static_cast<unsigned>(PropertyAttribute::DontEnum));
117     putDirectWithoutTransition(vm, vm.propertyNames->NumberFormat, numberFormatConstructor, static_cast<unsigned>(PropertyAttribute::DontEnum));
118     putDirectWithoutTransition(vm, vm.propertyNames->DateTimeFormat, dateTimeFormatConstructor, static_cast<unsigned>(PropertyAttribute::DontEnum));
119     if (Options::useIntlPluralRules())
120         putDirectWithoutTransition(vm, vm.propertyNames->PluralRules, pluralRulesConstructor, static_cast<unsigned>(PropertyAttribute::DontEnum));
121
122     // Function Properties of the Intl Object
123     // https://tc39.github.io/ecma402/#sec-function-properties-of-the-intl-object
124     putDirectNativeFunction(vm, globalObject, Identifier::fromString(&vm, "getCanonicalLocales"), 1, intlObjectFuncGetCanonicalLocales, NoIntrinsic, static_cast<unsigned>(PropertyAttribute::DontEnum));
125 }
126
127 Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
128 {
129     return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
130 }
131
// Rewrites `locale` in place, replacing every '_' with '-'.
// NOTE(review): only the separator is changed; any other ICU-specific locale
// syntax is passed through untouched — confirm that is sufficient for callers.
132 void convertICULocaleToBCP47LanguageTag(String& locale)
133 {
134     locale.replace('_', '-');
135 }
136
137 bool intlBooleanOption(ExecState& state, JSValue options, PropertyName property, bool& usesFallback)
138 {
139     // GetOption (options, property, type="boolean", values, fallback)
140     // https://tc39.github.io/ecma402/#sec-getoption
141
142     VM& vm = state.vm();
143     auto scope = DECLARE_THROW_SCOPE(vm);
144
145     JSObject* opts = options.toObject(&state);
146     RETURN_IF_EXCEPTION(scope, false);
147
148     JSValue value = opts->get(&state, property);
149     RETURN_IF_EXCEPTION(scope, false);
150
151     if (!value.isUndefined()) {
152         bool booleanValue = value.toBoolean(&state);
153         usesFallback = false;
154         return booleanValue;
155     }
156
157     // Because fallback can be undefined, we let the caller handle it instead.
158     usesFallback = true;
159     return false;
160 }
161
162 String intlStringOption(ExecState& state, JSValue options, PropertyName property, std::initializer_list<const char*> values, const char* notFound, const char* fallback)
163 {
164     // GetOption (options, property, type="string", values, fallback)
165     // https://tc39.github.io/ecma402/#sec-getoption
166
167     VM& vm = state.vm();
168     auto scope = DECLARE_THROW_SCOPE(vm);
169
170     JSObject* opts = options.toObject(&state);
171     RETURN_IF_EXCEPTION(scope, String());
172
173     JSValue value = opts->get(&state, property);
174     RETURN_IF_EXCEPTION(scope, String());
175
176     if (!value.isUndefined()) {
177         String stringValue = value.toWTFString(&state);
178         RETURN_IF_EXCEPTION(scope, String());
179
180         if (values.size() && std::find(values.begin(), values.end(), stringValue) == values.end()) {
181             throwException(&state, scope, createRangeError(&state, notFound));
182             return { };
183         }
184         return stringValue;
185     }
186
187     return fallback;
188 }
189
190 unsigned intlNumberOption(ExecState& state, JSValue options, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
191 {
192     // GetNumberOption (options, property, minimum, maximum, fallback)
193     // https://tc39.github.io/ecma402/#sec-getnumberoption
194
195     VM& vm = state.vm();
196     auto scope = DECLARE_THROW_SCOPE(vm);
197
198     JSObject* opts = options.toObject(&state);
199     RETURN_IF_EXCEPTION(scope, 0);
200
201     JSValue value = opts->get(&state, property);
202     RETURN_IF_EXCEPTION(scope, 0);
203
204     if (!value.isUndefined()) {
205         double doubleValue = value.toNumber(&state);
206         RETURN_IF_EXCEPTION(scope, 0);
207
208         if (!(doubleValue >= minimum && doubleValue <= maximum)) {
209             throwException(&state, scope, createRangeError(&state, *property.publicName() + " is out of range"));
210             return 0;
211         }
212         return static_cast<unsigned>(doubleValue);
213     }
214     return fallback;
215 }
216
217 static String privateUseLangTag(const Vector<String>& parts, size_t startIndex)
218 {
219     size_t numParts = parts.size();
220     size_t currentIndex = startIndex;
221
222     // Check for privateuse.
223     // privateuse = "x" 1*("-" (1*8alphanum))
224     StringBuilder privateuse;
225     while (currentIndex < numParts) {
226         const String& singleton = parts[currentIndex];
227         unsigned singletonLength = singleton.length();
228         bool isValid = (singletonLength == 1 && (singleton == "x" || singleton == "X"));
229         if (!isValid)
230             break;
231
232         if (currentIndex != startIndex)
233             privateuse.append('-');
234
235         ++currentIndex;
236         unsigned numExtParts = 0;
237         privateuse.append('x');
238         while (currentIndex < numParts) {
239             const String& extPart = parts[currentIndex];
240             unsigned extPartLength = extPart.length();
241
242             bool isValid = (extPartLength >= 1 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>());
243             if (!isValid)
244                 break;
245
246             ++currentIndex;
247             ++numExtParts;
248             privateuse.append('-');
249             privateuse.append(extPart.convertToASCIILowercase());
250         }
251
252         // Requires at least one production.
253         if (!numExtParts)
254             return String();
255     }
256
257     // Leftovers makes it invalid.
258     if (currentIndex < numParts)
259         return String();
260
261     return privateuse.toString();
262 }
263
264 static String canonicalLangTag(const Vector<String>& parts)
265 {
266     ASSERT(!parts.isEmpty());
267
268     // Follows the grammar at https://www.rfc-editor.org/rfc/bcp/bcp47.txt
269     // langtag = language ["-" script] ["-" region] *("-" variant) *("-" extension) ["-" privateuse]
270
271     size_t numParts = parts.size();
272     // Check for language.
273     // language = 2*3ALPHA ["-" extlang] / 4ALPHA / 5*8ALPHA
274     size_t currentIndex = 0;
275     const String& language = parts[currentIndex];
276     unsigned languageLength = language.length();
277     bool canHaveExtlang = languageLength >= 2 && languageLength <= 3;
278     bool isValidLanguage = languageLength >= 2 && languageLength <= 8 && language.isAllSpecialCharacters<isASCIIAlpha>();
279     if (!isValidLanguage)
280         return String();
281
282     ++currentIndex;
283     StringBuilder canonical;
284     canonical.append(language.convertToASCIILowercase());
285
286     // Check for extlang.
287     // extlang = 3ALPHA *2("-" 3ALPHA)
288     if (canHaveExtlang) {
289         for (unsigned times = 0; times < 3 && currentIndex < numParts; ++times) {
290             const String& extlang = parts[currentIndex];
291             unsigned extlangLength = extlang.length();
292             if (extlangLength == 3 && extlang.isAllSpecialCharacters<isASCIIAlpha>()) {
293                 ++currentIndex;
294                 canonical.append('-');
295                 canonical.append(extlang.convertToASCIILowercase());
296             } else
297                 break;
298         }
299     }
300
301     // Check for script.
302     // script = 4ALPHA
303     if (currentIndex < numParts) {
304         const String& script = parts[currentIndex];
305         unsigned scriptLength = script.length();
306         if (scriptLength == 4 && script.isAllSpecialCharacters<isASCIIAlpha>()) {
307             ++currentIndex;
308             canonical.append('-');
309             canonical.append(toASCIIUpper(script[0]));
310             canonical.append(script.substring(1, 3).convertToASCIILowercase());
311         }
312     }
313
314     // Check for region.
315     // region = 2ALPHA / 3DIGIT
316     if (currentIndex < numParts) {
317         const String& region = parts[currentIndex];
318         unsigned regionLength = region.length();
319         bool isValidRegion = (
320             (regionLength == 2 && region.isAllSpecialCharacters<isASCIIAlpha>())
321             || (regionLength == 3 && region.isAllSpecialCharacters<isASCIIDigit>())
322         );
323         if (isValidRegion) {
324             ++currentIndex;
325             canonical.append('-');
326             canonical.append(region.convertToASCIIUppercase());
327         }
328     }
329
330     // Check for variant.
331     // variant = 5*8alphanum / (DIGIT 3alphanum)
332     HashSet<String> subtags;
333     while (currentIndex < numParts) {
334         const String& variant = parts[currentIndex];
335         unsigned variantLength = variant.length();
336         bool isValidVariant = (
337             (variantLength >= 5 && variantLength <= 8 && variant.isAllSpecialCharacters<isASCIIAlphanumeric>())
338             || (variantLength == 4 && isASCIIDigit(variant[0]) && variant.substring(1, 3).isAllSpecialCharacters<isASCIIAlphanumeric>())
339         );
340         if (!isValidVariant)
341             break;
342
343         // Cannot include duplicate subtags (case insensitive).
344         String lowerVariant = variant.convertToASCIILowercase();
345         if (!subtags.add(lowerVariant).isNewEntry)
346             return String();
347
348         ++currentIndex;
349
350         // Reordering variant subtags is not required in the spec.
351         canonical.append('-');
352         canonical.append(lowerVariant);
353     }
354
355     // Check for extension.
356     // extension = singleton 1*("-" (2*8alphanum))
357     // singleton = alphanum except x or X
358     subtags.clear();
359     Vector<String> extensions;
360     while (currentIndex < numParts) {
361         const String& possibleSingleton = parts[currentIndex];
362         unsigned singletonLength = possibleSingleton.length();
363         bool isValidSingleton = (singletonLength == 1 && possibleSingleton != "x" && possibleSingleton != "X" && isASCIIAlphanumeric(possibleSingleton[0]));
364         if (!isValidSingleton)
365             break;
366
367         // Cannot include duplicate singleton (case insensitive).
368         String singleton = possibleSingleton.convertToASCIILowercase();
369         if (!subtags.add(singleton).isNewEntry)
370             return String();
371
372         ++currentIndex;
373         int numExtParts = 0;
374         StringBuilder extension;
375         extension.append(singleton);
376         while (currentIndex < numParts) {
377             const String& extPart = parts[currentIndex];
378             unsigned extPartLength = extPart.length();
379
380             bool isValid = (extPartLength >= 2 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>());
381             if (!isValid)
382                 break;
383
384             ++currentIndex;
385             ++numExtParts;
386             extension.append('-');
387             extension.append(extPart.convertToASCIILowercase());
388         }
389
390         // Requires at least one production.
391         if (!numExtParts)
392             return String();
393
394         extensions.append(extension.toString());
395     }
396
397     // Add extensions to canonical sorted by singleton.
398     std::sort(
399         extensions.begin(),
400         extensions.end(),
401         [] (const String& a, const String& b) -> bool {
402             return a[0] < b[0];
403         }
404     );
405     size_t numExtenstions = extensions.size();
406     for (size_t i = 0; i < numExtenstions; ++i) {
407         canonical.append('-');
408         canonical.append(extensions[i]);
409     }
410
411     // Check for privateuse.
412     if (currentIndex < numParts) {
413         String privateuse = privateUseLangTag(parts, currentIndex);
414         if (privateuse.isNull())
415             return String();
416         canonical.append('-');
417         canonical.append(privateuse);
418     }
419
420     // FIXME: Replace subtags with their preferred values.
421
422     return canonical.toString();
423 }
424
425 static String grandfatheredLangTag(const String& locale)
426 {
427     // grandfathered = irregular / regular
428     // FIXME: convert to a compile time hash table if this is causing performance issues.
429     HashMap<String, String> tagMap = {
430         // Irregular.
431         { ASCIILiteral("en-gb-oed"), ASCIILiteral("en-GB-oed") },
432         { ASCIILiteral("i-ami"), ASCIILiteral("ami") },
433         { ASCIILiteral("i-bnn"), ASCIILiteral("bnn") },
434         { ASCIILiteral("i-default"), ASCIILiteral("i-default") },
435         { ASCIILiteral("i-enochian"), ASCIILiteral("i-enochian") },
436         { ASCIILiteral("i-hak"), ASCIILiteral("hak") },
437         { ASCIILiteral("i-klingon"), ASCIILiteral("tlh") },
438         { ASCIILiteral("i-lux"), ASCIILiteral("lb") },
439         { ASCIILiteral("i-mingo"), ASCIILiteral("i-mingo") },
440         { ASCIILiteral("i-navajo"), ASCIILiteral("nv") },
441         { ASCIILiteral("i-pwn"), ASCIILiteral("pwn") },
442         { ASCIILiteral("i-tao"), ASCIILiteral("tao") },
443         { ASCIILiteral("i-tay"), ASCIILiteral("tay") },
444         { ASCIILiteral("i-tsu"), ASCIILiteral("tsu") },
445         { ASCIILiteral("sgn-be-fr"), ASCIILiteral("sfb") },
446         { ASCIILiteral("sgn-be-nl"), ASCIILiteral("vgt") },
447         { ASCIILiteral("sgn-ch-de"), ASCIILiteral("sgg") },
448         // Regular.
449         { ASCIILiteral("art-lojban"), ASCIILiteral("jbo") },
450         { ASCIILiteral("cel-gaulish"), ASCIILiteral("cel-gaulish") },
451         { ASCIILiteral("no-bok"), ASCIILiteral("nb") },
452         { ASCIILiteral("no-nyn"), ASCIILiteral("nn") },
453         { ASCIILiteral("zh-guoyu"), ASCIILiteral("cmn") },
454         { ASCIILiteral("zh-hakka"), ASCIILiteral("hak") },
455         { ASCIILiteral("zh-min"), ASCIILiteral("zh-min") },
456         { ASCIILiteral("zh-min-nan"), ASCIILiteral("nan") },
457         { ASCIILiteral("zh-xiang"), ASCIILiteral("hsn") }
458     };
459
460     return tagMap.get(locale.convertToASCIILowercase());
461 }
462
463 static String canonicalizeLanguageTag(const String& locale)
464 {
465     // IsStructurallyValidLanguageTag (locale)
466     // CanonicalizeLanguageTag (locale)
467     // These are done one after another in CanonicalizeLocaleList, so they are combined here to reduce duplication.
468     // https://www.rfc-editor.org/rfc/bcp/bcp47.txt
469
470     // Language-Tag = langtag / privateuse / grandfathered
471     String grandfather = grandfatheredLangTag(locale);
472     if (!grandfather.isNull())
473         return grandfather;
474
475     // FIXME: Replace redundant tags [RFC4647].
476
477     Vector<String> parts;
478     locale.split('-', true, parts);
479     if (!parts.isEmpty()) {
480         String langtag = canonicalLangTag(parts);
481         if (!langtag.isNull())
482             return langtag;
483
484         String privateuse = privateUseLangTag(parts, 0);
485         if (!privateuse.isNull())
486             return privateuse;
487     }
488
489     return String();
490 }
491
492 Vector<String> canonicalizeLocaleList(ExecState& state, JSValue locales)
493 {
494     // CanonicalizeLocaleList (locales)
495     // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist
496
497     VM& vm = state.vm();
498     auto scope = DECLARE_THROW_SCOPE(vm);
499
500     JSGlobalObject* globalObject = state.jsCallee()->globalObject();
501     Vector<String> seen;
502
503     if (locales.isUndefined())
504         return seen;
505
506     JSObject* localesObject;
507     if (locales.isString()) {
508         JSArray* localesArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous));
509         if (!localesArray) {
510             throwOutOfMemoryError(&state, scope);
511             RETURN_IF_EXCEPTION(scope, Vector<String>());
512         }
513         localesArray->push(&state, locales);
514         RETURN_IF_EXCEPTION(scope, Vector<String>());
515
516         localesObject = localesArray;
517     } else {
518         localesObject = locales.toObject(&state);
519         RETURN_IF_EXCEPTION(scope, Vector<String>());
520     }
521
522     // 6. Let len be ToLength(Get(O, "length")).
523     JSValue lengthProperty = localesObject->get(&state, vm.propertyNames->length);
524     RETURN_IF_EXCEPTION(scope, Vector<String>());
525
526     double length = lengthProperty.toLength(&state);
527     RETURN_IF_EXCEPTION(scope, Vector<String>());
528
529     HashSet<String> seenSet;
530     for (double k = 0; k < length; ++k) {
531         bool kPresent = localesObject->hasProperty(&state, k);
532         RETURN_IF_EXCEPTION(scope, Vector<String>());
533
534         if (kPresent) {
535             JSValue kValue = localesObject->get(&state, k);
536             RETURN_IF_EXCEPTION(scope, Vector<String>());
537
538             if (!kValue.isString() && !kValue.isObject()) {
539                 throwTypeError(&state, scope, ASCIILiteral("locale value must be a string or object"));
540                 return Vector<String>();
541             }
542
543             JSString* tag = kValue.toString(&state);
544             RETURN_IF_EXCEPTION(scope, Vector<String>());
545
546             String canonicalizedTag = canonicalizeLanguageTag(tag->value(&state));
547             if (canonicalizedTag.isNull()) {
548                 throwException(&state, scope, createRangeError(&state, String::format("invalid language tag: %s", tag->value(&state).utf8().data())));
549                 return Vector<String>();
550             }
551
552             if (seenSet.add(canonicalizedTag).isNewEntry)
553                 seen.append(canonicalizedTag);
554         }
555     }
556
557     return seen;
558 }
559
560 String bestAvailableLocale(const HashSet<String>& availableLocales, const String& locale)
561 {
562     // BestAvailableLocale (availableLocales, locale)
563     // https://tc39.github.io/ecma402/#sec-bestavailablelocale
564
565     String candidate = locale;
566     while (!candidate.isEmpty()) {
567         if (availableLocales.contains(candidate))
568             return candidate;
569
570         size_t pos = candidate.reverseFind('-');
571         if (pos == notFound)
572             return String();
573
574         if (pos >= 2 && candidate[pos - 2] == '-')
575             pos -= 2;
576
577         candidate = candidate.substring(0, pos);
578     }
579
580     return String();
581 }
582
583 String defaultLocale(ExecState& state)
584 {
585     // DefaultLocale ()
586     // https://tc39.github.io/ecma402/#sec-defaultlocale
587     
588     // WebCore's global objects will have their own ideas of how to determine the language. It may
589     // be determined by WebCore-specific logic like some WK settings. Usually this will return the
590     // same thing as userPreferredLanguages()[0].
591     if (auto defaultLanguage = state.jsCallee()->globalObject()->globalObjectMethodTable()->defaultLanguage) {
592         String locale = defaultLanguage();
593         if (!locale.isEmpty())
594             return canonicalizeLanguageTag(locale);
595     }
596
597     Vector<String> languages = userPreferredLanguages();
598     if (!languages.isEmpty() && !languages[0].isEmpty())
599         return canonicalizeLanguageTag(languages[0]);
600     
601     // If all else fails, ask ICU. It will probably say something bogus like en_us even if the user
602     // has configured some other language, but being wrong is better than crashing.
603     String locale = uloc_getDefault();
604     convertICULocaleToBCP47LanguageTag(locale);
605     return locale;
606 }
607
608 String removeUnicodeLocaleExtension(const String& locale)
609 {
610     Vector<String> parts;
611     locale.split('-', parts);
612     StringBuilder builder;
613     size_t partsSize = parts.size();
614     if (partsSize > 0)
615         builder.append(parts[0]);
616     for (size_t p = 1; p < partsSize; ++p) {
617         if (parts[p] == "u" && p + 1 < partsSize) {
618             // Skip the u- and anything that follows until another singleton.
619             // While the next part is part of the unicode extension, skip it.
620             while (p + 1 < partsSize && parts[p + 1].length() > 1)
621                 ++p;
622         } else {
623             builder.append('-');
624             builder.append(parts[p]);
625         }
626     }
627     return builder.toString();
628 }
629
630 static MatcherResult lookupMatcher(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
631 {
632     // LookupMatcher (availableLocales, requestedLocales)
633     // https://tc39.github.io/ecma402/#sec-lookupmatcher
634
635     String locale;
636     String noExtensionsLocale;
637     String availableLocale;
638     for (size_t i = 0; i < requestedLocales.size() && availableLocale.isNull(); ++i) {
639         locale = requestedLocales[i];
640         noExtensionsLocale = removeUnicodeLocaleExtension(locale);
641         availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
642     }
643
644     MatcherResult result;
645     if (!availableLocale.isNull()) {
646         result.locale = availableLocale;
647         if (locale != noExtensionsLocale) {
648             size_t extensionIndex = locale.find("-u-");
649             RELEASE_ASSERT(extensionIndex != notFound);
650
651             size_t extensionLength = locale.length() - extensionIndex;
652             size_t end = extensionIndex + 3;
653             while (end < locale.length()) {
654                 end = locale.find('-', end);
655                 if (end == notFound)
656                     break;
657                 if (end + 2 < locale.length() && locale[end + 2] == '-') {
658                     extensionLength = end - extensionIndex;
659                     break;
660                 }
661                 end++;
662             }
663             result.extension = locale.substring(extensionIndex, extensionLength);
664             result.extensionIndex = extensionIndex;
665         }
666     } else
667         result.locale = defaultLocale(state);
668     return result;
669 }
670
// Best-fit locale matching. Currently identical to lookupMatcher(): the call is
// forwarded unchanged and its result returned.
671 static MatcherResult bestFitMatcher(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
672 {
673     // BestFitMatcher (availableLocales, requestedLocales)
674     // https://tc39.github.io/ecma402/#sec-bestfitmatcher
675
676     // FIXME: Implement something better than lookup.
677     return lookupMatcher(state, availableLocales, requestedLocales);
678 }
679
680 static void unicodeExtensionSubTags(const String& extension, Vector<String>& subtags)
681 {
682     // UnicodeExtensionSubtags (extension)
683     // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags
684
685     auto extensionLength = extension.length();
686     if (extensionLength < 3)
687         return;
688
689     size_t subtagStart = 3; // Skip initial -u-.
690     size_t valueStart = 3;
691     bool isLeading = true;
692     for (size_t index = subtagStart; index < extensionLength; ++index) {
693         if (extension[index] == '-') {
694             if (index - subtagStart == 2) {
695                 // Tag is a key, first append prior key's value if there is one.
696                 if (subtagStart - valueStart > 1)
697                     subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
698                 subtags.append(extension.substring(subtagStart, index - subtagStart));
699                 valueStart = index + 1;
700                 isLeading = false;
701             } else if (isLeading) {
702                 // Leading subtags before first key.
703                 subtags.append(extension.substring(subtagStart, index - subtagStart));
704                 valueStart = index + 1;
705             }
706             subtagStart = index + 1;
707         }
708     }
709     if (extensionLength - subtagStart == 2) {
710         // Trailing an extension key, first append prior key's value if there is one.
711         if (subtagStart - valueStart > 1)
712             subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
713         valueStart = subtagStart;
714     }
715     // Append final key's value.
716     subtags.append(extension.substring(valueStart, extensionLength - valueStart));
717 }
718
719 HashMap<String, String> resolveLocale(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, const HashMap<String, String>& options, const char* const relevantExtensionKeys[], size_t relevantExtensionKeyCount, Vector<String> (*localeData)(const String&, size_t))
720 {
721     // ResolveLocale (availableLocales, requestedLocales, options, relevantExtensionKeys, localeData)
722     // https://tc39.github.io/ecma402/#sec-resolvelocale
723
724     const String& matcher = options.get(ASCIILiteral("localeMatcher"));
725     MatcherResult matcherResult = (matcher == "lookup")
726         ? lookupMatcher(state, availableLocales, requestedLocales)
727         : bestFitMatcher(state, availableLocales, requestedLocales);
728
729     String foundLocale = matcherResult.locale;
730
731     Vector<String> extensionSubtags;
732     if (!matcherResult.extension.isNull())
733         unicodeExtensionSubTags(matcherResult.extension, extensionSubtags);
734
735     HashMap<String, String> result;
736     result.add(ASCIILiteral("dataLocale"), foundLocale);
737
738     String supportedExtension = ASCIILiteral("-u");
739     for (size_t keyIndex = 0; keyIndex < relevantExtensionKeyCount; ++keyIndex) {
740         const char* key = relevantExtensionKeys[keyIndex];
741         Vector<String> keyLocaleData = localeData(foundLocale, keyIndex);
742         ASSERT(!keyLocaleData.isEmpty());
743
744         String value = keyLocaleData[0];
745         String supportedExtensionAddition;
746
747         if (!extensionSubtags.isEmpty()) {
748             size_t keyPos = extensionSubtags.find(key);
749             if (keyPos != notFound) {
750                 if (keyPos + 1 < extensionSubtags.size() && extensionSubtags[keyPos + 1].length() > 2) {
751                     const String& requestedValue = extensionSubtags[keyPos + 1];
752                     if (keyLocaleData.contains(requestedValue)) {
753                         value = requestedValue;
754                         supportedExtensionAddition = makeString('-', key, '-', value);
755                     }
756                 } else if (keyLocaleData.contains(static_cast<String>(ASCIILiteral("true")))) {
757                     value = ASCIILiteral("true");
758                 }
759             }
760         }
761
762         HashMap<String, String>::const_iterator iterator = options.find(key);
763         if (iterator != options.end()) {
764             const String& optionsValue = iterator->value;
765             if (!optionsValue.isNull() && keyLocaleData.contains(optionsValue)) {
766                 if (optionsValue != value) {
767                     value = optionsValue;
768                     supportedExtensionAddition = String();
769                 }
770             }
771         }
772         result.add(key, value);
773         supportedExtension.append(supportedExtensionAddition);
774     }
775
776     if (supportedExtension.length() > 2) {
777         String preExtension = foundLocale.substring(0, matcherResult.extensionIndex);
778         String postExtension = foundLocale.substring(matcherResult.extensionIndex);
779         foundLocale = preExtension + supportedExtension + postExtension;
780     }
781
782     result.add(ASCIILiteral("locale"), foundLocale);
783     return result;
784 }
785
786 static JSArray* lookupSupportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
787 {
788     // LookupSupportedLocales (availableLocales, requestedLocales)
789     // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales
790
791     VM& vm = state.vm();
792     auto scope = DECLARE_THROW_SCOPE(vm);
793
794     size_t len = requestedLocales.size();
795     JSGlobalObject* globalObject = state.jsCallee()->globalObject();
796     JSArray* subset = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithUndecided), 0);
797     if (!subset) {
798         throwOutOfMemoryError(&state, scope);
799         return nullptr;
800     }
801
802     for (size_t k = 0; k < len; ++k) {
803         const String& locale = requestedLocales[k];
804         String noExtensionsLocale = removeUnicodeLocaleExtension(locale);
805         String availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
806         if (!availableLocale.isNull()) {
807             subset->push(&state, jsString(&state, locale));
808             RETURN_IF_EXCEPTION(scope, nullptr);
809         }
810     }
811
812     return subset;
813 }
814
815 static JSArray* bestFitSupportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
816 {
817     // BestFitSupportedLocales (availableLocales, requestedLocales)
818     // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
819
820     // FIXME: Implement something better than lookup.
821     return lookupSupportedLocales(state, availableLocales, requestedLocales);
822 }
823
824 JSValue supportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, JSValue options)
825 {
826     // SupportedLocales (availableLocales, requestedLocales, options)
827     // https://tc39.github.io/ecma402/#sec-supportedlocales
828
829     VM& vm = state.vm();
830     auto scope = DECLARE_THROW_SCOPE(vm);
831     String matcher;
832
833     if (!options.isUndefined()) {
834         matcher = intlStringOption(state, options, vm.propertyNames->localeMatcher, { "lookup", "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"", "best fit");
835         RETURN_IF_EXCEPTION(scope, JSValue());
836     } else
837         matcher = ASCIILiteral("best fit");
838
839     JSArray* supportedLocales = (matcher == "best fit")
840         ? bestFitSupportedLocales(state, availableLocales, requestedLocales)
841         : lookupSupportedLocales(state, availableLocales, requestedLocales);
842     RETURN_IF_EXCEPTION(scope, JSValue());
843
844     PropertyNameArray keys(&vm, PropertyNameMode::Strings, PrivateSymbolMode::Exclude);
845     supportedLocales->getOwnPropertyNames(supportedLocales, &state, keys, EnumerationMode());
846     RETURN_IF_EXCEPTION(scope, JSValue());
847
848     PropertyDescriptor desc;
849     desc.setConfigurable(false);
850     desc.setWritable(false);
851
852     size_t len = keys.size();
853     for (size_t i = 0; i < len; ++i) {
854         supportedLocales->defineOwnProperty(supportedLocales, &state, keys[i], desc, true);
855         RETURN_IF_EXCEPTION(scope, JSValue());
856     }
857
858     return supportedLocales;
859 }
860
861 Vector<String> numberingSystemsForLocale(const String& locale)
862 {
863     static NeverDestroyed<Vector<String>> cachedNumberingSystems;
864     Vector<String>& availableNumberingSystems = cachedNumberingSystems.get();
865
866     if (UNLIKELY(availableNumberingSystems.isEmpty())) {
867         static Lock cachedNumberingSystemsMutex;
868         std::lock_guard<Lock> lock(cachedNumberingSystemsMutex);
869         if (availableNumberingSystems.isEmpty()) {
870             UErrorCode status = U_ZERO_ERROR;
871             UEnumeration* numberingSystemNames = unumsys_openAvailableNames(&status);
872             ASSERT(U_SUCCESS(status));
873
874             int32_t resultLength;
875             // Numbering system names are always ASCII, so use char[].
876             while (const char* result = uenum_next(numberingSystemNames, &resultLength, &status)) {
877                 ASSERT(U_SUCCESS(status));
878                 availableNumberingSystems.append(String(result, resultLength));
879             }
880             uenum_close(numberingSystemNames);
881         }
882     }
883
884     UErrorCode status = U_ZERO_ERROR;
885     UNumberingSystem* defaultSystem = unumsys_open(locale.utf8().data(), &status);
886     ASSERT(U_SUCCESS(status));
887     String defaultSystemName(unumsys_getName(defaultSystem));
888     unumsys_close(defaultSystem);
889
890     Vector<String> numberingSystems({ defaultSystemName });
891     numberingSystems.appendVector(availableNumberingSystems);
892     return numberingSystems;
893 }
894
895 EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(ExecState* state)
896 {
897     // Intl.getCanonicalLocales(locales)
898     // https://tc39.github.io/ecma402/#sec-intl.getcanonicallocales
899
900     VM& vm = state->vm();
901     auto scope = DECLARE_THROW_SCOPE(vm);
902
903     Vector<String> localeList = canonicalizeLocaleList(*state, state->argument(0));
904     RETURN_IF_EXCEPTION(scope, encodedJSValue());
905
906     JSGlobalObject* globalObject = state->jsCallee()->globalObject();
907     JSArray* localeArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous));
908     if (!localeArray) {
909         throwOutOfMemoryError(state, scope);
910         return encodedJSValue();
911     }
912
913     auto length = localeList.size();
914     for (size_t i = 0; i < length; ++i) {
915         localeArray->push(state, jsString(state, localeList[i]));
916         RETURN_IF_EXCEPTION(scope, encodedJSValue());
917     }
918     return JSValue::encode(localeArray);
919 }
920
921 } // namespace JSC
922
923 #endif // ENABLE(INTL)