Unreviewed, rolling out r234489.
[WebKit-https.git] / Source / JavaScriptCore / runtime / IntlObject.cpp
1 /*
2  * Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family)
3  * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com)
4  * Copyright (C) 2016 Apple Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
19  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
25  * THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include "config.h"
29 #include "IntlObject.h"
30
31 #if ENABLE(INTL)
32
33 #include "Error.h"
34 #include "FunctionPrototype.h"
35 #include "IntlCanonicalizeLanguage.h"
36 #include "IntlCollator.h"
37 #include "IntlCollatorConstructor.h"
38 #include "IntlCollatorPrototype.h"
39 #include "IntlDateTimeFormat.h"
40 #include "IntlDateTimeFormatConstructor.h"
41 #include "IntlDateTimeFormatPrototype.h"
42 #include "IntlNumberFormat.h"
43 #include "IntlNumberFormatConstructor.h"
44 #include "IntlNumberFormatPrototype.h"
45 #include "IntlPluralRules.h"
46 #include "IntlPluralRulesConstructor.h"
47 #include "IntlPluralRulesPrototype.h"
48 #include "JSCInlines.h"
49 #include "JSCJSValueInlines.h"
50 #include "Lookup.h"
51 #include "ObjectPrototype.h"
52 #include "Options.h"
53 #include <unicode/uloc.h>
54 #include <unicode/unumsys.h>
55 #include <wtf/Assertions.h>
56 #include <wtf/Language.h>
57 #include <wtf/NeverDestroyed.h>
58 #include <wtf/text/StringBuilder.h>
59
60 namespace JSC {
61
62 STATIC_ASSERT_IS_TRIVIALLY_DESTRUCTIBLE(IntlObject);
63
64 static EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(ExecState*);
65
66 }
67
68 namespace JSC {
69
70 struct MatcherResult {
71     String locale;
72     String extension;
73     size_t extensionIndex { 0 };
74 };
75
76 const ClassInfo IntlObject::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlObject) };
77
78 IntlObject::IntlObject(VM& vm, Structure* structure)
79     : JSNonFinalObject(vm, structure)
80 {
81 }
82
83 IntlObject* IntlObject::create(VM& vm, JSGlobalObject* globalObject, Structure* structure)
84 {
85     IntlObject* object = new (NotNull, allocateCell<IntlObject>(vm.heap)) IntlObject(vm, structure);
86     object->finishCreation(vm, globalObject);
87     return object;
88 }
89
90 void IntlObject::finishCreation(VM& vm, JSGlobalObject* globalObject)
91 {
92     Base::finishCreation(vm);
93     ASSERT(inherits(vm, info()));
94
95     // Set up Collator.
96     IntlCollatorPrototype* collatorPrototype = IntlCollatorPrototype::create(vm, globalObject, IntlCollatorPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
97     Structure* collatorStructure = IntlCollator::createStructure(vm, globalObject, collatorPrototype);
98     IntlCollatorConstructor* collatorConstructor = IntlCollatorConstructor::create(vm, IntlCollatorConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), collatorPrototype, collatorStructure);
99
100     // Set up NumberFormat.
101     IntlNumberFormatPrototype* numberFormatPrototype = IntlNumberFormatPrototype::create(vm, globalObject, IntlNumberFormatPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
102     Structure* numberFormatStructure = IntlNumberFormat::createStructure(vm, globalObject, numberFormatPrototype);
103     IntlNumberFormatConstructor* numberFormatConstructor = IntlNumberFormatConstructor::create(vm, IntlNumberFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), numberFormatPrototype, numberFormatStructure);
104
105     // Set up DateTimeFormat.
106     IntlDateTimeFormatPrototype* dateTimeFormatPrototype = IntlDateTimeFormatPrototype::create(vm, globalObject, IntlDateTimeFormatPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
107     Structure* dateTimeFormatStructure = IntlDateTimeFormat::createStructure(vm, globalObject, dateTimeFormatPrototype);
108     IntlDateTimeFormatConstructor* dateTimeFormatConstructor = IntlDateTimeFormatConstructor::create(vm, IntlDateTimeFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), dateTimeFormatPrototype, dateTimeFormatStructure);
109
110     // Set up PluralRules.
111     IntlPluralRulesPrototype* pluralRulesPrototype = IntlPluralRulesPrototype::create(vm, globalObject, IntlPluralRulesPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
112     Structure* pluralRulesStructure = IntlPluralRules::createStructure(vm, globalObject, pluralRulesPrototype);
113     IntlPluralRulesConstructor* pluralRulesConstructor = IntlPluralRulesConstructor::create(vm, IntlPluralRulesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), pluralRulesPrototype, pluralRulesStructure);
114
115     // Constructor Properties of the Intl Object
116     // https://tc39.github.io/ecma402/#sec-constructor-properties-of-the-intl-object
117     putDirectWithoutTransition(vm, vm.propertyNames->Collator, collatorConstructor, static_cast<unsigned>(PropertyAttribute::DontEnum));
118     putDirectWithoutTransition(vm, vm.propertyNames->NumberFormat, numberFormatConstructor, static_cast<unsigned>(PropertyAttribute::DontEnum));
119     putDirectWithoutTransition(vm, vm.propertyNames->DateTimeFormat, dateTimeFormatConstructor, static_cast<unsigned>(PropertyAttribute::DontEnum));
120     if (Options::useIntlPluralRules())
121         putDirectWithoutTransition(vm, vm.propertyNames->PluralRules, pluralRulesConstructor, static_cast<unsigned>(PropertyAttribute::DontEnum));
122
123     // Function Properties of the Intl Object
124     // https://tc39.github.io/ecma402/#sec-function-properties-of-the-intl-object
125     putDirectNativeFunction(vm, globalObject, Identifier::fromString(&vm, "getCanonicalLocales"), 1, intlObjectFuncGetCanonicalLocales, NoIntrinsic, static_cast<unsigned>(PropertyAttribute::DontEnum));
126 }
127
128 Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
129 {
130     return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
131 }
132
133 String convertICULocaleToBCP47LanguageTag(const char* localeID)
134 {
135     UErrorCode status = U_ZERO_ERROR;
136     Vector<char, 32> buffer(32);
137     auto length = uloc_toLanguageTag(localeID, buffer.data(), buffer.size(), false, &status);
138     if (status == U_BUFFER_OVERFLOW_ERROR) {
139         buffer.grow(length);
140         status = U_ZERO_ERROR;
141         uloc_toLanguageTag(localeID, buffer.data(), buffer.size(), false, &status);
142     }
143     if (!U_FAILURE(status))
144         return String(buffer.data(), length);
145     return String();
146 }
147
148 bool intlBooleanOption(ExecState& state, JSValue options, PropertyName property, bool& usesFallback)
149 {
150     // GetOption (options, property, type="boolean", values, fallback)
151     // https://tc39.github.io/ecma402/#sec-getoption
152
153     VM& vm = state.vm();
154     auto scope = DECLARE_THROW_SCOPE(vm);
155
156     JSObject* opts = options.toObject(&state);
157     RETURN_IF_EXCEPTION(scope, false);
158
159     JSValue value = opts->get(&state, property);
160     RETURN_IF_EXCEPTION(scope, false);
161
162     if (!value.isUndefined()) {
163         bool booleanValue = value.toBoolean(&state);
164         usesFallback = false;
165         return booleanValue;
166     }
167
168     // Because fallback can be undefined, we let the caller handle it instead.
169     usesFallback = true;
170     return false;
171 }
172
173 String intlStringOption(ExecState& state, JSValue options, PropertyName property, std::initializer_list<const char*> values, const char* notFound, const char* fallback)
174 {
175     // GetOption (options, property, type="string", values, fallback)
176     // https://tc39.github.io/ecma402/#sec-getoption
177
178     VM& vm = state.vm();
179     auto scope = DECLARE_THROW_SCOPE(vm);
180
181     JSObject* opts = options.toObject(&state);
182     RETURN_IF_EXCEPTION(scope, String());
183
184     JSValue value = opts->get(&state, property);
185     RETURN_IF_EXCEPTION(scope, String());
186
187     if (!value.isUndefined()) {
188         String stringValue = value.toWTFString(&state);
189         RETURN_IF_EXCEPTION(scope, String());
190
191         if (values.size() && std::find(values.begin(), values.end(), stringValue) == values.end()) {
192             throwException(&state, scope, createRangeError(&state, notFound));
193             return { };
194         }
195         return stringValue;
196     }
197
198     return fallback;
199 }
200
201 unsigned intlNumberOption(ExecState& state, JSValue options, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
202 {
203     // GetNumberOption (options, property, minimum, maximum, fallback)
204     // https://tc39.github.io/ecma402/#sec-getnumberoption
205
206     VM& vm = state.vm();
207     auto scope = DECLARE_THROW_SCOPE(vm);
208
209     JSObject* opts = options.toObject(&state);
210     RETURN_IF_EXCEPTION(scope, 0);
211
212     JSValue value = opts->get(&state, property);
213     RETURN_IF_EXCEPTION(scope, 0);
214
215     scope.release();
216     return intlDefaultNumberOption(state, value, property, minimum, maximum, fallback);
217 }
218
219 unsigned intlDefaultNumberOption(ExecState& state, JSValue value, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
220 {
221     // DefaultNumberOption (value, minimum, maximum, fallback)
222     // https://tc39.github.io/ecma402/#sec-defaultnumberoption
223
224     VM& vm = state.vm();
225     auto scope = DECLARE_THROW_SCOPE(vm);
226
227     if (!value.isUndefined()) {
228         double doubleValue = value.toNumber(&state);
229         RETURN_IF_EXCEPTION(scope, 0);
230
231         if (!(doubleValue >= minimum && doubleValue <= maximum)) {
232             throwException(&state, scope, createRangeError(&state, *property.publicName() + " is out of range"));
233             return 0;
234         }
235         return static_cast<unsigned>(doubleValue);
236     }
237     return fallback;
238 }
239
240 static String privateUseLangTag(const Vector<String>& parts, size_t startIndex)
241 {
242     size_t numParts = parts.size();
243     size_t currentIndex = startIndex;
244
245     // Check for privateuse.
246     // privateuse = "x" 1*("-" (1*8alphanum))
247     StringBuilder privateuse;
248     while (currentIndex < numParts) {
249         const String& singleton = parts[currentIndex];
250         unsigned singletonLength = singleton.length();
251         bool isValid = (singletonLength == 1 && (singleton == "x" || singleton == "X"));
252         if (!isValid)
253             break;
254
255         if (currentIndex != startIndex)
256             privateuse.append('-');
257
258         ++currentIndex;
259         unsigned numExtParts = 0;
260         privateuse.append('x');
261         while (currentIndex < numParts) {
262             const String& extPart = parts[currentIndex];
263             unsigned extPartLength = extPart.length();
264
265             bool isValid = (extPartLength >= 1 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>());
266             if (!isValid)
267                 break;
268
269             ++currentIndex;
270             ++numExtParts;
271             privateuse.append('-');
272             privateuse.append(extPart.convertToASCIILowercase());
273         }
274
275         // Requires at least one production.
276         if (!numExtParts)
277             return String();
278     }
279
280     // Leftovers makes it invalid.
281     if (currentIndex < numParts)
282         return String();
283
284     return privateuse.toString();
285 }
286
287 static String preferredLanguage(const String& language)
288 {
289     auto preferred = intlPreferredLanguageTag(language);
290     if (!preferred.isNull())
291         return preferred;
292     return language;
293 }
294
295 static String preferredRegion(const String& region)
296 {
297     auto preferred = intlPreferredRegionTag(region);
298     if (!preferred.isNull())
299         return preferred;
300     return region;
301
302 }
303
304 static String canonicalLangTag(const Vector<String>& parts)
305 {
306     ASSERT(!parts.isEmpty());
307
308     // Follows the grammar at https://www.rfc-editor.org/rfc/bcp/bcp47.txt
309     // langtag = language ["-" script] ["-" region] *("-" variant) *("-" extension) ["-" privateuse]
310
311     size_t numParts = parts.size();
312     // Check for language.
313     // language = 2*3ALPHA ["-" extlang] / 4ALPHA / 5*8ALPHA
314     size_t currentIndex = 0;
315     const String& language = parts[currentIndex];
316     unsigned languageLength = language.length();
317     bool canHaveExtlang = languageLength >= 2 && languageLength <= 3;
318     bool isValidLanguage = languageLength >= 2 && languageLength <= 8 && language.isAllSpecialCharacters<isASCIIAlpha>();
319     if (!isValidLanguage)
320         return String();
321
322     ++currentIndex;
323     StringBuilder canonical;
324
325     const String langtag = preferredLanguage(language.convertToASCIILowercase());
326     canonical.append(langtag);
327
328     // Check for extlang.
329     // extlang = 3ALPHA *2("-" 3ALPHA)
330     if (canHaveExtlang) {
331         for (unsigned times = 0; times < 3 && currentIndex < numParts; ++times) {
332             const String& extlang = parts[currentIndex];
333             unsigned extlangLength = extlang.length();
334             if (extlangLength == 3 && extlang.isAllSpecialCharacters<isASCIIAlpha>()) {
335                 ++currentIndex;
336                 auto extlangLower = extlang.convertToASCIILowercase();
337                 if (!times && intlPreferredExtlangTag(extlangLower) == langtag) {
338                     canonical.clear();
339                     canonical.append(extlangLower);
340                     continue;
341                 }
342                 canonical.append('-');
343                 canonical.append(extlangLower);
344             } else
345                 break;
346         }
347     }
348
349     // Check for script.
350     // script = 4ALPHA
351     if (currentIndex < numParts) {
352         const String& script = parts[currentIndex];
353         unsigned scriptLength = script.length();
354         if (scriptLength == 4 && script.isAllSpecialCharacters<isASCIIAlpha>()) {
355             ++currentIndex;
356             canonical.append('-');
357             canonical.append(toASCIIUpper(script[0]));
358             canonical.append(script.substring(1, 3).convertToASCIILowercase());
359         }
360     }
361
362     // Check for region.
363     // region = 2ALPHA / 3DIGIT
364     if (currentIndex < numParts) {
365         const String& region = parts[currentIndex];
366         unsigned regionLength = region.length();
367         bool isValidRegion = (
368             (regionLength == 2 && region.isAllSpecialCharacters<isASCIIAlpha>())
369             || (regionLength == 3 && region.isAllSpecialCharacters<isASCIIDigit>())
370         );
371         if (isValidRegion) {
372             ++currentIndex;
373             canonical.append('-');
374             canonical.append(preferredRegion(region.convertToASCIIUppercase()));
375         }
376     }
377
378     // Check for variant.
379     // variant = 5*8alphanum / (DIGIT 3alphanum)
380     HashSet<String> subtags;
381     while (currentIndex < numParts) {
382         const String& variant = parts[currentIndex];
383         unsigned variantLength = variant.length();
384         bool isValidVariant = (
385             (variantLength >= 5 && variantLength <= 8 && variant.isAllSpecialCharacters<isASCIIAlphanumeric>())
386             || (variantLength == 4 && isASCIIDigit(variant[0]) && variant.substring(1, 3).isAllSpecialCharacters<isASCIIAlphanumeric>())
387         );
388         if (!isValidVariant)
389             break;
390
391         // Cannot include duplicate subtags (case insensitive).
392         String lowerVariant = variant.convertToASCIILowercase();
393         if (!subtags.add(lowerVariant).isNewEntry)
394             return String();
395
396         ++currentIndex;
397
398         // Reordering variant subtags is not required in the spec.
399         canonical.append('-');
400         canonical.append(lowerVariant);
401     }
402
403     // Check for extension.
404     // extension = singleton 1*("-" (2*8alphanum))
405     // singleton = alphanum except x or X
406     subtags.clear();
407     Vector<String> extensions;
408     while (currentIndex < numParts) {
409         const String& possibleSingleton = parts[currentIndex];
410         unsigned singletonLength = possibleSingleton.length();
411         bool isValidSingleton = (singletonLength == 1 && possibleSingleton != "x" && possibleSingleton != "X" && isASCIIAlphanumeric(possibleSingleton[0]));
412         if (!isValidSingleton)
413             break;
414
415         // Cannot include duplicate singleton (case insensitive).
416         String singleton = possibleSingleton.convertToASCIILowercase();
417         if (!subtags.add(singleton).isNewEntry)
418             return String();
419
420         ++currentIndex;
421         int numExtParts = 0;
422         StringBuilder extension;
423         extension.append(singleton);
424         while (currentIndex < numParts) {
425             const String& extPart = parts[currentIndex];
426             unsigned extPartLength = extPart.length();
427
428             bool isValid = (extPartLength >= 2 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>());
429             if (!isValid)
430                 break;
431
432             ++currentIndex;
433             ++numExtParts;
434             extension.append('-');
435             extension.append(extPart.convertToASCIILowercase());
436         }
437
438         // Requires at least one production.
439         if (!numExtParts)
440             return String();
441
442         extensions.append(extension.toString());
443     }
444
445     // Add extensions to canonical sorted by singleton.
446     std::sort(
447         extensions.begin(),
448         extensions.end(),
449         [] (const String& a, const String& b) -> bool {
450             return a[0] < b[0];
451         }
452     );
453     size_t numExtenstions = extensions.size();
454     for (size_t i = 0; i < numExtenstions; ++i) {
455         canonical.append('-');
456         canonical.append(extensions[i]);
457     }
458
459     // Check for privateuse.
460     if (currentIndex < numParts) {
461         String privateuse = privateUseLangTag(parts, currentIndex);
462         if (privateuse.isNull())
463             return String();
464         canonical.append('-');
465         canonical.append(privateuse);
466     }
467
468     const String tag = canonical.toString();
469     const String preferred = intlRedundantLanguageTag(tag);
470     if (!preferred.isNull())
471         return preferred;
472     return tag;
473 }
474
475 static String canonicalizeLanguageTag(const String& locale)
476 {
477     // IsStructurallyValidLanguageTag (locale)
478     // CanonicalizeLanguageTag (locale)
479     // These are done one after another in CanonicalizeLocaleList, so they are combined here to reduce duplication.
480     // https://www.rfc-editor.org/rfc/bcp/bcp47.txt
481
482     // Language-Tag = langtag / privateuse / grandfathered
483     String grandfather = intlGrandfatheredLanguageTag(locale.convertToASCIILowercase());
484     if (!grandfather.isNull())
485         return grandfather;
486
487     Vector<String> parts = locale.splitAllowingEmptyEntries('-');
488     if (!parts.isEmpty()) {
489         String langtag = canonicalLangTag(parts);
490         if (!langtag.isNull())
491             return langtag;
492
493         String privateuse = privateUseLangTag(parts, 0);
494         if (!privateuse.isNull())
495             return privateuse;
496     }
497
498     return String();
499 }
500
501 Vector<String> canonicalizeLocaleList(ExecState& state, JSValue locales)
502 {
503     // CanonicalizeLocaleList (locales)
504     // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist
505
506     VM& vm = state.vm();
507     auto scope = DECLARE_THROW_SCOPE(vm);
508
509     JSGlobalObject* globalObject = state.jsCallee()->globalObject(vm);
510     Vector<String> seen;
511
512     if (locales.isUndefined())
513         return seen;
514
515     JSObject* localesObject;
516     if (locales.isString()) {
517         JSArray* localesArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous));
518         if (!localesArray) {
519             throwOutOfMemoryError(&state, scope);
520             RETURN_IF_EXCEPTION(scope, Vector<String>());
521         }
522         localesArray->push(&state, locales);
523         RETURN_IF_EXCEPTION(scope, Vector<String>());
524
525         localesObject = localesArray;
526     } else {
527         localesObject = locales.toObject(&state);
528         RETURN_IF_EXCEPTION(scope, Vector<String>());
529     }
530
531     // 6. Let len be ToLength(Get(O, "length")).
532     JSValue lengthProperty = localesObject->get(&state, vm.propertyNames->length);
533     RETURN_IF_EXCEPTION(scope, Vector<String>());
534
535     double length = lengthProperty.toLength(&state);
536     RETURN_IF_EXCEPTION(scope, Vector<String>());
537
538     HashSet<String> seenSet;
539     for (double k = 0; k < length; ++k) {
540         bool kPresent = localesObject->hasProperty(&state, k);
541         RETURN_IF_EXCEPTION(scope, Vector<String>());
542
543         if (kPresent) {
544             JSValue kValue = localesObject->get(&state, k);
545             RETURN_IF_EXCEPTION(scope, Vector<String>());
546
547             if (!kValue.isString() && !kValue.isObject()) {
548                 throwTypeError(&state, scope, "locale value must be a string or object"_s);
549                 return Vector<String>();
550             }
551
552             JSString* tag = kValue.toString(&state);
553             RETURN_IF_EXCEPTION(scope, Vector<String>());
554
555             String canonicalizedTag = canonicalizeLanguageTag(tag->value(&state));
556             if (canonicalizedTag.isNull()) {
557                 throwException(&state, scope, createRangeError(&state, String::format("invalid language tag: %s", tag->value(&state).utf8().data())));
558                 return Vector<String>();
559             }
560
561             if (seenSet.add(canonicalizedTag).isNewEntry)
562                 seen.append(canonicalizedTag);
563         }
564     }
565
566     return seen;
567 }
568
569 String bestAvailableLocale(const HashSet<String>& availableLocales, const String& locale)
570 {
571     // BestAvailableLocale (availableLocales, locale)
572     // https://tc39.github.io/ecma402/#sec-bestavailablelocale
573
574     String candidate = locale;
575     while (!candidate.isEmpty()) {
576         if (availableLocales.contains(candidate))
577             return candidate;
578
579         size_t pos = candidate.reverseFind('-');
580         if (pos == notFound)
581             return String();
582
583         if (pos >= 2 && candidate[pos - 2] == '-')
584             pos -= 2;
585
586         candidate = candidate.substring(0, pos);
587     }
588
589     return String();
590 }
591
592 String defaultLocale(ExecState& state)
593 {
594     // DefaultLocale ()
595     // https://tc39.github.io/ecma402/#sec-defaultlocale
596
597     // WebCore's global objects will have their own ideas of how to determine the language. It may
598     // be determined by WebCore-specific logic like some WK settings. Usually this will return the
599     // same thing as userPreferredLanguages()[0].
600     VM& vm = state.vm();
601     if (auto defaultLanguage = state.jsCallee()->globalObject(vm)->globalObjectMethodTable()->defaultLanguage) {
602         String locale = canonicalizeLanguageTag(defaultLanguage());
603         if (!locale.isEmpty())
604             return locale;
605     }
606
607     Vector<String> languages = userPreferredLanguages();
608     for (const auto& language : languages) {
609         String locale = canonicalizeLanguageTag(language);
610         if (!locale.isEmpty())
611             return locale;
612     }
613
614     // If all else fails, ask ICU. It will probably say something bogus like en_us even if the user
615     // has configured some other language, but being wrong is better than crashing.
616     String locale = convertICULocaleToBCP47LanguageTag(uloc_getDefault());
617     if (!locale.isEmpty())
618         return locale;
619
620     return "en"_s;
621 }
622
623 String removeUnicodeLocaleExtension(const String& locale)
624 {
625     Vector<String> parts = locale.split('-');
626     StringBuilder builder;
627     size_t partsSize = parts.size();
628     bool atPrivate = false;
629     if (partsSize > 0)
630         builder.append(parts[0]);
631     for (size_t p = 1; p < partsSize; ++p) {
632         if (parts[p] == "x")
633             atPrivate = true;
634         if (!atPrivate && parts[p] == "u" && p + 1 < partsSize) {
635             // Skip the u- and anything that follows until another singleton.
636             // While the next part is part of the unicode extension, skip it.
637             while (p + 1 < partsSize && parts[p + 1].length() > 1)
638                 ++p;
639         } else {
640             builder.append('-');
641             builder.append(parts[p]);
642         }
643     }
644     return builder.toString();
645 }
646
647 static MatcherResult lookupMatcher(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
648 {
649     // LookupMatcher (availableLocales, requestedLocales)
650     // https://tc39.github.io/ecma402/#sec-lookupmatcher
651
652     String locale;
653     String noExtensionsLocale;
654     String availableLocale;
655     for (size_t i = 0; i < requestedLocales.size() && availableLocale.isNull(); ++i) {
656         locale = requestedLocales[i];
657         noExtensionsLocale = removeUnicodeLocaleExtension(locale);
658         availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
659     }
660
661     MatcherResult result;
662     if (!availableLocale.isEmpty()) {
663         result.locale = availableLocale;
664         if (locale != noExtensionsLocale) {
665             size_t extensionIndex = locale.find("-u-");
666             RELEASE_ASSERT(extensionIndex != notFound);
667
668             size_t extensionLength = locale.length() - extensionIndex;
669             size_t end = extensionIndex + 3;
670             while (end < locale.length()) {
671                 end = locale.find('-', end);
672                 if (end == notFound)
673                     break;
674                 if (end + 2 < locale.length() && locale[end + 2] == '-') {
675                     extensionLength = end - extensionIndex;
676                     break;
677                 }
678                 end++;
679             }
680             result.extension = locale.substring(extensionIndex, extensionLength);
681             result.extensionIndex = extensionIndex;
682         }
683     } else
684         result.locale = defaultLocale(state);
685     return result;
686 }
687
688 static MatcherResult bestFitMatcher(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
689 {
690     // BestFitMatcher (availableLocales, requestedLocales)
691     // https://tc39.github.io/ecma402/#sec-bestfitmatcher
692
693     // FIXME: Implement something better than lookup.
694     return lookupMatcher(state, availableLocales, requestedLocales);
695 }
696
697 static void unicodeExtensionSubTags(const String& extension, Vector<String>& subtags)
698 {
699     // UnicodeExtensionSubtags (extension)
700     // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags
701
702     auto extensionLength = extension.length();
703     if (extensionLength < 3)
704         return;
705
706     size_t subtagStart = 3; // Skip initial -u-.
707     size_t valueStart = 3;
708     bool isLeading = true;
709     for (size_t index = subtagStart; index < extensionLength; ++index) {
710         if (extension[index] == '-') {
711             if (index - subtagStart == 2) {
712                 // Tag is a key, first append prior key's value if there is one.
713                 if (subtagStart - valueStart > 1)
714                     subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
715                 subtags.append(extension.substring(subtagStart, index - subtagStart));
716                 valueStart = index + 1;
717                 isLeading = false;
718             } else if (isLeading) {
719                 // Leading subtags before first key.
720                 subtags.append(extension.substring(subtagStart, index - subtagStart));
721                 valueStart = index + 1;
722             }
723             subtagStart = index + 1;
724         }
725     }
726     if (extensionLength - subtagStart == 2) {
727         // Trailing an extension key, first append prior key's value if there is one.
728         if (subtagStart - valueStart > 1)
729             subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
730         valueStart = subtagStart;
731     }
732     // Append final key's value.
733     subtags.append(extension.substring(valueStart, extensionLength - valueStart));
734 }
735
736 HashMap<String, String> resolveLocale(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, const HashMap<String, String>& options, const char* const relevantExtensionKeys[], size_t relevantExtensionKeyCount, Vector<String> (*localeData)(const String&, size_t))
737 {
738     // ResolveLocale (availableLocales, requestedLocales, options, relevantExtensionKeys, localeData)
739     // https://tc39.github.io/ecma402/#sec-resolvelocale
740
741     const String& matcher = options.get("localeMatcher"_s);
742     MatcherResult matcherResult = (matcher == "lookup")
743         ? lookupMatcher(state, availableLocales, requestedLocales)
744         : bestFitMatcher(state, availableLocales, requestedLocales);
745
746     String foundLocale = matcherResult.locale;
747
748     Vector<String> extensionSubtags;
749     if (!matcherResult.extension.isNull())
750         unicodeExtensionSubTags(matcherResult.extension, extensionSubtags);
751
752     HashMap<String, String> result;
753     result.add("dataLocale"_s, foundLocale);
754
755     String supportedExtension = "-u"_s;
756     for (size_t keyIndex = 0; keyIndex < relevantExtensionKeyCount; ++keyIndex) {
757         const char* key = relevantExtensionKeys[keyIndex];
758         Vector<String> keyLocaleData = localeData(foundLocale, keyIndex);
759         ASSERT(!keyLocaleData.isEmpty());
760
761         String value = keyLocaleData[0];
762         String supportedExtensionAddition;
763
764         if (!extensionSubtags.isEmpty()) {
765             size_t keyPos = extensionSubtags.find(key);
766             if (keyPos != notFound) {
767                 if (keyPos + 1 < extensionSubtags.size() && extensionSubtags[keyPos + 1].length() > 2) {
768                     const String& requestedValue = extensionSubtags[keyPos + 1];
769                     if (keyLocaleData.contains(requestedValue)) {
770                         value = requestedValue;
771                         supportedExtensionAddition = makeString('-', key, '-', value);
772                     }
773                 } else if (keyLocaleData.contains(static_cast<String>("true"_s))) {
774                     value = "true"_s;
775                 }
776             }
777         }
778
779         HashMap<String, String>::const_iterator iterator = options.find(key);
780         if (iterator != options.end()) {
781             const String& optionsValue = iterator->value;
782             // Undefined should not get added to the options, it won't displace the extension.
783             // Null will remove the extension.
784             if ((optionsValue.isNull() || keyLocaleData.contains(optionsValue)) && optionsValue != value) {
785                 value = optionsValue;
786                 supportedExtensionAddition = String();
787             }
788         }
789         result.add(key, value);
790         supportedExtension.append(supportedExtensionAddition);
791     }
792
793     if (supportedExtension.length() > 2) {
794         String preExtension = foundLocale.substring(0, matcherResult.extensionIndex);
795         String postExtension = foundLocale.substring(matcherResult.extensionIndex);
796         foundLocale = preExtension + supportedExtension + postExtension;
797     }
798
799     result.add("locale"_s, foundLocale);
800     return result;
801 }
802
803 static JSArray* lookupSupportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
804 {
805     // LookupSupportedLocales (availableLocales, requestedLocales)
806     // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales
807
808     VM& vm = state.vm();
809     auto scope = DECLARE_THROW_SCOPE(vm);
810
811     size_t len = requestedLocales.size();
812     JSGlobalObject* globalObject = state.jsCallee()->globalObject(vm);
813     JSArray* subset = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithUndecided), 0);
814     if (!subset) {
815         throwOutOfMemoryError(&state, scope);
816         return nullptr;
817     }
818
819     unsigned index = 0;
820     for (size_t k = 0; k < len; ++k) {
821         const String& locale = requestedLocales[k];
822         String noExtensionsLocale = removeUnicodeLocaleExtension(locale);
823         String availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
824         if (!availableLocale.isNull()) {
825             subset->putDirectIndex(&state, index++, jsString(&state, locale));
826             RETURN_IF_EXCEPTION(scope, nullptr);
827         }
828     }
829
830     return subset;
831 }
832
833 static JSArray* bestFitSupportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
834 {
835     // BestFitSupportedLocales (availableLocales, requestedLocales)
836     // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
837
838     // FIXME: Implement something better than lookup.
839     return lookupSupportedLocales(state, availableLocales, requestedLocales);
840 }
841
842 JSValue supportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, JSValue options)
843 {
844     // SupportedLocales (availableLocales, requestedLocales, options)
845     // https://tc39.github.io/ecma402/#sec-supportedlocales
846
847     VM& vm = state.vm();
848     auto scope = DECLARE_THROW_SCOPE(vm);
849     String matcher;
850
851     if (!options.isUndefined()) {
852         matcher = intlStringOption(state, options, vm.propertyNames->localeMatcher, { "lookup", "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"", "best fit");
853         RETURN_IF_EXCEPTION(scope, JSValue());
854     } else
855         matcher = "best fit"_s;
856
857     JSArray* supportedLocales = (matcher == "best fit")
858         ? bestFitSupportedLocales(state, availableLocales, requestedLocales)
859         : lookupSupportedLocales(state, availableLocales, requestedLocales);
860     RETURN_IF_EXCEPTION(scope, JSValue());
861
862     PropertyNameArray keys(&vm, PropertyNameMode::Strings, PrivateSymbolMode::Exclude);
863     supportedLocales->getOwnPropertyNames(supportedLocales, &state, keys, EnumerationMode());
864     RETURN_IF_EXCEPTION(scope, JSValue());
865
866     PropertyDescriptor desc;
867     desc.setConfigurable(false);
868     desc.setWritable(false);
869
870     size_t len = keys.size();
871     for (size_t i = 0; i < len; ++i) {
872         supportedLocales->defineOwnProperty(supportedLocales, &state, keys[i], desc, true);
873         RETURN_IF_EXCEPTION(scope, JSValue());
874     }
875     supportedLocales->defineOwnProperty(supportedLocales, &state, vm.propertyNames->length, desc, true);
876     RETURN_IF_EXCEPTION(scope, JSValue());
877
878     return supportedLocales;
879 }
880
881 Vector<String> numberingSystemsForLocale(const String& locale)
882 {
883     static NeverDestroyed<Vector<String>> cachedNumberingSystems;
884     Vector<String>& availableNumberingSystems = cachedNumberingSystems.get();
885
886     if (UNLIKELY(availableNumberingSystems.isEmpty())) {
887         static Lock cachedNumberingSystemsMutex;
888         std::lock_guard<Lock> lock(cachedNumberingSystemsMutex);
889         if (availableNumberingSystems.isEmpty()) {
890             UErrorCode status = U_ZERO_ERROR;
891             UEnumeration* numberingSystemNames = unumsys_openAvailableNames(&status);
892             ASSERT(U_SUCCESS(status));
893
894             int32_t resultLength;
895             // Numbering system names are always ASCII, so use char[].
896             while (const char* result = uenum_next(numberingSystemNames, &resultLength, &status)) {
897                 ASSERT(U_SUCCESS(status));
898                 auto numsys = unumsys_openByName(result, &status);
899                 ASSERT(U_SUCCESS(status));
900                 // Only support algorithmic if it is the default fot the locale, handled below.
901                 if (!unumsys_isAlgorithmic(numsys))
902                     availableNumberingSystems.append(String(result, resultLength));
903                 unumsys_close(numsys);
904             }
905             uenum_close(numberingSystemNames);
906         }
907     }
908
909     UErrorCode status = U_ZERO_ERROR;
910     UNumberingSystem* defaultSystem = unumsys_open(locale.utf8().data(), &status);
911     ASSERT(U_SUCCESS(status));
912     String defaultSystemName(unumsys_getName(defaultSystem));
913     unumsys_close(defaultSystem);
914
915     Vector<String> numberingSystems({ defaultSystemName });
916     numberingSystems.appendVector(availableNumberingSystems);
917     return numberingSystems;
918 }
919
920 EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(ExecState* state)
921 {
922     // Intl.getCanonicalLocales(locales)
923     // https://tc39.github.io/ecma402/#sec-intl.getcanonicallocales
924
925     VM& vm = state->vm();
926     auto scope = DECLARE_THROW_SCOPE(vm);
927
928     Vector<String> localeList = canonicalizeLocaleList(*state, state->argument(0));
929     RETURN_IF_EXCEPTION(scope, encodedJSValue());
930     auto length = localeList.size();
931
932     JSGlobalObject* globalObject = state->jsCallee()->globalObject(vm);
933     JSArray* localeArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous), length);
934     if (!localeArray) {
935         throwOutOfMemoryError(state, scope);
936         return encodedJSValue();
937     }
938
939     for (size_t i = 0; i < length; ++i) {
940         localeArray->putDirectIndex(state, i, jsString(state, localeList[i]));
941         RETURN_IF_EXCEPTION(scope, encodedJSValue());
942     }
943     return JSValue::encode(localeArray);
944 }
945
946 } // namespace JSC
947
948 #endif // ENABLE(INTL)