83059a105e29e7df8e3350e2ffdf2aa11d7f22d7
[WebKit-https.git] / Source / JavaScriptCore / runtime / IntlObject.cpp
1 /*
2  * Copyright (C) 2015 Andy VanWagoner (thetalecrafter@gmail.com)
3  * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com)
4  * Copyright (C) 2016 Apple Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
19  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
25  * THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include "config.h"
29 #include "IntlObject.h"
30
31 #if ENABLE(INTL)
32
33 #include "Error.h"
34 #include "FunctionPrototype.h"
35 #include "IntlCollator.h"
36 #include "IntlCollatorConstructor.h"
37 #include "IntlCollatorPrototype.h"
38 #include "IntlDateTimeFormat.h"
39 #include "IntlDateTimeFormatConstructor.h"
40 #include "IntlDateTimeFormatPrototype.h"
41 #include "IntlNumberFormat.h"
42 #include "IntlNumberFormatConstructor.h"
43 #include "IntlNumberFormatPrototype.h"
44 #include "JSCInlines.h"
45 #include "JSCJSValueInlines.h"
46 #include "Lookup.h"
47 #include "ObjectPrototype.h"
48 #include <unicode/uloc.h>
49 #include <unicode/unumsys.h>
50 #include <wtf/Assertions.h>
51 #include <wtf/NeverDestroyed.h>
52 #include <wtf/PlatformUserPreferredLanguages.h>
53 #include <wtf/text/StringBuilder.h>
54
namespace JSC {

// Compile-time check that IntlObject is trivially destructible.
STATIC_ASSERT_IS_TRIVIALLY_DESTRUCTIBLE(IntlObject);

// Forward declaration of the host function that IntlObject::finishCreation()
// installs under the "getCanonicalLocales" property name.
static EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(ExecState*);

}
62
63 namespace JSC {
64
65 struct MatcherResult {
66     String locale;
67     String extension;
68     size_t extensionIndex;
69 };
70
// Class metadata for IntlObject: class name "Object", parent metadata Base::s_info, and IntlObject's method table.
const ClassInfo IntlObject::s_info = { "Object", &Base::s_info, 0, CREATE_METHOD_TABLE(IntlObject) };
72
// Forwards the VM and structure to the JSNonFinalObject base. The constructor body is empty;
// the object's properties are installed later by finishCreation().
IntlObject::IntlObject(VM& vm, Structure* structure)
    : JSNonFinalObject(vm, structure)
{
}
77
78 IntlObject* IntlObject::create(VM& vm, JSGlobalObject* globalObject, Structure* structure)
79 {
80     IntlObject* object = new (NotNull, allocateCell<IntlObject>(vm.heap)) IntlObject(vm, structure);
81     object->finishCreation(vm, globalObject);
82     return object;
83 }
84
85 void IntlObject::finishCreation(VM& vm, JSGlobalObject* globalObject)
86 {
87     Base::finishCreation(vm);
88     ASSERT(inherits(vm, info()));
89
90     // Set up Collator.
91     IntlCollatorPrototype* collatorPrototype = IntlCollatorPrototype::create(vm, globalObject, IntlCollatorPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
92     Structure* collatorStructure = IntlCollator::createStructure(vm, globalObject, collatorPrototype);
93     IntlCollatorConstructor* collatorConstructor = IntlCollatorConstructor::create(vm, IntlCollatorConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), collatorPrototype, collatorStructure);
94
95     // Set up NumberFormat.
96     IntlNumberFormatPrototype* numberFormatPrototype = IntlNumberFormatPrototype::create(vm, globalObject, IntlNumberFormatPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
97     Structure* numberFormatStructure = IntlNumberFormat::createStructure(vm, globalObject, numberFormatPrototype);
98     IntlNumberFormatConstructor* numberFormatConstructor = IntlNumberFormatConstructor::create(vm, IntlNumberFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), numberFormatPrototype, numberFormatStructure);
99
100     // Set up DateTimeFormat.
101     IntlDateTimeFormatPrototype* dateTimeFormatPrototype = IntlDateTimeFormatPrototype::create(vm, globalObject, IntlDateTimeFormatPrototype::createStructure(vm, globalObject, globalObject->objectPrototype()));
102     Structure* dateTimeFormatStructure = IntlDateTimeFormat::createStructure(vm, globalObject, dateTimeFormatPrototype);
103     IntlDateTimeFormatConstructor* dateTimeFormatConstructor = IntlDateTimeFormatConstructor::create(vm, IntlDateTimeFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), dateTimeFormatPrototype, dateTimeFormatStructure);
104
105     // Constructor Properties of the Intl Object
106     // https://tc39.github.io/ecma402/#sec-constructor-properties-of-the-intl-object
107     putDirectWithoutTransition(vm, vm.propertyNames->Collator, collatorConstructor, DontEnum);
108     putDirectWithoutTransition(vm, vm.propertyNames->NumberFormat, numberFormatConstructor, DontEnum);
109     putDirectWithoutTransition(vm, vm.propertyNames->DateTimeFormat, dateTimeFormatConstructor, DontEnum);
110
111     // Function Properties of the Intl Object
112     // https://tc39.github.io/ecma402/#sec-function-properties-of-the-intl-object
113     putDirectNativeFunction(vm, globalObject, Identifier::fromString(&vm, "getCanonicalLocales"), 1, intlObjectFuncGetCanonicalLocales, NoIntrinsic, DontEnum);
114 }
115
116 Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
117 {
118     return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
119 }
120
// Rewrites `locale` so that every '_' separator becomes '-'.
// NOTE(review): only the separator character is converted; any other ICU-specific
// syntax in the identifier is passed through unchanged.
void convertICULocaleToBCP47LanguageTag(String& locale)
{
    locale.replace('_', '-');
}
125
126 bool intlBooleanOption(ExecState& state, JSValue options, PropertyName property, bool& usesFallback)
127 {
128     // GetOption (options, property, type="boolean", values, fallback)
129     // https://tc39.github.io/ecma402/#sec-getoption
130
131     VM& vm = state.vm();
132     auto scope = DECLARE_THROW_SCOPE(vm);
133
134     JSObject* opts = options.toObject(&state);
135     RETURN_IF_EXCEPTION(scope, false);
136
137     JSValue value = opts->get(&state, property);
138     RETURN_IF_EXCEPTION(scope, false);
139
140     if (!value.isUndefined()) {
141         bool booleanValue = value.toBoolean(&state);
142         usesFallback = false;
143         return booleanValue;
144     }
145
146     // Because fallback can be undefined, we let the caller handle it instead.
147     usesFallback = true;
148     return false;
149 }
150
151 String intlStringOption(ExecState& state, JSValue options, PropertyName property, std::initializer_list<const char*> values, const char* notFound, const char* fallback)
152 {
153     // GetOption (options, property, type="string", values, fallback)
154     // https://tc39.github.io/ecma402/#sec-getoption
155
156     VM& vm = state.vm();
157     auto scope = DECLARE_THROW_SCOPE(vm);
158
159     JSObject* opts = options.toObject(&state);
160     RETURN_IF_EXCEPTION(scope, String());
161
162     JSValue value = opts->get(&state, property);
163     RETURN_IF_EXCEPTION(scope, String());
164
165     if (!value.isUndefined()) {
166         String stringValue = value.toWTFString(&state);
167         RETURN_IF_EXCEPTION(scope, String());
168
169         if (values.size() && std::find(values.begin(), values.end(), stringValue) == values.end()) {
170             throwException(&state, scope, createRangeError(&state, notFound));
171             return { };
172         }
173         return stringValue;
174     }
175
176     return fallback;
177 }
178
179 unsigned intlNumberOption(ExecState& state, JSValue options, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
180 {
181     // GetNumberOption (options, property, minimum, maximum, fallback)
182     // https://tc39.github.io/ecma402/#sec-getnumberoption
183
184     VM& vm = state.vm();
185     auto scope = DECLARE_THROW_SCOPE(vm);
186
187     JSObject* opts = options.toObject(&state);
188     RETURN_IF_EXCEPTION(scope, 0);
189
190     JSValue value = opts->get(&state, property);
191     RETURN_IF_EXCEPTION(scope, 0);
192
193     if (!value.isUndefined()) {
194         double doubleValue = value.toNumber(&state);
195         RETURN_IF_EXCEPTION(scope, 0);
196
197         if (!(doubleValue >= minimum && doubleValue <= maximum)) {
198             throwException(&state, scope, createRangeError(&state, *property.publicName() + " is out of range"));
199             return 0;
200         }
201         return static_cast<unsigned>(doubleValue);
202     }
203     return fallback;
204 }
205
206 static String privateUseLangTag(const Vector<String>& parts, size_t startIndex)
207 {
208     size_t numParts = parts.size();
209     size_t currentIndex = startIndex;
210
211     // Check for privateuse.
212     // privateuse = "x" 1*("-" (1*8alphanum))
213     StringBuilder privateuse;
214     while (currentIndex < numParts) {
215         const String& singleton = parts[currentIndex];
216         unsigned singletonLength = singleton.length();
217         bool isValid = (singletonLength == 1 && (singleton == "x" || singleton == "X"));
218         if (!isValid)
219             break;
220
221         if (currentIndex != startIndex)
222             privateuse.append('-');
223
224         ++currentIndex;
225         unsigned numExtParts = 0;
226         privateuse.append('x');
227         while (currentIndex < numParts) {
228             const String& extPart = parts[currentIndex];
229             unsigned extPartLength = extPart.length();
230
231             bool isValid = (extPartLength >= 1 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>());
232             if (!isValid)
233                 break;
234
235             ++currentIndex;
236             ++numExtParts;
237             privateuse.append('-');
238             privateuse.append(extPart.convertToASCIILowercase());
239         }
240
241         // Requires at least one production.
242         if (!numExtParts)
243             return String();
244     }
245
246     // Leftovers makes it invalid.
247     if (currentIndex < numParts)
248         return String();
249
250     return privateuse.toString();
251 }
252
253 static String canonicalLangTag(const Vector<String>& parts)
254 {
255     ASSERT(!parts.isEmpty());
256
257     // Follows the grammar at https://www.rfc-editor.org/rfc/bcp/bcp47.txt
258     // langtag = language ["-" script] ["-" region] *("-" variant) *("-" extension) ["-" privateuse]
259
260     size_t numParts = parts.size();
261     // Check for language.
262     // language = 2*3ALPHA ["-" extlang] / 4ALPHA / 5*8ALPHA
263     size_t currentIndex = 0;
264     const String& language = parts[currentIndex];
265     unsigned languageLength = language.length();
266     bool canHaveExtlang = languageLength >= 2 && languageLength <= 3;
267     bool isValidLanguage = languageLength >= 2 && languageLength <= 8 && language.isAllSpecialCharacters<isASCIIAlpha>();
268     if (!isValidLanguage)
269         return String();
270
271     ++currentIndex;
272     StringBuilder canonical;
273     canonical.append(language.convertToASCIILowercase());
274
275     // Check for extlang.
276     // extlang = 3ALPHA *2("-" 3ALPHA)
277     if (canHaveExtlang) {
278         for (unsigned times = 0; times < 3 && currentIndex < numParts; ++times) {
279             const String& extlang = parts[currentIndex];
280             unsigned extlangLength = extlang.length();
281             if (extlangLength == 3 && extlang.isAllSpecialCharacters<isASCIIAlpha>()) {
282                 ++currentIndex;
283                 canonical.append('-');
284                 canonical.append(extlang.convertToASCIILowercase());
285             } else
286                 break;
287         }
288     }
289
290     // Check for script.
291     // script = 4ALPHA
292     if (currentIndex < numParts) {
293         const String& script = parts[currentIndex];
294         unsigned scriptLength = script.length();
295         if (scriptLength == 4 && script.isAllSpecialCharacters<isASCIIAlpha>()) {
296             ++currentIndex;
297             canonical.append('-');
298             canonical.append(toASCIIUpper(script[0]));
299             canonical.append(script.substring(1, 3).convertToASCIILowercase());
300         }
301     }
302
303     // Check for region.
304     // region = 2ALPHA / 3DIGIT
305     if (currentIndex < numParts) {
306         const String& region = parts[currentIndex];
307         unsigned regionLength = region.length();
308         bool isValidRegion = (
309             (regionLength == 2 && region.isAllSpecialCharacters<isASCIIAlpha>())
310             || (regionLength == 3 && region.isAllSpecialCharacters<isASCIIDigit>())
311         );
312         if (isValidRegion) {
313             ++currentIndex;
314             canonical.append('-');
315             canonical.append(region.convertToASCIIUppercase());
316         }
317     }
318
319     // Check for variant.
320     // variant = 5*8alphanum / (DIGIT 3alphanum)
321     HashSet<String> subtags;
322     while (currentIndex < numParts) {
323         const String& variant = parts[currentIndex];
324         unsigned variantLength = variant.length();
325         bool isValidVariant = (
326             (variantLength >= 5 && variantLength <= 8 && variant.isAllSpecialCharacters<isASCIIAlphanumeric>())
327             || (variantLength == 4 && isASCIIDigit(variant[0]) && variant.substring(1, 3).isAllSpecialCharacters<isASCIIAlphanumeric>())
328         );
329         if (!isValidVariant)
330             break;
331
332         // Cannot include duplicate subtags (case insensitive).
333         String lowerVariant = variant.convertToASCIILowercase();
334         if (!subtags.add(lowerVariant).isNewEntry)
335             return String();
336
337         ++currentIndex;
338
339         // Reordering variant subtags is not required in the spec.
340         canonical.append('-');
341         canonical.append(lowerVariant);
342     }
343
344     // Check for extension.
345     // extension = singleton 1*("-" (2*8alphanum))
346     // singleton = alphanum except x or X
347     subtags.clear();
348     Vector<String> extensions;
349     while (currentIndex < numParts) {
350         const String& possibleSingleton = parts[currentIndex];
351         unsigned singletonLength = possibleSingleton.length();
352         bool isValidSingleton = (singletonLength == 1 && possibleSingleton != "x" && possibleSingleton != "X" && isASCIIAlphanumeric(possibleSingleton[0]));
353         if (!isValidSingleton)
354             break;
355
356         // Cannot include duplicate singleton (case insensitive).
357         String singleton = possibleSingleton.convertToASCIILowercase();
358         if (!subtags.add(singleton).isNewEntry)
359             return String();
360
361         ++currentIndex;
362         int numExtParts = 0;
363         StringBuilder extension;
364         extension.append(singleton);
365         while (currentIndex < numParts) {
366             const String& extPart = parts[currentIndex];
367             unsigned extPartLength = extPart.length();
368
369             bool isValid = (extPartLength >= 2 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>());
370             if (!isValid)
371                 break;
372
373             ++currentIndex;
374             ++numExtParts;
375             extension.append('-');
376             extension.append(extPart.convertToASCIILowercase());
377         }
378
379         // Requires at least one production.
380         if (!numExtParts)
381             return String();
382
383         extensions.append(extension.toString());
384     }
385
386     // Add extensions to canonical sorted by singleton.
387     std::sort(
388         extensions.begin(),
389         extensions.end(),
390         [] (const String& a, const String& b) -> bool {
391             return a[0] < b[0];
392         }
393     );
394     size_t numExtenstions = extensions.size();
395     for (size_t i = 0; i < numExtenstions; ++i) {
396         canonical.append('-');
397         canonical.append(extensions[i]);
398     }
399
400     // Check for privateuse.
401     if (currentIndex < numParts) {
402         String privateuse = privateUseLangTag(parts, currentIndex);
403         if (privateuse.isNull())
404             return String();
405         canonical.append('-');
406         canonical.append(privateuse);
407     }
408
409     // FIXME: Replace subtags with their preferred values.
410
411     return canonical.toString();
412 }
413
414 static String grandfatheredLangTag(const String& locale)
415 {
416     // grandfathered = irregular / regular
417     // FIXME: convert to a compile time hash table if this is causing performance issues.
418     HashMap<String, String> tagMap = {
419         // Irregular.
420         { ASCIILiteral("en-gb-oed"), ASCIILiteral("en-GB-oed") },
421         { ASCIILiteral("i-ami"), ASCIILiteral("ami") },
422         { ASCIILiteral("i-bnn"), ASCIILiteral("bnn") },
423         { ASCIILiteral("i-default"), ASCIILiteral("i-default") },
424         { ASCIILiteral("i-enochian"), ASCIILiteral("i-enochian") },
425         { ASCIILiteral("i-hak"), ASCIILiteral("hak") },
426         { ASCIILiteral("i-klingon"), ASCIILiteral("tlh") },
427         { ASCIILiteral("i-lux"), ASCIILiteral("lb") },
428         { ASCIILiteral("i-mingo"), ASCIILiteral("i-mingo") },
429         { ASCIILiteral("i-navajo"), ASCIILiteral("nv") },
430         { ASCIILiteral("i-pwn"), ASCIILiteral("pwn") },
431         { ASCIILiteral("i-tao"), ASCIILiteral("tao") },
432         { ASCIILiteral("i-tay"), ASCIILiteral("tay") },
433         { ASCIILiteral("i-tsu"), ASCIILiteral("tsu") },
434         { ASCIILiteral("sgn-be-fr"), ASCIILiteral("sfb") },
435         { ASCIILiteral("sgn-be-nl"), ASCIILiteral("vgt") },
436         { ASCIILiteral("sgn-ch-de"), ASCIILiteral("sgg") },
437         // Regular.
438         { ASCIILiteral("art-lojban"), ASCIILiteral("jbo") },
439         { ASCIILiteral("cel-gaulish"), ASCIILiteral("cel-gaulish") },
440         { ASCIILiteral("no-bok"), ASCIILiteral("nb") },
441         { ASCIILiteral("no-nyn"), ASCIILiteral("nn") },
442         { ASCIILiteral("zh-guoyu"), ASCIILiteral("cmn") },
443         { ASCIILiteral("zh-hakka"), ASCIILiteral("hak") },
444         { ASCIILiteral("zh-min"), ASCIILiteral("zh-min") },
445         { ASCIILiteral("zh-min-nan"), ASCIILiteral("nan") },
446         { ASCIILiteral("zh-xiang"), ASCIILiteral("hsn") }
447     };
448
449     return tagMap.get(locale.convertToASCIILowercase());
450 }
451
452 static String canonicalizeLanguageTag(const String& locale)
453 {
454     // IsStructurallyValidLanguageTag (locale)
455     // CanonicalizeLanguageTag (locale)
456     // These are done one after another in CanonicalizeLocaleList, so they are combined here to reduce duplication.
457     // https://www.rfc-editor.org/rfc/bcp/bcp47.txt
458
459     // Language-Tag = langtag / privateuse / grandfathered
460     String grandfather = grandfatheredLangTag(locale);
461     if (!grandfather.isNull())
462         return grandfather;
463
464     // FIXME: Replace redundant tags [RFC4647].
465
466     Vector<String> parts;
467     locale.split('-', true, parts);
468     if (!parts.isEmpty()) {
469         String langtag = canonicalLangTag(parts);
470         if (!langtag.isNull())
471             return langtag;
472
473         String privateuse = privateUseLangTag(parts, 0);
474         if (!privateuse.isNull())
475             return privateuse;
476     }
477
478     return String();
479 }
480
481 Vector<String> canonicalizeLocaleList(ExecState& state, JSValue locales)
482 {
483     // CanonicalizeLocaleList (locales)
484     // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist
485
486     VM& vm = state.vm();
487     auto scope = DECLARE_THROW_SCOPE(vm);
488
489     JSGlobalObject* globalObject = state.jsCallee()->globalObject();
490     Vector<String> seen;
491
492     if (locales.isUndefined())
493         return seen;
494
495     JSObject* localesObject;
496     if (locales.isString()) {
497         JSArray* localesArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous));
498         if (!localesArray) {
499             throwOutOfMemoryError(&state, scope);
500             RETURN_IF_EXCEPTION(scope, Vector<String>());
501         }
502         localesArray->push(&state, locales);
503         RETURN_IF_EXCEPTION(scope, Vector<String>());
504
505         localesObject = localesArray;
506     } else {
507         localesObject = locales.toObject(&state);
508         RETURN_IF_EXCEPTION(scope, Vector<String>());
509     }
510
511     // 6. Let len be ToLength(Get(O, "length")).
512     JSValue lengthProperty = localesObject->get(&state, vm.propertyNames->length);
513     RETURN_IF_EXCEPTION(scope, Vector<String>());
514
515     double length = lengthProperty.toLength(&state);
516     RETURN_IF_EXCEPTION(scope, Vector<String>());
517
518     HashSet<String> seenSet;
519     for (double k = 0; k < length; ++k) {
520         bool kPresent = localesObject->hasProperty(&state, k);
521         RETURN_IF_EXCEPTION(scope, Vector<String>());
522
523         if (kPresent) {
524             JSValue kValue = localesObject->get(&state, k);
525             RETURN_IF_EXCEPTION(scope, Vector<String>());
526
527             if (!kValue.isString() && !kValue.isObject()) {
528                 throwTypeError(&state, scope, ASCIILiteral("locale value must be a string or object"));
529                 return Vector<String>();
530             }
531
532             JSString* tag = kValue.toString(&state);
533             RETURN_IF_EXCEPTION(scope, Vector<String>());
534
535             String canonicalizedTag = canonicalizeLanguageTag(tag->value(&state));
536             if (canonicalizedTag.isNull()) {
537                 throwException(&state, scope, createRangeError(&state, String::format("invalid language tag: %s", tag->value(&state).utf8().data())));
538                 return Vector<String>();
539             }
540
541             if (seenSet.add(canonicalizedTag).isNewEntry)
542                 seen.append(canonicalizedTag);
543         }
544     }
545
546     return seen;
547 }
548
549 String bestAvailableLocale(const HashSet<String>& availableLocales, const String& locale)
550 {
551     // BestAvailableLocale (availableLocales, locale)
552     // https://tc39.github.io/ecma402/#sec-bestavailablelocale
553
554     String candidate = locale;
555     while (!candidate.isEmpty()) {
556         if (availableLocales.contains(candidate))
557             return candidate;
558
559         size_t pos = candidate.reverseFind('-');
560         if (pos == notFound)
561             return String();
562
563         if (pos >= 2 && candidate[pos - 2] == '-')
564             pos -= 2;
565
566         candidate = candidate.substring(0, pos);
567     }
568
569     return String();
570 }
571
572 String defaultLocale(ExecState& state)
573 {
574     // DefaultLocale ()
575     // https://tc39.github.io/ecma402/#sec-defaultlocale
576     
577     // WebCore's global objects will have their own ideas of how to determine the language. It may
578     // be determined by WebCore-specific logic like some WK settings. Usually this will return the
579     // same thing as platformUserPreferredLanguages()[0].
580     if (auto defaultLanguage = state.jsCallee()->globalObject()->globalObjectMethodTable()->defaultLanguage) {
581         String locale = defaultLanguage();
582         if (!locale.isEmpty())
583             return canonicalizeLanguageTag(locale);
584     }
585     
586     // If WebCore isn't around to tell us how to get the language then fall back to our own way of
587     // doing it, which mostly follows what WebCore would have done.
588     Vector<String> languages = platformUserPreferredLanguages();
589     if (!languages.isEmpty() && !languages[0].isEmpty())
590         return canonicalizeLanguageTag(languages[0]);
591     
592     // If all else fails, ask ICU. It will probably say something bogus like en_us even if the user
593     // has configured some other language, but being wrong is better than crashing.
594     String locale = uloc_getDefault();
595     convertICULocaleToBCP47LanguageTag(locale);
596     return locale;
597 }
598
599 String removeUnicodeLocaleExtension(const String& locale)
600 {
601     Vector<String> parts;
602     locale.split('-', parts);
603     StringBuilder builder;
604     size_t partsSize = parts.size();
605     if (partsSize > 0)
606         builder.append(parts[0]);
607     for (size_t p = 1; p < partsSize; ++p) {
608         if (parts[p] == "u") {
609             // Skip the u- and anything that follows until another singleton.
610             // While the next part is part of the unicode extension, skip it.
611             while (p + 1 < partsSize && parts[p + 1].length() > 1)
612                 ++p;
613         } else {
614             builder.append('-');
615             builder.append(parts[p]);
616         }
617     }
618     return builder.toString();
619 }
620
621 static MatcherResult lookupMatcher(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
622 {
623     // LookupMatcher (availableLocales, requestedLocales)
624     // https://tc39.github.io/ecma402/#sec-lookupmatcher
625
626     String locale;
627     String noExtensionsLocale;
628     String availableLocale;
629     for (size_t i = 0; i < requestedLocales.size() && availableLocale.isNull(); ++i) {
630         locale = requestedLocales[i];
631         noExtensionsLocale = removeUnicodeLocaleExtension(locale);
632         availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
633     }
634
635     MatcherResult result;
636     if (!availableLocale.isNull()) {
637         result.locale = availableLocale;
638         if (locale != noExtensionsLocale) {
639             size_t extensionIndex = locale.find("-u-");
640             RELEASE_ASSERT(extensionIndex != notFound);
641
642             size_t extensionLength = locale.length() - extensionIndex;
643             size_t end = extensionIndex + 3;
644             while (end < locale.length()) {
645                 end = locale.find('-', end);
646                 if (end == notFound)
647                     break;
648                 if (end + 2 < locale.length() && locale[end + 2] == '-') {
649                     extensionLength = end - extensionIndex;
650                     break;
651                 }
652                 end++;
653             }
654             result.extension = locale.substring(extensionIndex, extensionLength);
655             result.extensionIndex = extensionIndex;
656         }
657     } else
658         result.locale = defaultLocale(state);
659     return result;
660 }
661
662 static MatcherResult bestFitMatcher(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
663 {
664     // BestFitMatcher (availableLocales, requestedLocales)
665     // https://tc39.github.io/ecma402/#sec-bestfitmatcher
666
667     // FIXME: Implement something better than lookup.
668     return lookupMatcher(state, availableLocales, requestedLocales);
669 }
670
// Splits a Unicode extension sequence and appends the pieces to `subtags`.
// The first three characters of `extension` are skipped (expected to be "-u-").
// - Subtags that precede the first two-character subtag are appended one by one.
// - Every two-character subtag is appended as a key.
// - The text between one key and the next is appended as a single value, with any
//   inner hyphens kept.
// Nothing is appended when `extension` is shorter than three characters.
static void unicodeExtensionSubTags(const String& extension, Vector<String>& subtags)
{
    // UnicodeExtensionSubtags (extension)
    // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags

    auto extensionLength = extension.length();
    if (extensionLength < 3)
        return;

    size_t subtagStart = 3; // Skip initial -u-. Start of the subtag currently being scanned.
    size_t valueStart = 3; // Start of the text not yet appended (the pending value).
    bool isLeading = true; // True until the first key (two-character subtag) has been seen.
    for (size_t index = subtagStart; index < extensionLength; ++index) {
        if (extension[index] == '-') {
            if (index - subtagStart == 2) {
                // Tag is a key, first append prior key's value if there is one.
                // The value spans [valueStart, subtagStart - 1), i.e. without the hyphen before this key.
                if (subtagStart - valueStart > 1)
                    subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
                subtags.append(extension.substring(subtagStart, index - subtagStart));
                valueStart = index + 1;
                isLeading = false;
            } else if (isLeading) {
                // Leading subtags before first key.
                subtags.append(extension.substring(subtagStart, index - subtagStart));
                valueStart = index + 1;
            }
            // Past the first key, a longer subtag is left in place: it is part of the pending value.
            subtagStart = index + 1;
        }
    }
    if (extensionLength - subtagStart == 2) {
        // Trailing an extension key, first append prior key's value if there is one.
        if (subtagStart - valueStart > 1)
            subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
        valueStart = subtagStart;
    }
    // Append final key's value.
    // When the sequence ends with a key or a leading subtag, this appends that subtag itself.
    subtags.append(extension.substring(valueStart, extensionLength - valueStart));
}
709
710 HashMap<String, String> resolveLocale(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, const HashMap<String, String>& options, const char* const relevantExtensionKeys[], size_t relevantExtensionKeyCount, Vector<String> (*localeData)(const String&, size_t))
711 {
712     // ResolveLocale (availableLocales, requestedLocales, options, relevantExtensionKeys, localeData)
713     // https://tc39.github.io/ecma402/#sec-resolvelocale
714
715     const String& matcher = options.get(ASCIILiteral("localeMatcher"));
716     MatcherResult matcherResult = (matcher == "lookup")
717         ? lookupMatcher(state, availableLocales, requestedLocales)
718         : bestFitMatcher(state, availableLocales, requestedLocales);
719
720     String foundLocale = matcherResult.locale;
721
722     Vector<String> extensionSubtags;
723     if (!matcherResult.extension.isNull())
724         unicodeExtensionSubTags(matcherResult.extension, extensionSubtags);
725
726     HashMap<String, String> result;
727     result.add(ASCIILiteral("dataLocale"), foundLocale);
728
729     String supportedExtension = ASCIILiteral("-u");
730     for (size_t keyIndex = 0; keyIndex < relevantExtensionKeyCount; ++keyIndex) {
731         const char* key = relevantExtensionKeys[keyIndex];
732         Vector<String> keyLocaleData = localeData(foundLocale, keyIndex);
733         ASSERT(!keyLocaleData.isEmpty());
734
735         String value = keyLocaleData[0];
736         String supportedExtensionAddition;
737
738         if (!extensionSubtags.isEmpty()) {
739             size_t keyPos = extensionSubtags.find(key);
740             if (keyPos != notFound) {
741                 if (keyPos + 1 < extensionSubtags.size() && extensionSubtags[keyPos + 1].length() > 2) {
742                     const String& requestedValue = extensionSubtags[keyPos + 1];
743                     if (keyLocaleData.contains(requestedValue)) {
744                         value = requestedValue;
745                         supportedExtensionAddition = makeString('-', key, '-', value);
746                     }
747                 } else if (keyLocaleData.contains(static_cast<String>(ASCIILiteral("true")))) {
748                     value = ASCIILiteral("true");
749                 }
750             }
751         }
752
753         HashMap<String, String>::const_iterator iterator = options.find(key);
754         if (iterator != options.end()) {
755             const String& optionsValue = iterator->value;
756             if (!optionsValue.isNull() && keyLocaleData.contains(optionsValue)) {
757                 if (optionsValue != value) {
758                     value = optionsValue;
759                     supportedExtensionAddition = String();
760                 }
761             }
762         }
763         result.add(key, value);
764         supportedExtension.append(supportedExtensionAddition);
765     }
766
767     if (supportedExtension.length() > 2) {
768         String preExtension = foundLocale.substring(0, matcherResult.extensionIndex);
769         String postExtension = foundLocale.substring(matcherResult.extensionIndex);
770         foundLocale = preExtension + supportedExtension + postExtension;
771     }
772
773     result.add(ASCIILiteral("locale"), foundLocale);
774     return result;
775 }
776
777 static JSArray* lookupSupportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
778 {
779     // LookupSupportedLocales (availableLocales, requestedLocales)
780     // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales
781
782     VM& vm = state.vm();
783     auto scope = DECLARE_THROW_SCOPE(vm);
784
785     size_t len = requestedLocales.size();
786     JSGlobalObject* globalObject = state.jsCallee()->globalObject();
787     JSArray* subset = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithUndecided), 0);
788     if (!subset) {
789         throwOutOfMemoryError(&state, scope);
790         return nullptr;
791     }
792
793     for (size_t k = 0; k < len; ++k) {
794         const String& locale = requestedLocales[k];
795         String noExtensionsLocale = removeUnicodeLocaleExtension(locale);
796         String availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
797         if (!availableLocale.isNull()) {
798             subset->push(&state, jsString(&state, locale));
799             RETURN_IF_EXCEPTION(scope, nullptr);
800         }
801     }
802
803     return subset;
804 }
805
806 static JSArray* bestFitSupportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
807 {
808     // BestFitSupportedLocales (availableLocales, requestedLocales)
809     // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
810
811     // FIXME: Implement something better than lookup.
812     return lookupSupportedLocales(state, availableLocales, requestedLocales);
813 }
814
815 JSValue supportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, JSValue options)
816 {
817     // SupportedLocales (availableLocales, requestedLocales, options)
818     // https://tc39.github.io/ecma402/#sec-supportedlocales
819
820     VM& vm = state.vm();
821     auto scope = DECLARE_THROW_SCOPE(vm);
822     String matcher;
823
824     if (!options.isUndefined()) {
825         matcher = intlStringOption(state, options, vm.propertyNames->localeMatcher, { "lookup", "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"", "best fit");
826         RETURN_IF_EXCEPTION(scope, JSValue());
827     } else
828         matcher = ASCIILiteral("best fit");
829
830     JSArray* supportedLocales = (matcher == "best fit")
831         ? bestFitSupportedLocales(state, availableLocales, requestedLocales)
832         : lookupSupportedLocales(state, availableLocales, requestedLocales);
833     RETURN_IF_EXCEPTION(scope, JSValue());
834
835     PropertyNameArray keys(&state, PropertyNameMode::Strings);
836     supportedLocales->getOwnPropertyNames(supportedLocales, &state, keys, EnumerationMode());
837     RETURN_IF_EXCEPTION(scope, JSValue());
838
839     PropertyDescriptor desc;
840     desc.setConfigurable(false);
841     desc.setWritable(false);
842
843     size_t len = keys.size();
844     for (size_t i = 0; i < len; ++i) {
845         supportedLocales->defineOwnProperty(supportedLocales, &state, keys[i], desc, true);
846         RETURN_IF_EXCEPTION(scope, JSValue());
847     }
848
849     return supportedLocales;
850 }
851
852 Vector<String> numberingSystemsForLocale(const String& locale)
853 {
854     static NeverDestroyed<Vector<String>> cachedNumberingSystems;
855     Vector<String>& availableNumberingSystems = cachedNumberingSystems.get();
856     if (availableNumberingSystems.isEmpty()) {
857         UErrorCode status = U_ZERO_ERROR;
858         UEnumeration* numberingSystemNames = unumsys_openAvailableNames(&status);
859         ASSERT(U_SUCCESS(status));
860
861         int32_t resultLength;
862         // Numbering system names are always ASCII, so use char[].
863         while (const char* result = uenum_next(numberingSystemNames, &resultLength, &status)) {
864             ASSERT(U_SUCCESS(status));
865             availableNumberingSystems.append(String(result, resultLength));
866         }
867         uenum_close(numberingSystemNames);
868     }
869
870     UErrorCode status = U_ZERO_ERROR;
871     UNumberingSystem* defaultSystem = unumsys_open(locale.utf8().data(), &status);
872     ASSERT(U_SUCCESS(status));
873     String defaultSystemName(unumsys_getName(defaultSystem));
874     unumsys_close(defaultSystem);
875
876     Vector<String> numberingSystems({ defaultSystemName });
877     numberingSystems.appendVector(availableNumberingSystems);
878     return numberingSystems;
879 }
880
881 EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(ExecState* state)
882 {
883     // Intl.getCanonicalLocales(locales)
884     // https://tc39.github.io/ecma402/#sec-intl.getcanonicallocales
885
886     VM& vm = state->vm();
887     auto scope = DECLARE_THROW_SCOPE(vm);
888
889     Vector<String> localeList = canonicalizeLocaleList(*state, state->argument(0));
890     RETURN_IF_EXCEPTION(scope, encodedJSValue());
891
892     JSGlobalObject* globalObject = state->jsCallee()->globalObject();
893     JSArray* localeArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous));
894     if (!localeArray) {
895         throwOutOfMemoryError(state, scope);
896         return encodedJSValue();
897     }
898
899     auto length = localeList.size();
900     for (size_t i = 0; i < length; ++i) {
901         localeArray->push(state, jsString(state, localeList[i]));
902         RETURN_IF_EXCEPTION(scope, encodedJSValue());
903     }
904     return JSValue::encode(localeArray);
905 }
906
907 } // namespace JSC
908
909 #endif // ENABLE(INTL)