JSC: Intl API should ignore encoding when parsing BCP 47 language tag from ISO 15897...
authorcommit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 26 Jul 2018 16:01:04 +0000 (16:01 +0000)
committercommit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 26 Jul 2018 16:01:04 +0000 (16:01 +0000)
https://bugs.webkit.org/show_bug.cgi?id=167991

Patch by Andy VanWagoner <andy@vanwagoner.family> on 2018-07-26
Reviewed by Michael Catanzaro.

Source/JavaScriptCore:

Improved the conversion of ICU locales to BCP47 tags, using their preferred method.
Checked locale.isEmpty() before returning it from defaultLocale, so there should be
no more cases where you might have an invalid locale come back from resolveLocale.

* runtime/IntlObject.cpp:
(JSC::convertICULocaleToBCP47LanguageTag):
(JSC::defaultLocale):
(JSC::lookupMatcher):
* runtime/IntlObject.h:
* runtime/JSGlobalObject.cpp:
(JSC::JSGlobalObject::intlCollatorAvailableLocales):
(JSC::JSGlobalObject::intlDateTimeFormatAvailableLocales):
(JSC::JSGlobalObject::intlNumberFormatAvailableLocales):
(JSC::JSGlobalObject::intlPluralRulesAvailableLocales):

LayoutTests:

Replaced expecting throwing a runtime error to avoid a crash, with testing for good default locale fallback behavior.

* js/intl-default-locale-expected.txt: Added.
* js/intl-default-locale.html: Added.
* js/intl-invalid-locale-crash-expected.txt: Removed.
* js/intl-invalid-locale-crash.html: Removed.
* platform/win/TestExpectations:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@234260 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/js/intl-default-locale-expected.txt [new file with mode: 0644]
LayoutTests/js/intl-default-locale.html [new file with mode: 0644]
LayoutTests/js/intl-invalid-locale-crash-expected.txt [deleted file]
LayoutTests/js/intl-invalid-locale-crash.html [deleted file]
LayoutTests/platform/win/TestExpectations
Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/runtime/IntlObject.cpp
Source/JavaScriptCore/runtime/IntlObject.h
Source/JavaScriptCore/runtime/JSGlobalObject.cpp

index 119f646..c8c37c8 100644 (file)
@@ -1,3 +1,18 @@
+2018-07-26  Andy VanWagoner  <andy@vanwagoner.family>
+
+        JSC: Intl API should ignore encoding when parsing BCP 47 language tag from ISO 15897 locale string (passed via LANG)
+        https://bugs.webkit.org/show_bug.cgi?id=167991
+
+        Reviewed by Michael Catanzaro.
+
+        Replaced expecting throwing a runtime error to avoid a crash, with testing for good default locale fallback behavior.
+
+        * js/intl-default-locale-expected.txt: Added.
+        * js/intl-default-locale.html: Added.
+        * js/intl-invalid-locale-crash-expected.txt: Removed.
+        * js/intl-invalid-locale-crash.html: Removed.
+        * platform/win/TestExpectations:
+
 2018-07-26  Miguel Gomez  <magomez@igalia.com>
 
         Unreviewed GTK+ and WPE gardening after r234252.
diff --git a/LayoutTests/js/intl-default-locale-expected.txt b/LayoutTests/js/intl-default-locale-expected.txt
new file mode 100644 (file)
index 0000000..233f26e
--- /dev/null
@@ -0,0 +1,7 @@
+PASS new Intl.DateTimeFormat().resolvedOptions().locale is 'tlh'
+PASS new Intl.NumberFormat().resolvedOptions().locale is 'tlh'
+PASS new Intl.Collator().resolvedOptions().locale is 'tlh'
+PASS successfullyParsed is true
+
+TEST COMPLETE
+
diff --git a/LayoutTests/js/intl-default-locale.html b/LayoutTests/js/intl-default-locale.html
new file mode 100644 (file)
index 0000000..b105958
--- /dev/null
@@ -0,0 +1,21 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html>
+<head>
+<meta charset="utf-8">
+<script src="../resources/js-test-pre.js"></script>
+</head>
+<body>
+<script>
+if (window.internals) {
+    // Any language name with less than two characters is considered invalid, so we use "a" here.
+    // "i-klingon" is grandfathered, and is canonicalized "tlh".
+    // It should not be part of any available locale sets, so we know it came from here.
+    window.internals.setUserPreferredLanguages([ "a", "*", "en_US.utf8", "i-klingon", "en-US" ]);
+}
+shouldBe("new Intl.DateTimeFormat().resolvedOptions().locale", "'tlh'");
+shouldBe("new Intl.NumberFormat().resolvedOptions().locale", "'tlh'");
+shouldBe("new Intl.Collator().resolvedOptions().locale", "'tlh'");
+</script>
+<script src="../resources/js-test-post.js"></script>
+</body>
+</html>
diff --git a/LayoutTests/js/intl-invalid-locale-crash-expected.txt b/LayoutTests/js/intl-invalid-locale-crash-expected.txt
deleted file mode 100644 (file)
index 2011d66..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-PASS new Intl.DateTimeFormat().resolvedOptions() threw exception TypeError: failed to initialize DateTimeFormat due to invalid locale.
-PASS new Intl.NumberFormat().resolvedOptions() threw exception TypeError: failed to initialize NumberFormat due to invalid locale.
-PASS new Intl.Collator().resolvedOptions() threw exception TypeError: failed to initialize Collator due to invalid locale.
-PASS successfullyParsed is true
-
-TEST COMPLETE
-
diff --git a/LayoutTests/js/intl-invalid-locale-crash.html b/LayoutTests/js/intl-invalid-locale-crash.html
deleted file mode 100644 (file)
index a113d37..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
-<html>
-<head>
-<meta charset="utf-8">
-<script src="../resources/js-test-pre.js"></script>
-</head>
-<body>
-<script>
-if (window.internals) {
-    // Any language name with less than two characters is considered invalid, so we use "a" here.
-    window.internals.setUserPreferredLanguages(["a"]);
-}
-shouldThrow("new Intl.DateTimeFormat().resolvedOptions()", "'TypeError: failed to initialize DateTimeFormat due to invalid locale'");
-shouldThrow("new Intl.NumberFormat().resolvedOptions()", "'TypeError: failed to initialize NumberFormat due to invalid locale'");
-shouldThrow("new Intl.Collator().resolvedOptions()", "'TypeError: failed to initialize Collator due to invalid locale'");
-</script>
-<script src="../resources/js-test-post.js"></script>
-</body>
-</html>
index 5bd3528..0ed6605 100644 (file)
@@ -2481,6 +2481,7 @@ js/array-toLocaleString.html [ Skip ]
 js/date-toLocaleString.html [ Skip ]
 js/intl-collator.html [ Skip ]
 js/intl-datetimeformat.html [ Skip ]
+js/intl-default-locale.html [ Skip ]
 js/intl-numberformat.html [ Skip ]
 js/intl-numberformat-format-to-parts.html [ Skip ]
 js/intl-pluralrules.html [ Skip ]
@@ -3347,7 +3348,6 @@ js/dom/deep-recursion-test.html [ Failure ]
 js/dom/regress-157246.html [ Failure ]
 js/dom/string-prototype-properties.html [ Failure ]
 js/dom/webidl-type-mapping.html [ Failure ]
-js/intl-invalid-locale-crash.html [ Failure ]
 js/regress-141098.html [ Failure ]
 pageoverlay/overlay-installation.html [ Failure ]
 pageoverlay/overlay-large-document-scrolled.html [ Failure ]
index 4a269a0..90dfc7e 100644 (file)
@@ -1,3 +1,25 @@
+2018-07-26  Andy VanWagoner  <andy@vanwagoner.family>
+
+        JSC: Intl API should ignore encoding when parsing BCP 47 language tag from ISO 15897 locale string (passed via LANG)
+        https://bugs.webkit.org/show_bug.cgi?id=167991
+
+        Reviewed by Michael Catanzaro.
+
+        Improved the conversion of ICU locales to BCP47 tags, using their preferred method.
+        Checked locale.isEmpty() before returning it from defaultLocale, so there should be
+        no more cases where you might have an invalid locale come back from resolveLocale.
+
+        * runtime/IntlObject.cpp:
+        (JSC::convertICULocaleToBCP47LanguageTag):
+        (JSC::defaultLocale):
+        (JSC::lookupMatcher):
+        * runtime/IntlObject.h:
+        * runtime/JSGlobalObject.cpp:
+        (JSC::JSGlobalObject::intlCollatorAvailableLocales):
+        (JSC::JSGlobalObject::intlDateTimeFormatAvailableLocales):
+        (JSC::JSGlobalObject::intlNumberFormatAvailableLocales):
+        (JSC::JSGlobalObject::intlPluralRulesAvailableLocales):
+
 2018-07-26  Fujii Hironori  <Hironori.Fujii@sony.com>
 
         REGRESSION(r234248) [Win] testapi.c: nonstandard extension used: non-constant aggregate initializer
index 170f8dd..b2027e4 100644 (file)
@@ -130,9 +130,19 @@ Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSV
     return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
 }
 
-void convertICULocaleToBCP47LanguageTag(String& locale)
+String convertICULocaleToBCP47LanguageTag(const char* localeID)
 {
-    locale.replace('_', '-');
+    UErrorCode status = U_ZERO_ERROR;
+    Vector<char, 32> buffer(32);
+    auto length = uloc_toLanguageTag(localeID, buffer.data(), buffer.size(), false, &status);
+    if (status == U_BUFFER_OVERFLOW_ERROR) {
+        buffer.grow(length);
+        status = U_ZERO_ERROR;
+        uloc_toLanguageTag(localeID, buffer.data(), buffer.size(), false, &status);
+    }
+    if (!U_FAILURE(status))
+        return String(buffer.data(), length);
+    return String();
 }
 
 bool intlBooleanOption(ExecState& state, JSValue options, PropertyName property, bool& usesFallback)
@@ -590,20 +600,25 @@ String defaultLocale(ExecState& state)
     // same thing as userPreferredLanguages()[0].
     VM& vm = state.vm();
     if (auto defaultLanguage = state.jsCallee()->globalObject(vm)->globalObjectMethodTable()->defaultLanguage) {
-        String locale = defaultLanguage();
+        String locale = canonicalizeLanguageTag(defaultLanguage());
         if (!locale.isEmpty())
-            return canonicalizeLanguageTag(locale);
+            return locale;
     }
 
     Vector<String> languages = userPreferredLanguages();
-    if (!languages.isEmpty() && !languages[0].isEmpty())
-        return canonicalizeLanguageTag(languages[0]);
+    for (const auto& language : languages) {
+        String locale = canonicalizeLanguageTag(language);
+        if (!locale.isEmpty())
+            return locale;
+    }
 
     // If all else fails, ask ICU. It will probably say something bogus like en_us even if the user
     // has configured some other language, but being wrong is better than crashing.
-    String locale = uloc_getDefault();
-    convertICULocaleToBCP47LanguageTag(locale);
-    return locale;
+    String locale = convertICULocaleToBCP47LanguageTag(uloc_getDefault());
+    if (!locale.isEmpty())
+        return locale;
+
+    return "en"_s;
 }
 
 String removeUnicodeLocaleExtension(const String& locale)
@@ -646,7 +661,7 @@ static MatcherResult lookupMatcher(ExecState& state, const HashSet<String>& avai
     }
 
     MatcherResult result;
-    if (!availableLocale.isNull()) {
+    if (!availableLocale.isEmpty()) {
         result.locale = availableLocale;
         if (locale != noExtensionsLocale) {
             size_t extensionIndex = locale.find("-u-");
index 7e53864..b9d870e 100644 (file)
@@ -59,7 +59,7 @@ private:
 };
 
 String defaultLocale(ExecState&);
-void convertICULocaleToBCP47LanguageTag(String& locale);
+String convertICULocaleToBCP47LanguageTag(const char* localeID);
 bool intlBooleanOption(ExecState&, JSValue options, PropertyName, bool& usesFallback);
 String intlStringOption(ExecState&, JSValue options, PropertyName, std::initializer_list<const char*> values, const char* notFound, const char* fallback);
 unsigned intlNumberOption(ExecState&, JSValue options, PropertyName, unsigned minimum, unsigned maximum, unsigned fallback);
index aee3d2c..047cedf 100644 (file)
@@ -1582,9 +1582,9 @@ const HashSet<String>& JSGlobalObject::intlCollatorAvailableLocales()
     if (m_intlCollatorAvailableLocales.isEmpty()) {
         int32_t count = ucol_countAvailable();
         for (int32_t i = 0; i < count; ++i) {
-            String locale(ucol_getAvailable(i));
-            convertICULocaleToBCP47LanguageTag(locale);
-            m_intlCollatorAvailableLocales.add(locale);
+            String locale = convertICULocaleToBCP47LanguageTag(ucol_getAvailable(i));
+            if (!locale.isEmpty())
+                m_intlCollatorAvailableLocales.add(locale);
         }
         addMissingScriptLocales(m_intlCollatorAvailableLocales);
     }
@@ -1596,9 +1596,9 @@ const HashSet<String>& JSGlobalObject::intlDateTimeFormatAvailableLocales()
     if (m_intlDateTimeFormatAvailableLocales.isEmpty()) {
         int32_t count = udat_countAvailable();
         for (int32_t i = 0; i < count; ++i) {
-            String locale(udat_getAvailable(i));
-            convertICULocaleToBCP47LanguageTag(locale);
-            m_intlDateTimeFormatAvailableLocales.add(locale);
+            String locale = convertICULocaleToBCP47LanguageTag(udat_getAvailable(i));
+            if (!locale.isEmpty())
+                m_intlDateTimeFormatAvailableLocales.add(locale);
         }
         addMissingScriptLocales(m_intlDateTimeFormatAvailableLocales);
     }
@@ -1610,9 +1610,9 @@ const HashSet<String>& JSGlobalObject::intlNumberFormatAvailableLocales()
     if (m_intlNumberFormatAvailableLocales.isEmpty()) {
         int32_t count = unum_countAvailable();
         for (int32_t i = 0; i < count; ++i) {
-            String locale(unum_getAvailable(i));
-            convertICULocaleToBCP47LanguageTag(locale);
-            m_intlNumberFormatAvailableLocales.add(locale);
+            String locale = convertICULocaleToBCP47LanguageTag(unum_getAvailable(i));
+            if (!locale.isEmpty())
+                m_intlNumberFormatAvailableLocales.add(locale);
         }
         addMissingScriptLocales(m_intlNumberFormatAvailableLocales);
     }
@@ -1624,9 +1624,9 @@ const HashSet<String>& JSGlobalObject::intlPluralRulesAvailableLocales()
     if (m_intlPluralRulesAvailableLocales.isEmpty()) {
         int32_t count = uloc_countAvailable();
         for (int32_t i = 0; i < count; ++i) {
-            String locale(uloc_getAvailable(i));
-            convertICULocaleToBCP47LanguageTag(locale);
-            m_intlPluralRulesAvailableLocales.add(locale);
+            String locale = convertICULocaleToBCP47LanguageTag(uloc_getAvailable(i));
+            if (!locale.isEmpty())
+                m_intlPluralRulesAvailableLocales.add(locale);
         }
         addMissingScriptLocales(m_intlPluralRulesAvailableLocales);
     }