Reviewed by John.
author darin <darin@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 4 Mar 2005 17:54:28 +0000 (17:54 +0000)
committer darin <darin@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 4 Mar 2005 17:54:28 +0000 (17:54 +0000)
        - fixed <rdar://problem/3965666> IDN spoofing vulnerability caused by Unicode characters that look like ASCII characters

        * Misc.subproj/WebNSURLExtras.m:
        (readIDNScriptWhiteListFile): Added. Reads file and parses script names.
        (readIDNScriptWhiteList): Added. Calls readIDNScriptWhiteListFile on each of the white list locations in succession.
        (allCharactersInIDNScriptWhiteList): Renamed from containsPossibleLatinLookalikes and changed sense.
        Now calls readIDNScriptWhiteList first time, and then uses the read-in list to check the scripts.
        (-[NSString _web_mapHostNameWithRange:encode:makeString:]): Call allCharactersInIDNScriptWhiteList instead of
        containsPossibleLatinLookalikes.
        * Resources/IDNScriptWhiteList.txt: Added.
        * WebKit.pbproj/project.pbxproj: Added IDNScriptWhiteList.txt file.

        * Misc.subproj/WebKitLocalizableStrings.m: Removed. This is simply unused.
        * English.lproj/StringsNotToBeLocalized.txt: Updated for recent changes.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@8776 268f45cc-cd09-0410-ab3c-d52691b4dbfc

WebKit/ChangeLog
WebKit/English.lproj/StringsNotToBeLocalized.txt
WebKit/Misc.subproj/WebKitLocalizableStrings.m [deleted file]
WebKit/Misc.subproj/WebNSURLExtras.m
WebKit/Resources/IDNScriptWhiteList.txt [new file with mode: 0644]
WebKit/WebKit.pbproj/project.pbxproj

index 93f41c7..45968ab 100644 (file)
@@ -2,6 +2,25 @@
 
         Reviewed by John.
 
+        - fixed <rdar://problem/3965666> IDN spoofing vulnerability caused by Unicode characters that look like ASCII characters
+
+        * Misc.subproj/WebNSURLExtras.m:
+        (readIDNScriptWhiteListFile): Added. Reads file and parses script names.
+        (readIDNScriptWhiteList): Added. Calls readIDNScriptWhiteListFile on each of the white list locations in succession.
+        (allCharactersInIDNScriptWhiteList): Renamed from containsPossibleLatinLookalikes and changed sense.
+        Now calls readIDNScriptWhiteList first time, and then uses the read-in list to check the scripts.
+        (-[NSString _web_mapHostNameWithRange:encode:makeString:]): Call allCharactersInIDNScriptWhiteList instead of
+        containsPossibleLatinLookalikes.
+        * Resources/IDNScriptWhiteList.txt: Added.
+        * WebKit.pbproj/project.pbxproj: Added IDNScriptWhiteList.txt file.
+
+        * Misc.subproj/WebKitLocalizableStrings.m: Removed. This is simply unused.
+        * English.lproj/StringsNotToBeLocalized.txt: Updated for recent changes.
+
+2005-03-04  Darin Adler  <darin@apple.com>
+
+        Reviewed by John.
+
         - fixed <rdar://problem/3937667> REGRESSION (Mail): Zooming a window from titlebar button doesn't paint newly-exposed portions of window
 
         * WebView.subproj/WebHTMLView.m: (-[WebHTMLView _recursiveDisplayAllDirtyWithLockFocus:visRect:]):
index 10a5fe5..4706da3 100644 (file)
@@ -2,6 +2,8 @@
 "   %s -> %6ld : %.0f : %.0f\n"
 "   %s : %6ld : %.0f : %.0f\n"
 " "
+" #%*[^\n\r]%*[\n\r]"
+" %32[^# \t\n\r]%*[^# \t\n\r] "
 " *POST*"
 " *target*"
 " >>>"
 "SpellingDot"
 "Times"
 "UTF-8"
-"Unable to cache glyph widths for %@ %f"
 "UseBackForwardList"
 "WebActionButtonKey"
 "WebActionElementKey"
 "mainFrameTitle"
 "mainFrameURL"
 "missing_image"
+"multipart/x-mixed-replace"
 "must-revalidate"
 "nullplugin"
 "pluginspage"
 "proxy-revalidate"
 "public.tiff"
+"r"
 "rgb(%.0f,%.0f,%.0f)"
 "rgba(%.0f,%.0f,%.0f,%f)"
 "s"
 "text/xml"
 "text/xsl"
 "tiff"
+"txt"
 "u"
 "unable to get glyphsfor %@ %f error = (%d)"
 "unexpected result from ATSUGetGlyphBounds():  actualNumBounds(%d) != 1"
@@ -406,6 +410,8 @@ History.subproj/WebHistoryItem.m:"children"
 History.subproj/WebHistoryItem.m:"title"
 Misc.subproj/WebIconDatabase.m:"????"
 Misc.subproj/WebKitLogging.m
+Misc.subproj/WebNSURLExtras.m:"IDNScriptWhiteList"
+Misc.subproj/WebNSURLExtras.m:"IDNScriptWhiteList.txt"
 Misc.subproj/WebNSURLExtras.m:"file"
 Plugins.subproj/WebBaseNetscapePluginView.m:"height"
 Plugins.subproj/WebBaseNetscapePluginView.m:"width"
@@ -430,6 +436,7 @@ WebCoreSupport.subproj/WebTextRenderer.m:"YES"
 WebCoreSupport.subproj/WebTextRenderer.m:"not known"
 WebCoreSupport.subproj/WebTextRendererFactory.m:"Arabic"
 WebCoreSupport.subproj/WebTextRendererFactory.m:"Geeza Pro"
+WebCoreSupport.subproj/WebTextRendererFactory.m:"Osaka-Mono"
 WebCoreSupport.subproj/WebTextRendererFactory.m:"Pashto"
 WebCoreSupport.subproj/WebTextRendererFactory.m:"Urdu"
 WebView.subproj/WebDataSource.m:"Refresh"
@@ -437,6 +444,7 @@ WebView.subproj/WebDefaultContextMenuDelegate.m:"Look Up in Dictionary"
 WebView.subproj/WebDefaultContextMenuDelegate.m:"Search in Google"
 WebView.subproj/WebDefaultContextMenuDelegate.m:"Search in Spotlight"
 WebView.subproj/WebHTMLView.m:"%0.fpx"
+WebView.subproj/WebHTMLView.m:"'%@'"
 WebView.subproj/WebHTMLView.m:"Search With Google"
 WebView.subproj/WebHTMLView.m:"Version:"
 WebView.subproj/WebHTMLView.m:"applet"
diff --git a/WebKit/Misc.subproj/WebKitLocalizableStrings.m b/WebKit/Misc.subproj/WebKitLocalizableStrings.m
deleted file mode 100644 (file)
index 4f41cf2..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-//
-//  WebKitLocalizableStrings.m
-//  WebKit
-//
-//  Created by Darin Adler on Fri Oct 04 2002.
-//  Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
-//
-
-#import <WebFoundation/WebLocalizableStrings.h>
-
-WebLocalizableStringsBundle WebKitLocalizableStringsBundle = { "com.apple.WebKit", 0 };
index c9b9a74..6f5da84 100644 (file)
@@ -15,6 +15,7 @@
 #import <Foundation/NSURLRequest.h>
 #import <Foundation/NSURL_NSURLExtras.h>
 
+#import <unicode/uchar.h>
 #import <unicode/uidna.h>
 #import <unicode/uscript.h>
 
@@ -26,6 +27,9 @@ typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, voi
 
 #define URL_BYTES_BUFFER_LENGTH 2048
 
+static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT; // handed to pthread_once so readIDNScriptWhiteList runs a single time
+static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32]; // bit set of allowed scripts: word is script / 32, bit is script % 32
+
 static char hexDigit(int i)
 {
     if (i < 0 || i > 16) {
@@ -772,8 +776,72 @@ static NSString *mapHostNames(NSString *string, BOOL encode)
     return lastChar == '/' && [self _web_hasCaseInsensitivePrefix:@"ftp:"];
 }
 
-static bool containsPossibleLatinLookalikes(const UChar *buffer, int32_t length)
+
+static BOOL readIDNScriptWhiteListFile(NSString *filename)
 {
+    if (!filename) {
+        return NO;
+    }
+    FILE *file = fopen([filename fileSystemRepresentation], "r");
+    if (file == NULL) {
+        return NO;
+    }
+
+    // Read the file a word at a time, setting the IDNScriptWhiteList bit for each script name that ICU recognizes.
+    // Allow comments, starting with # character to the end of the line.
+    while (1) {
+        // Skip a comment if present.
+        int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
+        if (result == EOF) {
+            break;
+        }
+
+        // Read a script name if present (at most 32 characters are kept; the rest of a longer word is discarded).
+        char word[33];
+        result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
+        if (result == EOF) {
+            break;
+        }
+        if (result == 1) {
+            // Got a word, map to script code and put it into the array.
+            int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
+            if (script == UCHAR_INVALID_CODE) {
+                NSLog(@"%@: unknown script code: %s", filename, word);
+            } else if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
+                size_t index = script / 32;
+                uint32_t mask = 1U << (script % 32); // unsigned constant so that shifting into bit 31 is well defined
+                if (IDNScriptWhiteList[index] & mask) {
+                    NSLog(@"%@: script code %s is listed twice\n", filename, word);
+                }
+                IDNScriptWhiteList[index] |= mask; // store in the same word that is tested above, not at [script]
+            }
+        }
+    }
+    fclose(file);
+    return YES; // YES means the file was opened and scanned, even if it named no scripts
+}
+
+static void readIDNScriptWhiteList(void)
+{
+    // Read white list from the first Library directory (searched across all domains) whose IDNScriptWhiteList.txt can be opened.
+    NSArray *dirs = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
+    int i, numDirs = [dirs count];
+    for (i = 0; i < numDirs; i++) {
+       NSString *dir = [dirs objectAtIndex:i];
+       if (readIDNScriptWhiteListFile([dir stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"])) {
+            return;
+        }
+    }
+
+    // No Library directory supplied a file, so fall back on the white list inside the com.apple.WebKit bundle.
+    NSBundle *bundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
+    readIDNScriptWhiteListFile([bundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"]);
+}
+
+static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
+{
+    pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);
+
     int32_t i = 0;
     while (i < length) {
         UChar32 c;
@@ -782,22 +850,22 @@ static bool containsPossibleLatinLookalikes(const UChar *buffer, int32_t length)
         UScriptCode script = uscript_getScript(c, &error);
         if (error != U_ZERO_ERROR) {
             ERROR("got ICU error while trying to look at scripts: %d", error);
-            return true;
+            return NO;
         }
-        // According to Deborah Goldsmith and the Unicode Technical committee, these are the
-        // three scripts that contain characters that look like Latin characters. So as a
-        // matter of policy, we don't display host names containing characters from these
-        // scripts in a "nice" way, to protect the user from misleading host names.
-        switch (script) {
-            case USCRIPT_CHEROKEE:
-            case USCRIPT_CYRILLIC:
-            case USCRIPT_GREEK:
-                return true;
-            default:
-                break;
+        if (script < 0) {
+            ERROR("got negative number for script code from ICU: %d", script);
+            return NO;
+        }
+        if (script >= USCRIPT_CODE_LIMIT) {
+            return NO;
+        }
+        size_t index = script / 32;
+        uint32_t mask = 1 << (script % 32);
+        if (!(IDNScriptWhiteList[index] & mask)) {
+            return NO;
         }
     }
-    return false;
+    return YES;
 }
 
 // Return value of nil means no mapping is necessary.
@@ -836,7 +904,7 @@ static bool containsPossibleLatinLookalikes(const UChar *buffer, int32_t length)
     if (numCharactersConverted == length && memcmp(sourceBuffer, destinationBuffer, length * sizeof(UChar)) == 0) {
         return nil;
     }
-    if (!encode && containsPossibleLatinLookalikes(destinationBuffer, numCharactersConverted)) {
+    if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, numCharactersConverted)) {
         return nil;
     }
     return makeString ? [NSString stringWithCharacters:destinationBuffer length:numCharactersConverted] : self;
diff --git a/WebKit/Resources/IDNScriptWhiteList.txt b/WebKit/Resources/IDNScriptWhiteList.txt
new file mode 100644 (file)
index 0000000..d635a34
--- /dev/null
@@ -0,0 +1,23 @@
+# Default Web Kit International Domain Name Script White List.
+
+Common
+Inherited
+
+Arabic
+Armenian
+Bopomofo
+Canadian_Aboriginal
+Devanagari
+Deseret
+Gujarati
+Gurmukhi
+Hangul
+Han
+Hebrew
+Hiragana
+Katakana_Or_Hiragana
+Katakana
+Latin
+Tamil
+Thai
+Yi
index ef8314a..0842e0d 100644 (file)
                                F5B67131023EDF8901C1A525,
                                5128F1FA04719A4200CA2D3A,
                                8476113406EF82DB00AB2914,
+                               9325FBDD07D829AE00159862,
                        );
                        isa = PBXResourcesBuildPhase;
                        runOnlyForDeploymentPostprocessing = 0;
                                9345D4E70365C58D008635CE,
                                8476113206EF82DB00AB2914,
                                5128F1F904719A4200CA2D3A,
+                               9325FBDC07D829AE00159862,
                        );
                        isa = PBXGroup;
                        name = Resources;
                        refType = 4;
                        sourceTree = "<group>";
                };
+               9325FBDC07D829AE00159862 = {
+                       fileEncoding = 4;
+                       isa = PBXFileReference;
+                       lastKnownFileType = text;
+                       name = IDNScriptWhiteList.txt;
+                       path = Resources/IDNScriptWhiteList.txt;
+                       refType = 4;
+                       sourceTree = "<group>";
+               };
+               9325FBDD07D829AE00159862 = {
+                       fileRef = 9325FBDC07D829AE00159862;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
                933D659903413FF2008635CE = {
                        fileEncoding = 4;
                        isa = PBXFileReference;