Reviewed by John, except for one bit reviewed by Maciej.
authordarin <darin@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 5 Nov 2003 01:39:58 +0000 (01:39 +0000)
committerdarin <darin@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 5 Nov 2003 01:39:58 +0000 (01:39 +0000)
        - first step for IDNA support; helper functions for Safari

        * Misc.subproj/WebNSURLExtras.h: Add six new methods to manipulate host names directly.
        * Misc.subproj/WebNSURLExtras.m:
        (applyHostNameFunctionToMailToURLString): Added. Finds host names within a mailto URL.
        (applyHostNameFunctionToURLString): Added. Finds host names within a URL.
        (collectRangesThatNeedMapping): Added. Builds a list of host name ranges that need mapping.
        (collectRangesThatNeedEncoding): Added. Calls the above for encoding.
        (collectRangesThatNeedDecoding): Added. Calls the above for decoding.
        (mapHostNames): Added. Helper function that does the entire mapping process for a URL.
        (+[NSURL _web_URLWithUserTypedString:]): Call mapHostNames to encode after trimming whitespace.
        (-[NSURL _web_userVisibleString]): Call mapHostNames to decode after decoding escape sequences.
        (-[NSURL _webkit_URLByRemovingFragment]): Removed unneeded redundant NULL check.
        (-[NSString _web_mapHostNameWithRange:encode:makeString:]): Added. Workhorse function to call
        the IDN functions in the Unicode library.
        (-[NSString _web_hostNameNeedsDecodingWithRange:]): Added.
        (-[NSString _web_hostNameNeedsEncodingWithRange:]): Added.
        (-[NSString _web_decodeHostNameWithRange:]): Added.
        (-[NSString _web_encodeHostNameWithRange:]): Added.
        (-[NSString _web_decodeHostName]): Added.
        (-[NSString _web_encodeHostName]): Added.

        * WebKit.pbproj/project.pbxproj: Added libicucore.dylib.
        * English.lproj/StringsNotToBeLocalized.txt: Updated for above changes.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@5386 268f45cc-cd09-0410-ab3c-d52691b4dbfc

WebKit/ChangeLog
WebKit/English.lproj/StringsNotToBeLocalized.txt
WebKit/Misc.subproj/WebNSURLExtras.h
WebKit/Misc.subproj/WebNSURLExtras.m
WebKit/WebKit.pbproj/project.pbxproj

index abc1cf4650ab08e6887e215040ee2192c0885fe1..8c10dc5e7a1638d40a5d91d40a825f99f73f52ee 100644 (file)
@@ -1,3 +1,32 @@
+2003-11-04  Darin Adler  <darin@apple.com>
+
+        Reviewed by John, except for one bit reviewed by Maciej.
+
+        - first step for IDNA support; helper functions for Safari
+
+        * Misc.subproj/WebNSURLExtras.h: Add six new methods to manipulate host names directly.
+        * Misc.subproj/WebNSURLExtras.m:
+        (applyHostNameFunctionToMailToURLString): Added. Finds host names within a mailto URL.
+        (applyHostNameFunctionToURLString): Added. Finds host names within a URL.
+        (collectRangesThatNeedMapping): Added. Builds a list of host name ranges that need mapping.
+        (collectRangesThatNeedEncoding): Added. Calls the above for encoding.
+        (collectRangesThatNeedDecoding): Added. Calls the above for decoding.
+        (mapHostNames): Added. Helper function that does the entire mapping process for a URL.
+        (+[NSURL _web_URLWithUserTypedString:]): Call mapHostNames to encode after trimming whitespace.
+        (-[NSURL _web_userVisibleString]): Call mapHostNames to decode after decoding escape sequences.
+        (-[NSURL _webkit_URLByRemovingFragment]): Removed unneeded redundant NULL check.
+        (-[NSString _web_mapHostNameWithRange:encode:makeString:]): Added. Workhorse function to call
+        the IDN functions in the Unicode library.
+        (-[NSString _web_hostNameNeedsDecodingWithRange:]): Added.
+        (-[NSString _web_hostNameNeedsEncodingWithRange:]): Added.
+        (-[NSString _web_decodeHostNameWithRange:]): Added.
+        (-[NSString _web_encodeHostNameWithRange:]): Added.
+        (-[NSString _web_decodeHostName]): Added.
+        (-[NSString _web_encodeHostName]): Added.
+
+        * WebKit.pbproj/project.pbxproj: Added libicucore.dylib.
+        * English.lproj/StringsNotToBeLocalized.txt: Updated for above changes.
+
 2003-11-04  John Sullivan  <sullivan@apple.com>
 
         - a little optimization I noticed when looking at 3125137
index f3efaab9396ed44c6b469f9dfa08a76dfbfeb40b..e71b542462e286e88671b09955b0ca183e5e8471 100644 (file)
@@ -35,6 +35,8 @@
 "4194304"
 "7"
 "9"
+"://"
+":/?"
 ";"
 "<!--framePath "
 "="
@@ -44,6 +46,9 @@
 "=================\nERROR: "
 "=================\nFATAL ERROR: "
 "=================\nSHOULD NEVER BE REACHED (%s:%d %s)\n=================\n"
+">,"
+"@"
+"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."
 "ATSUCreateStyle failed (%d)"
 "ATSUCreateTextLayoutWithTextPtr failed(%d)"
 "ATSUGetGlyphBounds() failed(%d)"
 "WebSiteURLToIconURLKey"
 "WebURLsWithTitlesPboardType"
 "\""
+"\"@?"
+"\"\\"
 "\0"
 "\n        "
 "\n    "
 "javascript:"
 "lastVisitedDate"
 "localized string not found"
+"mailto:"
 "nullplugin"
 "pluginspage"
 "text/"
 "visitCount"
 "x"
 "x-apple-web-kit/"
+"xn--"
 "~/Library/Icons"
 History.subproj/WebHistoryItem.m:" in \"%@\""
 History.subproj/WebHistoryItem.m:"children"
index a2a94828fc272113dcdd281a5de48e44251f5e6b..033cc1f841fecc4f14ad29b2b44d686f641d7291 100644 (file)
 
 - (NSData *)_web_originalData;
 - (NSString *)_web_originalDataAsString;
+- (const char *)_web_URLCString;
 
 - (NSString *)_web_userVisibleString;
-- (const char *)_web_URLCString;
 
 - (BOOL)_web_isEmpty;
 
 // FIXME: change these names back to _web_ when identically-named
 // methods are removed from Foundation
+
 - (NSURL *)_webkit_canonicalize;
 - (NSURL *)_webkit_URLByRemovingFragment;
+
 - (BOOL)_webkit_isJavaScriptURL;
 - (NSString *)_webkit_scriptIfJavaScriptURL;
 - (BOOL)_webkit_isFTPDirectoryURL;
+
 - (BOOL)_webkit_shouldLoadAsEmptyDocument;
 
 @end
 
 @interface NSString (WebNSURLExtras)
 
+- (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range; // returns NO if decodeHostNameWithRange: would return nil, but more efficient
+- (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range; // returns NO if encodeHostNameWithRange: would return nil, but more efficient
+
+- (NSString *)_web_decodeHostNameWithRange:(NSRange)range; // turns funny-looking ASCII form into Unicode, returns nil if no decoding needed
+- (NSString *)_web_encodeHostNameWithRange:(NSRange)range; // turns Unicode into funny-looking ASCII form, returns nil if no encoding needed
+
+- (NSString *)_web_decodeHostName; // turns funny-looking ASCII form into Unicode, returns self if no decoding needed, convenient cover
+- (NSString *)_web_encodeHostName; // turns Unicode into funny-looking ASCII form, returns self if no encoding needed, convenient cover
+
 // FIXME: change these names back to _web_ when identically-named
 // methods are removed from Foundation
-- (NSString *)_webkit_stringByReplacingValidPercentEscapes;
-- (NSString *)_webkit_scriptIfJavaScriptURL;
 - (BOOL)_webkit_isJavaScriptURL;
+- (NSString *)_webkit_scriptIfJavaScriptURL;
 - (BOOL)_webkit_isFTPDirectoryURL;
 
 @end
index df46ba44fd689cff08981e0bde3adea2e83b7f51..c5c32b6c9ee2324577da0bba15d79905cfa056a6 100644 (file)
 #import <Foundation/NSURLRequest.h>
 #import <Foundation/NSURL_NSURLExtras.h>
 
-static int URLBytesBufferLength = 2048;
+#import <unicode/uidna.h>
 
-static inline void ReleaseIfNotNULL(CFTypeRef object)
-{
-    if (object) {
-        CFRelease(object);
-    }
-}
+typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);
+
+// Needs to be big enough to hold an IDNA-encoded name.
+// This is way bigger than needed: DNS limits each label to 63 characters and a whole name to 255.
+#define HOST_NAME_BUFFER_LENGTH 2048
 
-static char hexDigit(int i) {
+#define URL_BYTES_BUFFER_LENGTH 2048
+
+static char hexDigit(int i)
+{
     if (i < 0 || i > 16) {
         ERROR("illegal hex digit");
         return '0';
@@ -57,17 +59,202 @@ static int hexDigitValue(char c)
     return 0;
 }
 
+// Calls f(string, hostNameRange, context) once for each host name found in a mailto: URL string.
+// string is the complete URL text; context is passed through to f untouched.
+static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
+{
+    // In a mailto: URL, host names come after a '@' character and end with a '>', ',' or '?'
+    // character, or with the end of the string.
+    // Skip quoted strings so that characters in them don't confuse us.
+    // When we find a '?' character, we are past the part of the URL that contains host names.
+
+    static NSCharacterSet *hostNameOrStringStartCharacters;
+    if (hostNameOrStringStartCharacters == nil) {
+        hostNameOrStringStartCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"\"@?"] retain];
+    }
+    static NSCharacterSet *hostNameEndCharacters;
+    if (hostNameEndCharacters == nil) {
+        hostNameEndCharacters = [[NSCharacterSet characterSetWithCharactersInString:@">,?"] retain];
+    }
+    static NSCharacterSet *quotedStringCharacters;
+    if (quotedStringCharacters == nil) {
+        quotedStringCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"\"\\"] retain];
+    }
+
+    unsigned stringLength = [string length];
+    NSRange remaining = NSMakeRange(0, stringLength);
+
+    while (1) {
+        // Find start of host name or of quoted string.
+        NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
+        if (hostNameOrStringStart.location == NSNotFound) {
+            return;
+        }
+        unichar c = [string characterAtIndex:hostNameOrStringStart.location];
+        remaining.location = NSMaxRange(hostNameOrStringStart);
+        remaining.length = stringLength - remaining.location;
+
+        if (c == '?') {
+            return;
+        }
+
+        if (c == '@') {
+            // Find end of host name.
+            unsigned hostNameStart = remaining.location;
+            NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
+            BOOL done = hostNameEnd.location == NSNotFound;
+            if (done) {
+                // No terminator: the host name runs to the end of the string.
+                hostNameEnd.location = stringLength;
+            } else {
+                // Resume at the terminator itself rather than after it, so that a '?'
+                // terminator is seen again by the search above and ends the scan.
+                remaining.location = hostNameEnd.location;
+                remaining.length = stringLength - remaining.location;
+            }
+
+            // Process host name range.
+            f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);
+
+            if (done) {
+                return;
+            }
+        } else {
+            // Skip quoted string.
+            ASSERT(c == '"');
+            while (1) {
+                NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
+                if (escapedCharacterOrStringEnd.location == NSNotFound) {
+                    return;
+                }
+                c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
+                remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
+                remaining.length = stringLength - remaining.location;
+
+                // If we are at the end of the quoted string, then break from the string loop back to the host name loop.
+                if (c == '"') {
+                    break;
+                }
+
+                // Skip escaped character.
+                ASSERT(c == '\\');
+                if (remaining.length == 0) {
+                    return;
+                }
+                remaining.location += 1;
+                remaining.length -= 1;
+            }
+        }
+    }
+}
+
+// Calls f(string, hostNameRange, context) for each host name found in a URL string.
+// mailto: URLs are handed to applyHostNameFunctionToMailToURLString. For any other string,
+// f is called at most once, and only if the string has the form <scheme>://...
+static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
+{
+    // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
+    // but we have to do it before doing all the %-escaping, and this is the only
+    // code we have that parses mailto URLs anyway.
+
+    // Maybe we should implement this using a character buffer instead?
+
+    if ([string _web_hasCaseInsensitivePrefix:@"mailto:"]) {
+        applyHostNameFunctionToMailToURLString(string, f, context);
+        return;
+    }
+
+    // Find the host name in a hierarchical URL.
+    // It comes after a "://" sequence, with scheme characters preceding.
+    // The authority ends with the end of the string or a "/", "?", or "#".
+    // If there is a "@" character in the authority, the host part is just the part after the last "@".
+    // The host name itself ends at a ":" (the start of the port) or at the end of the authority.
+    NSRange separatorRange = [string rangeOfString:@"://"];
+    if (separatorRange.location == NSNotFound) {
+        return;
+    }
+
+    // Check that all characters before the :// are valid scheme characters.
+    static NSCharacterSet *nonSchemeCharacters;
+    if (nonSchemeCharacters == nil) {
+        nonSchemeCharacters = [[[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet] retain];
+    }
+    if ([string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) {
+        return;
+    }
+
+    unsigned stringLength = [string length];
+
+    static NSCharacterSet *hostTerminators;
+    if (hostTerminators == nil) {
+        hostTerminators = [[NSCharacterSet characterSetWithCharactersInString:@":/?#"] retain];
+    }
+
+    // Start after the separator.
+    unsigned authorityStart = NSMaxRange(separatorRange);
+
+    // Find the end of the authority. A ":" does not end it, because it may separate
+    // a user name from a password rather than a host from a port, so skip over those.
+    unsigned authorityEnd = stringLength;
+    unsigned searchStart = authorityStart;
+    while (searchStart < stringLength) {
+        NSRange terminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(searchStart, stringLength - searchStart)];
+        if (terminator.location == NSNotFound) {
+            break;
+        }
+        if ([string characterAtIndex:terminator.location] != ':') {
+            authorityEnd = terminator.location;
+            break;
+        }
+        searchStart = NSMaxRange(terminator);
+    }
+
+    // Find the last "@" in the authority for the start of the host name.
+    NSRange userInfoTerminator = [string rangeOfString:@"@" options:NSBackwardsSearch range:NSMakeRange(authorityStart, authorityEnd - authorityStart)];
+    unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator);
+
+    // Find the ":" that starts the port, if any. Inside the authority, ":" is the only
+    // terminator character that can still occur.
+    NSRange portSeparator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(hostNameStart, authorityEnd - hostNameStart)];
+    unsigned hostNameEnd = portSeparator.location == NSNotFound ? authorityEnd : portSeparator.location;
+
+    f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
+}
+
 @implementation NSURL (WebNSURLExtras)
 
+// Appends range (wrapped in an NSValue) to a list if the host name at that range of string
+// needs encoding (encode == YES) or decoding (encode == NO).
+// context points to an NSMutableArray * variable. The array is allocated here on first use
+// and is not released here, so the variable stays nil when nothing needs mapping.
+static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
+{
+    BOOL rangeNeedsWork;
+    if (encode) {
+        rangeNeedsWork = [string _web_hostNameNeedsEncodingWithRange:range];
+    } else {
+        rangeNeedsWork = [string _web_hostNameNeedsDecodingWithRange:range];
+    }
+
+    if (rangeNeedsWork) {
+        NSMutableArray **rangeList = (NSMutableArray **)context;
+        if (*rangeList == nil) {
+            *rangeList = [[NSMutableArray alloc] init];
+        }
+        [*rangeList addObject:[NSValue valueWithRange:range]];
+    }
+}
+
+// StringRangeApplierFunction-shaped cover for collectRangesThatNeedMapping with encode == YES.
+static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
+{
+    collectRangesThatNeedMapping(string, range, context, YES);
+}
+
+// StringRangeApplierFunction-shaped cover for collectRangesThatNeedMapping with encode == NO.
+static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
+{
+    collectRangesThatNeedMapping(string, range, context, NO);
+}
+
+// Returns string with every host name that needs it encoded (encode == YES) or decoded
+// (encode == NO). Returns string itself, with no copy made, when no host name needs mapping;
+// otherwise returns an autoreleased mutable copy with the host names replaced.
+static NSString *mapHostNames(NSString *string, BOOL encode)
+{
+    // Generally, we want to optimize for the case where there is one host name that does not need mapping.
+
+    // First pass: gather only the ranges that actually need mapping.
+    // The list stays nil unless at least one range is collected.
+    NSMutableArray *rangesToMap = nil;
+    if (encode) {
+        applyHostNameFunctionToURLString(string, collectRangesThatNeedEncoding, &rangesToMap);
+    } else {
+        applyHostNameFunctionToURLString(string, collectRangesThatNeedDecoding, &rangesToMap);
+    }
+    if (rangesToMap == nil) {
+        return string;
+    }
+
+    // Second pass: replace the ranges from last to first, so that ranges computed against
+    // the original string are still valid for the part of the copy not yet touched.
+    NSMutableString *mapped = [string mutableCopy];
+    unsigned remainingCount;
+    for (remainingCount = [rangesToMap count]; remainingCount != 0; remainingCount--) {
+        NSRange hostNameRange = [[rangesToMap objectAtIndex:remainingCount - 1] rangeValue];
+        NSString *replacement;
+        if (encode) {
+            replacement = [string _web_encodeHostNameWithRange:hostNameRange];
+        } else {
+            replacement = [string _web_decodeHostNameWithRange:hostNameRange];
+        }
+        [mapped replaceCharactersInRange:hostNameRange withString:replacement];
+    }
+    [rangesToMap release];
+    return [mapped autorelease];
+}
+
 + (NSURL *)_web_URLWithUserTypedString:(NSString *)string
 {
     if (string == nil) {
         return nil;
     }
-    string = [string _web_stringByTrimmingWhitespace];
+    string = mapHostNames([string _web_stringByTrimmingWhitespace], YES);
+
     NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding];
     ASSERT(userTypedData);
-        
+
     const UInt8 *inBytes = [userTypedData bytes];
     int inLength = [userTypedData length];
     if (inLength == 0) {
@@ -152,8 +339,8 @@ static int hexDigitValue(char c)
 {
     NSData *data = nil;
 
-    UInt8 static_buffer[URLBytesBufferLength];
-    CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, static_buffer, URLBytesBufferLength);
+    UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
+    CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, static_buffer, URL_BYTES_BUFFER_LENGTH);
     if (bytesFilled != -1) {
         data = [NSData dataWithBytes:static_buffer length:bytesFilled];
     }
@@ -220,7 +407,7 @@ static int hexDigitValue(char c)
         }
     }
     *q = '\0';
-  
+    
     // Check string to see if it can be converted to display using UTF-8  
     NSString *result = [NSString stringWithUTF8String:after];
     if (!result) {
@@ -248,9 +435,13 @@ static int hexDigitValue(char c)
         *q = '\0';
         result = [NSString stringWithUTF8String:after];
     }
+
+    // As an optimization, only do host name decoding if we have xn-- somewhere.
+    bool needsHostNameDecoding = strcasestr(after, "xn--") != NULL;
+  
     free(after);
     
-    return result;
+    return needsHostNameDecoding ? mapHostNames(result, NO) : result;
 }
 
 - (BOOL)_web_isEmpty
@@ -298,8 +489,7 @@ static int hexDigitValue(char c)
     if (!frag) {
         return self;
     }
-    
-    ReleaseIfNotNULL(frag);
+    CFRelease(frag);
     
     WebURLComponents components = [self _web_URLComponents];
     components.fragment = nil;
@@ -329,7 +519,6 @@ static int hexDigitValue(char c)
 
 @end
 
-
 @implementation NSString (WebNSURLExtras)
 
 - (BOOL)_webkit_isJavaScriptURL
@@ -383,4 +572,61 @@ static int hexDigitValue(char c)
     return lastChar == '/' && [self _web_hasCaseInsensitivePrefix:@"ftp:"];
 }
 
+// Runs the host name in range through the ICU IDNA functions: uidna_IDNToASCII when encode
+// is YES, uidna_IDNToUnicode when encode is NO.
+// Return value of nil means no mapping is necessary; nil is also returned when the range is
+// longer than HOST_NAME_BUFFER_LENGTH or when the conversion fails.
+// If makeString is NO, then return value is either nil or self to indicate mapping is necessary.
+// If makeString is YES, then return value is either nil or the mapped string.
+- (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString
+{
+    if (range.length > HOST_NAME_BUFFER_LENGTH) {
+        return nil;
+    }
+
+    UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH];
+    UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH];
+
+    [self getCharacters:sourceBuffer range:range];
+    UErrorCode error = U_ZERO_ERROR;
+    int32_t numCharactersConverted = (encode ? uidna_IDNToASCII : uidna_IDNToUnicode)
+        (sourceBuffer, range.length, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &error);
+    // Use U_FAILURE rather than comparing against U_ZERO_ERROR: ICU also reports warnings
+    // through the error code (for example when the result exactly fills the buffer and is
+    // left unterminated), and those still leave a usable result of the returned length.
+    if (U_FAILURE(error)) {
+        return nil;
+    }
+    // Output identical to input means the host name needs no mapping.
+    if (numCharactersConverted == (int)range.length && memcmp(sourceBuffer, destinationBuffer, range.length * sizeof(UChar)) == 0) {
+        return nil;
+    }
+    return makeString ? [NSString stringWithCharacters:destinationBuffer length:numCharactersConverted] : self;
+}
+
+// Returns YES if decoding the host name in range would produce a different string.
+- (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range
+{
+    // makeString:NO only asks whether mapping is needed; a non-nil answer means it is.
+    NSString *mappingNeeded = [self _web_mapHostNameWithRange:range encode:NO makeString:NO];
+    return mappingNeeded != nil;
+}
+
+// Returns YES if encoding the host name in range would produce a different string.
+- (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range
+{
+    // makeString:NO only asks whether mapping is needed; a non-nil answer means it is.
+    NSString *mappingNeeded = [self _web_mapHostNameWithRange:range encode:YES makeString:NO];
+    return mappingNeeded != nil;
+}
+
+// Returns the decoded form of the host name in range, or nil if no decoding is needed.
+- (NSString *)_web_decodeHostNameWithRange:(NSRange)range
+{
+    NSString *decoded = [self _web_mapHostNameWithRange:range encode:NO makeString:YES];
+    return decoded;
+}
+
+// Returns the encoded form of the host name in range, or nil if no encoding is needed.
+- (NSString *)_web_encodeHostNameWithRange:(NSRange)range
+{
+    NSString *encoded = [self _web_mapHostNameWithRange:range encode:YES makeString:YES];
+    return encoded;
+}
+
+// Treats the whole receiver as a host name and returns its decoded form,
+// or the receiver itself when the mapping returns nil.
+- (NSString *)_web_decodeHostName
+{
+    NSRange entireString = NSMakeRange(0, [self length]);
+    NSString *decoded = [self _web_mapHostNameWithRange:entireString encode:NO makeString:YES];
+    if (decoded != nil) {
+        return decoded;
+    }
+    return self;
+}
+
+// Treats the whole receiver as a host name and returns its encoded form,
+// or the receiver itself when the mapping returns nil.
+- (NSString *)_web_encodeHostName
+{
+    NSRange entireString = NSMakeRange(0, [self length]);
+    NSString *encoded = [self _web_mapHostNameWithRange:entireString encode:YES makeString:YES];
+    if (encoded != nil) {
+        return encoded;
+    }
+    return self;
+}
+
 @end
index a14ab358a66f86de8dc16b7470367a30e34d0c19..15c2f6775fb2b96cf3aa254f7ba5d6c9d2e44892 100644 (file)
                                F5C2869302846DCD018635CA,
                                F5C2869402846DCD018635CA,
                                F5C2869502846DCD018635CA,
+                               93D623DD051E791F002F47DD,
                                F738C9E903FAD3DF0321FBE0,
                                F738C9EA03FAD3DF0321FBE0,
                        );
                                F738CA5703FAD3DF0321FBE0,
                                F738CA5803FAD3DF0321FBE0,
                                BE1B2F5604755C7700CA289C,
+                               93D623DE051E791F002F47DD,
                        );
                        isa = PBXFrameworksBuildPhase;
                        runOnlyForDeploymentPostprocessing = 0;
                        settings = {
                        };
                };
+               93D623DD051E791F002F47DD = {
+                       expectedFileType = "compiled.mach-o.dylib";
+                       isa = PBXFileReference;
+                       name = libicucore.dylib;
+                       path = /usr/lib/libicucore.dylib;
+                       refType = 0;
+                       sourceTree = "<absolute>";
+               };
+               93D623DE051E791F002F47DD = {
+                       fileRef = 93D623DD051E791F002F47DD;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
 //930
 //931
 //932