4 Copyright 2002, Apple, Inc. All rights reserved.
7 #import <WebKit/WebNSURLExtras.h>
9 #import <WebKit/WebAssertions.h>
10 #import <WebKit/WebNSDataExtras.h>
11 #import <WebKit/WebNSObjectExtras.h>
13 #import <Foundation/NSString_NSURLExtras.h>
14 #import <Foundation/NSURLProtocolPrivate.h>
15 #import <Foundation/NSURLRequest.h>
16 #import <Foundation/NSURL_NSURLExtras.h>
18 #import <unicode/uchar.h>
19 #import <unicode/uidna.h>
20 #import <unicode/uscript.h>
// Callback signature used by the host-name scanners below: called with the string being
// scanned, a range inside it, and the caller-supplied context pointer.
22 typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);
24 // Needs to be big enough to hold an IDN-encoded name.
25 // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
26 #define HOST_NAME_BUFFER_LENGTH 2048
// Size of the on-stack buffers passed to CFURLGetBytes before a heap buffer is tried.
28 #define URL_BYTES_BUFFER_LENGTH 2048
// pthread_once guard used so that readIDNScriptWhiteList runs a single time.
30 static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
// Bit set indexed by ICU script code (32 codes per word); a set bit marks a white-listed script.
31 static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
// Converts a value in the range 0-15 to its uppercase ASCII hexadecimal digit.
// Values outside that range are reported through ERROR and yield '0'.
static char hexDigit(int i)
{
    // 15, not 16, is the largest value a single hex digit can represent;
    // accepting 16 would produce the non-hex character 'G'.
    if (i < 0 || i > 15) {
        ERROR("illegal hex digit");
        return '0';
    }
    if (i >= 10) {
        return 'A' + (i - 10);
    }
    return '0' + i;
}
// Reports whether c is an ASCII hexadecimal digit (0-9, A-F or a-f).
static BOOL isHexDigit(char c)
{
    if (c >= '0' && c <= '9') {
        return YES;
    }
    if (c >= 'A' && c <= 'F') {
        return YES;
    }
    if (c >= 'a' && c <= 'f') {
        return YES;
    }
    return NO;
}
// Maps an ASCII hex digit character to a number. The three range checks cover
// '0'-'9', 'A'-'F' and 'a'-'f'; any other character reaches the ERROR call.
// NOTE(review): the returned expressions are not visible in this excerpt —
// presumably 0-15 for valid digits; confirm, including the value returned after ERROR.
54 static int hexDigitValue(char c)
56 if (c >= '0' && c <= '9') {
59 if (c >= 'A' && c <= 'F') {
62 if (c >= 'a' && c <= 'f') {
65 ERROR("illegal hex digit");
// Scans a mailto: URL string and calls f(string, range, context) for each host name range found.
// The three character sets are created on first use and retained in function-local statics.
69 static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
71 // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character.
72 // Skip quoted strings so that characters in them don't confuse us.
73 // When we find a '?' character, we are past the part of the URL that contains host names.
75 static NSCharacterSet *hostNameOrStringStartCharacters;
76 if (hostNameOrStringStartCharacters == nil) {
77 hostNameOrStringStartCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"\"@?"] retain];
79 static NSCharacterSet *hostNameEndCharacters;
80 if (hostNameEndCharacters == nil) {
81 hostNameEndCharacters = [[NSCharacterSet characterSetWithCharactersInString:@">,?"] retain];
83 static NSCharacterSet *quotedStringCharacters;
84 if (quotedStringCharacters == nil) {
85 quotedStringCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"\"\\"] retain];
// remaining always describes the not-yet-scanned tail of the string.
88 unsigned stringLength = [string length];
89 NSRange remaining = NSMakeRange(0, stringLength);
92 // Find start of host name or of quoted string.
93 NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
94 if (hostNameOrStringStart.location == NSNotFound) {
// c is the delimiter that was found ('"', '@' or '?'); scanning resumes just past it.
97 unichar c = [string characterAtIndex:hostNameOrStringStart.location];
98 remaining.location = NSMaxRange(hostNameOrStringStart);
99 remaining.length = stringLength - remaining.location;
106 // Find end of host name.
107 unsigned hostNameStart = remaining.location;
108 NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
// With no terminator, the host name is taken to run to the end of the string.
110 if (hostNameEnd.location == NSNotFound) {
111 hostNameEnd.location = stringLength;
114 remaining.location = hostNameEnd.location;
115 remaining.length = stringLength - remaining.location;
119 // Process host name range.
120 f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);
126 // Skip quoted string.
// Inside a quoted string only '"' (end of string) and '\' (escape) are significant.
129 NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
130 if (escapedCharacterOrStringEnd.location == NSNotFound) {
133 c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
134 remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
135 remaining.length = stringLength - remaining.location;
137 // If we are the end of the string, then break from the string loop back to the host name loop.
142 // Skip escaped character.
// The length check guards against a trailing backslash with nothing after it.
144 if (remaining.length == 0) {
147 remaining.location += 1;
148 remaining.length -= 1;
// Locates host names in a URL string and calls f(string, range, context) on them.
// mailto: URLs are handed to applyHostNameFunctionToMailToURLString; for other URLs the
// single host range following "://" (and following any "@") is passed to f.
154 static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
156 // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
157 // but we have to do it before doing all the %-escaping, and this is the only
158 // code we have that parses mailto URLs anyway.
160 // Maybe we should implement this using a character buffer instead?
162 if ([string _web_hasCaseInsensitivePrefix:@"mailto:"]) {
163 applyHostNameFunctionToMailToURLString(string, f, context);
167 // Find the host name in a hierarchical URL.
168 // It comes after a "://" sequence, with scheme characters preceding.
169 // It ends with the end of the string or a ":", "/", "?", or "#".
170 // If there is a "@" character, the host part is just the part after the "@".
171 NSRange separatorRange = [string rangeOfString:@"://"];
172 if (separatorRange.location == NSNotFound) {
176 // Check that all characters before the :// are valid scheme characters.
// The set is built once (inverted, so it matches anything that is NOT a scheme character) and retained.
177 static NSCharacterSet *nonSchemeCharacters;
178 if (nonSchemeCharacters == nil) {
179 nonSchemeCharacters = [[[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet] retain];
181 if ([string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) {
185 unsigned stringLength = [string length];
187 static NSCharacterSet *hostTerminators;
188 if (hostTerminators == nil) {
189 hostTerminators = [[NSCharacterSet characterSetWithCharactersInString:@":/?#"] retain];
192 // Start after the separator.
193 unsigned authorityStart = NSMaxRange(separatorRange);
195 // Find terminating character.
196 NSRange hostNameTerminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)];
197 unsigned hostNameEnd = hostNameTerminator.location == NSNotFound ? stringLength : hostNameTerminator.location;
199 // Find "@" for the start of the host name.
// Only the span between the separator and the terminator is searched for "@".
200 NSRange userInfoTerminator = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostNameEnd - authorityStart)];
201 unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator);
203 f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
206 @implementation NSURL (WebNSURLExtras)
// Shared body of the two collector callbacks. Decides via the NSString host-name helpers
// whether the range needs IDN encoding (encode == YES) or decoding (encode == NO).
// context is treated as an NSMutableArray **: the array is allocated here with alloc/init
// and the range is appended wrapped in an NSValue. mapHostNames releases the array.
// NOTE(review): the conditions guarding the early exit and the allocation are not visible
// in this excerpt — presumably "no mapping needed" and "array not yet created"; confirm.
208 static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
210 BOOL needsMapping = encode
211 ? [string _web_hostNameNeedsEncodingWithRange:range]
212 : [string _web_hostNameNeedsDecodingWithRange:range];
217 NSMutableArray **array = (NSMutableArray **)context;
219 *array = [[NSMutableArray alloc] init];
222 [*array addObject:[NSValue valueWithRange:range]];
// StringRangeApplierFunction adapter: forwards to collectRangesThatNeedMapping in encode mode.
static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
{
    collectRangesThatNeedMapping(string, range, context, YES);
}
// StringRangeApplierFunction adapter: forwards to collectRangesThatNeedMapping in decode mode.
static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
{
    collectRangesThatNeedMapping(string, range, context, NO);
}
// Returns a version of string in which every host name that needs it has been IDN-encoded
// (encode == YES) or IDN-decoded (encode == NO). Replacements are made in a mutable copy,
// which is returned autoreleased.
// NOTE(review): the early exits (pure-ASCII input when encoding, no ranges collected) are
// not visible in this excerpt — presumably they return the input string unchanged; confirm.
235 static NSString *mapHostNames(NSString *string, BOOL encode)
237 // Generally, we want to optimize for the case where there is one host name that does not need mapping.
239 if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
242 // Make a list of ranges that actually need mapping.
// hostNameRanges stays nil unless the collector callback allocates it.
243 NSMutableArray *hostNameRanges = nil;
244 StringRangeApplierFunction f = encode
245 ? collectRangesThatNeedEncoding
246 : collectRangesThatNeedDecoding;
247 applyHostNameFunctionToURLString(string, f, &hostNameRanges);
248 if (hostNameRanges == nil) {
253 NSMutableString *mutableCopy = [string mutableCopy];
// i starts at the count; NOTE(review): the loop header is not visible — presumably the ranges
// are walked from last to first so earlier ranges stay valid after each replacement; confirm.
254 unsigned i = [hostNameRanges count];
256 NSRange hostNameRange = [[hostNameRanges objectAtIndex:i] rangeValue];
// The mapped name is computed from the original string, then spliced into the copy.
257 NSString *mappedHostName = encode
258 ? [string _web_encodeHostNameWithRange:hostNameRange]
259 : [string _web_decodeHostNameWithRange:hostNameRange];
260 [mutableCopy replaceCharactersInRange:hostNameRange withString:mappedHostName];
// Balances the alloc/init performed by the collector callback.
262 [hostNameRanges release];
263 return [mutableCopy autorelease];
// Builds an NSURL from text typed by the user: trims whitespace, IDN-encodes host names,
// converts the result to UTF-8, %-escapes every byte <= 0x20 or >= 0x7f, and passes the
// escaped bytes to _web_URLWithData:relativeToURL:.
// NOTE(review): the guards at the top of the method and the condition leading to the
// empty-URL return are not visible in this excerpt.
266 + (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
271 string = mapHostNames([string _web_stringByTrimmingWhitespace], YES);
273 NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding];
274 ASSERT(userTypedData);
276 const UInt8 *inBytes = [userTypedData bytes];
277 int inLength = [userTypedData length];
279 return [NSURL URLWithString:@""];
// Worst case is every input byte becoming a three-byte "%XX" sequence.
282 char *outBytes = malloc(inLength * 3); // large enough to %-escape every character
286 for (i = 0; i < inLength; i++) {
287 UInt8 c = inBytes[i];
288 if (c <= 0x20 || c >= 0x7f) {
// High nibble first, then low nibble.
290 *p++ = hexDigit(c >> 4);
291 *p++ = hexDigit(c & 0xf);
300 NSData *data = [NSData dataWithBytesNoCopy:outBytes length:outLength]; // adopts outBytes
301 return [self _web_URLWithData:data relativeToURL:URL];
// Convenience form of _web_URLWithUserTypedString:relativeToURL: with no base URL.
+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string
{
    NSURL *noBaseURL = nil;
    return [self _web_URLWithUserTypedString:string relativeToURL:noBaseURL];
}
// Convenience form of _web_URLWithDataAsString:relativeToURL: with no base URL.
// NOTE(review): lines between the signature and this return are not visible in this
// excerpt (possibly an argument check); confirm before relying on nil handling.
309 + (NSURL *)_web_URLWithDataAsString:(NSString *)string
314 return [self _web_URLWithDataAsString:string relativeToURL:nil];
// Trims whitespace from string, converts it to bytes using ISO Latin 1, and builds a URL
// from those bytes relative to baseURL.
// NOTE(review): lines between the signature and the trim are not visible in this excerpt.
317 + (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
322 string = [string _web_stringByTrimmingWhitespace];
323 NSData *data = [string dataUsingEncoding:NSISOLatin1StringEncoding];
324 return [self _web_URLWithData:data relativeToURL:baseURL];
// Convenience form of _web_URLWithData:relativeToURL: with no base URL.
// NOTE(review): lines between the signature and this return are not visible in this excerpt.
327 + (NSURL *)_web_URLWithData:(NSData *)data
332 return [self _web_URLWithData:data relativeToURL:nil];
// Creates an NSURL from raw URL bytes, resolved against baseURL, using
// CFURLCreateAbsoluteURLWithBytes. UTF-8 is tried first, then ISO Latin 1; an empty-string
// URL is a third possible result.
// NOTE(review): the conditions selecting between these three results are not visible in
// this excerpt — presumably "UTF-8 attempt returned NULL" and "zero-length data"; confirm.
335 + (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
342 int length = [data length];
344 const UInt8 *bytes = [data bytes];
345 // NOTE: We use UTF-8 here since this encoding is used when computing strings when returning URL components
346 // (e.g calls to NSURL -path). However, this function is not tolerant of illegal UTF-8 sequences, which
347 // could either be a malformed string or bytes in a different encoding, like shift-jis, so we fall back
348 // onto using ISO Latin 1 in those cases.
// WebCFAutorelease hands ownership of the created CFURL to the autorelease pool.
349 result = WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, bytes, length, kCFStringEncodingUTF8, (CFURLRef)baseURL, YES));
351 result = WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, bytes, length, kCFStringEncodingISOLatin1, (CFURLRef)baseURL, YES));
355 result = [NSURL URLWithString:@""];
// Returns the URL's bytes as reported by CFURLGetBytes. A stack buffer is tried first;
// when CFURLGetBytes reports -1 the required size is queried and a heap buffer is used.
// When a base URL is involved, the URL is rebuilt relative to it and that URL's original
// data is returned instead.
// NOTE(review): the branch structure around the base-URL handling is not visible in this
// excerpt — presumably the re-resolution happens only when baseURL is non-nil; confirm.
360 - (NSData *)_web_originalData
364 UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
365 CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, static_buffer, URL_BYTES_BUFFER_LENGTH);
366 if (bytesFilled != -1) {
// Copy out of the stack buffer, which does not outlive this call.
367 data = [NSData dataWithBytes:static_buffer length:bytesFilled];
// Calling with a NULL buffer returns the number of bytes needed.
370 CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
371 UInt8 *buffer = malloc(bytesToAllocate);
372 bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
373 ASSERT(bytesFilled == bytesToAllocate);
374 // buffer is adopted by the NSData
375 data = [NSData dataWithBytesNoCopy:buffer length:bytesFilled];
378 NSURL *baseURL = (NSURL *)CFURLGetBaseURL((CFURLRef)self);
380 NSURL *URL = [NSURL _web_URLWithData:data relativeToURL:baseURL];
381 return [URL _web_originalData];
// Returns the URL's original bytes decoded as an ISO Latin 1 string (autoreleased).
- (NSString *)_web_originalDataAsString
{
    NSData *originalBytes = [self _web_originalData];
    NSString *latin1String = [[NSString alloc] initWithData:originalBytes encoding:NSISOLatin1StringEncoding];
    return [latin1String autorelease];
}
// Produces a display form of the URL from its original bytes: bytes <= 0x20 and 0x7f are
// %-escaped, "%XX" sequences are examined for possible unescaping, and host names are
// IDN-decoded when an "xn--" prefix was seen. If the buffer is not valid UTF-8, a second
// pass rewrites it so that it can be converted.
// NOTE(review): several branch bodies (which escapes are actually unescaped, buffer
// termination, freeing of `after`) are not visible in this excerpt; confirm against the
// full source before changing buffer arithmetic.
393 - (NSString *)_web_userVisibleString
395 NSData *data = [self _web_originalData];
396 const unsigned char *before = [data bytes];
397 int length = [data length];
399 bool needsHostNameDecoding = false;
401 const unsigned char *p = before;
// Three output bytes per input byte, plus one for the terminating NUL.
402 int bufferLength = (length * 3) + 1;
403 char *after = malloc(bufferLength); // large enough to %-escape every character
406 for (i = 0; i < length; i++) {
407 unsigned char c = p[i];
408 // escape control characters, space, and delete
409 if (c <= 0x20 || c == 0x7f) {
411 *q++ = hexDigit(c >> 4);
412 *q++ = hexDigit(c & 0xf);
414 // unescape escape sequences that indicate bytes greater than 0x7f
// Both following characters must exist and be hex digits for this to count as an escape.
415 else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
416 unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
432 // Check for "xn--" in an efficient, non-case-sensitive, way.
// ORing with 0x20 folds ASCII letters to lower case. NOTE(review): the q[-4]..q[-2] offsets
// assume q has already been advanced past the current '-' — the write is not visible here; confirm.
433 if (c == '-' && i >= 3 && !needsHostNameDecoding && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
434 needsHostNameDecoding = true;
439 // Check string to see if it can be converted to display using UTF-8
440 NSString *result = [NSString stringWithUTF8String:after];
442 // Could not convert to UTF-8.
443 // Convert characters greater than 0x7f to escape sequences.
444 // Shift current string to the end of the buffer
445 // then we will copy back bytes to the start of the buffer
447 int afterlength = q - after;
// This p (char *) shadows the outer p; it points at the shifted copy near the end of the buffer.
448 char *p = after + bufferLength - afterlength - 1;
449 memmove(p, after, afterlength + 1); // copies trailing '\0'
452 unsigned char c = *p;
455 *q++ = hexDigit(c >> 4);
456 *q++ = hexDigit(c & 0xf);
464 result = [NSString stringWithUTF8String:after];
469 // As an optimization, only do host name decoding if we have "xn--" somewhere.
470 return needsHostNameDecoding ? mapHostNames(result, NO) : result;
476 if (!CFURLGetBaseURL((CFURLRef)self)) {
477 length = CFURLGetBytes((CFURLRef)self, NULL, 0);
480 length = [[self _web_userVisibleString] length];
// Returns the URL's original bytes as a NUL-terminated C string. The pointer refers to the
// storage of an autoreleased NSMutableData, so it is only valid while that object is alive.
- (const char *)_web_URLCString
{
    NSData *urlBytes = [self _web_originalData];
    NSMutableData *terminated = [NSMutableData data];
    [terminated appendData:urlBytes];

    // Append the terminating NUL byte.
    const char terminator = '\0';
    [terminated appendBytes:&terminator length:1];

    return (const char *)[terminated bytes];
}
// Canonicalizes the URL by asking the NSURLProtocol class that handles a request for it
// for the canonical request, and returning that request's URL (retained, then autoreleased).
// NOTE(review): the branch taken when no protocol class is found and the release of
// `request` (created with alloc/init) are not visible in this excerpt — confirm that the
// request is released on every path.
493 - (NSURL *)_webkit_canonicalize
495 NSURLRequest *request = [[NSURLRequest alloc] initWithURL:self];
496 Class concreteClass = [NSURLProtocol _protocolClassForRequest:request];
497 if (!concreteClass) {
503 NSURLRequest *newRequest = [concreteClass canonicalRequestForRequest:request];
504 NSURL *newURL = [newRequest URL];
// retain + autorelease keeps newURL alive independently of newRequest.
505 result = [[newURL retain] autorelease];
// Returns a URL equal to the receiver with its fragment removed, rebuilt from the
// receiver's components; falls back to the receiver itself if rebuilding yields nil.
// NOTE(review): the handling of `frag` (a Copy-rule CFStringRef that must be released) and
// the no-fragment early exit are not visible in this excerpt; confirm.
511 - (NSURL *)_webkit_URLByRemovingFragment
513 // Check to see if a fragment exists before decomposing the URL.
514 CFStringRef frag = CFURLCopyFragment((CFURLRef)self, NULL);
520 WebURLComponents components = [self _web_URLComponents];
521 components.fragment = nil;
522 NSURL *result = [NSURL _web_URLWithComponents:components];
523 return result ? result : self;
// Applies the NSString test of the same name to the URL's original-data string.
- (BOOL)_webkit_isJavaScriptURL
{
    NSString *URLString = [self _web_originalDataAsString];
    return [URLString _webkit_isJavaScriptURL];
}
// Applies the NSString method of the same name to the URL's original-data string.
- (NSString *)_webkit_scriptIfJavaScriptURL
{
    NSString *URLString = [self _web_originalDataAsString];
    return [URLString _webkit_scriptIfJavaScriptURL];
}
// Applies the NSString test of the same name to the URL's original-data string.
- (BOOL)_webkit_isFTPDirectoryURL
{
    NSString *URLString = [self _web_originalDataAsString];
    return [URLString _webkit_isFTPDirectoryURL];
}
// YES when the URL's original-data string starts with "about:" (case-insensitively) or
// when the URL reports itself as empty. The emptiness check only runs if the prefix
// check fails.
- (BOOL)_webkit_shouldLoadAsEmptyDocument
{
    NSString *URLString = [self _web_originalDataAsString];
    if ([URLString _web_hasCaseInsensitivePrefix:@"about:"]) {
        return YES;
    }
    return [self _web_isEmpty] ? YES : NO;
}
// Returns a URL whose scheme bytes have been passed through tolower(). The URL bytes are
// fetched into a stack buffer, or a malloc'd buffer when CFURLGetBytes reports -1, and a
// new URL is created from the modified bytes only when something changed.
// NOTE(review): the early return for a missing scheme, the bookkeeping of `changed`, the
// alternative arm of the final conditional, and the free() of the heap buffer are not
// visible in this excerpt.
546 - (NSURL *)_web_URLWithLowercasedScheme
549 CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &range);
550 if (range.location == kCFNotFound) {
554 UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
555 UInt8 *buffer = static_buffer;
556 CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
557 if (bytesFilled == -1) {
// Calling with a NULL buffer returns the number of bytes needed.
558 CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
559 buffer = malloc(bytesToAllocate);
560 bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
561 ASSERT(bytesFilled == bytesToAllocate);
// Lower-case the scheme bytes in place.
566 for (i = 0; i < range.length; ++i) {
567 UInt8 c = buffer[range.location + i];
568 UInt8 lower = tolower(c);
570 buffer[range.location + i] = lower;
575 NSURL *result = changed
576 ? WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, buffer, bytesFilled, kCFStringEncodingUTF8, nil, YES))
// A buffer different from static_buffer means the heap path was taken.
579 if (buffer != static_buffer) {
// Inspects the query component's range-with-separators; the tested case is a range that
// exists and has length 1.
// NOTE(review): the returned values are not visible in this excerpt — presumably YES for
// that case (a bare "?" with no query text) and NO otherwise; confirm.
587 -(BOOL)_web_hasQuestionMarkOnlyQueryString
589 CFRange rangeWithSeparators;
590 CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentQuery, &rangeWithSeparators);
591 if (rangeWithSeparators.location != kCFNotFound && rangeWithSeparators.length == 1) {
// Extracts the characters that follow the scheme's ":" within the scheme's
// range-with-separators (for example the slashes) and returns them as ISO Latin 1 data.
// result starts as nil and is only assigned when the computed range fits inside
// -absoluteString.
// NOTE(review): the final return is not visible in this excerpt — presumably `result`.
597 -(NSData *)_web_schemeSeparatorWithoutColon
599 NSData *result = nil;
600 CFRange rangeWithSeparators;
601 CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &rangeWithSeparators);
602 if (rangeWithSeparators.location != kCFNotFound) {
603 NSString *absoluteString = [self absoluteString];
// Start one past the end of the scheme (skipping the ':'); the length is what remains of the separators.
604 NSRange separatorsRange = NSMakeRange(range.location + range.length + 1, rangeWithSeparators.length - range.length - 1);
// Bounds check before substringWithRange:.
605 if (separatorsRange.location + separatorsRange.length <= [absoluteString length]) {
606 NSString *slashes = [absoluteString substringWithRange:separatorsRange];
607 result = [slashes dataUsingEncoding:NSISOLatin1StringEncoding];
// Sentinel component type meaning "the whole URL" rather than one CFURL component.
613 #define completeURL (CFURLComponentType)-1
// Returns the bytes of one URL component (or of the whole URL for completeURL), with every
// byte <= 0x20 or >= 0x7f %-escaped. For the query component a leading '?' is added when
// the query is non-empty or the URL has a question-mark-only query.
// NOTE(review): the early return for a missing component is not visible in this excerpt —
// confirm that a heap-allocated allBytesBuffer is freed on that path too.
615 -(NSData *)_web_dataForURLComponentType:(CFURLComponentType)componentType
617 static int URLComponentTypeBufferLength = 2048;
// Stack buffer first; the heap is used only when CFURLGetBytes reports -1.
619 UInt8 staticAllBytesBuffer[URLComponentTypeBufferLength];
620 UInt8 *allBytesBuffer = staticAllBytesBuffer;
622 CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, URLComponentTypeBufferLength);
623 if (bytesFilled == -1) {
624 CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
625 allBytesBuffer = malloc(bytesToAllocate);
626 bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, bytesToAllocate);
630 if (componentType != completeURL) {
631 range = CFURLGetByteRangeForComponent((CFURLRef)self, componentType, NULL);
632 if (range.location == kCFNotFound) {
// Whole-URL case: the range covers every byte fetched.
638 range.length = bytesFilled;
// Copy the component bytes so they no longer depend on allBytesBuffer.
641 NSData *componentData = [NSData dataWithBytes:allBytesBuffer + range.location length:range.length];
643 const unsigned char *bytes = [componentData bytes];
644 NSMutableData *resultData = [NSMutableData data];
645 // NOTE: add leading '?' to query strings non-zero length query strings.
646 // NOTE: retain question-mark only query strings.
647 if (componentType == kCFURLComponentQuery) {
648 if (range.length > 0 || [self _web_hasQuestionMarkOnlyQueryString]) {
649 [resultData appendBytes:"?" length:1];
653 for (i = 0; i < range.length; i++) {
654 unsigned char c = bytes[i];
655 if (c <= 0x20 || c >= 0x7f) {
// High nibble, then low nibble, after the escape's first byte.
658 escaped[1] = hexDigit(c >> 4);
659 escaped[2] = hexDigit(c & 0xf);
660 [resultData appendBytes:escaped length:3];
665 [resultData appendBytes:b length:1];
// Only the heap-allocated buffer needs freeing.
669 if (staticAllBytesBuffer != allBytesBuffer) {
670 free(allBytesBuffer);
// Returns the URL's scheme component as escaped bytes.
-(NSData *)_web_schemeData
{
    NSData *schemeBytes = [self _web_dataForURLComponentType:kCFURLComponentScheme];
    return schemeBytes;
}
// Returns the URL's host component as escaped bytes. For URLs whose scheme is "file"
// (compared case-insensitively), a host of "localhost" is reported as nil.
// NOTE(review): the return for non-file schemes is not visible in this excerpt —
// presumably `result`; confirm.
681 -(NSData *)_web_hostData
683 NSData *result = [self _web_dataForURLComponentType:kCFURLComponentHost];
684 NSData *scheme = [self _web_schemeData];
685 // Take off localhost for file
686 if ([scheme _web_isCaseInsensitiveEqualToCString:"file"]) {
687 return ([result _web_isCaseInsensitiveEqualToCString:"localhost"]) ? nil : result;
// Returns the URL's host as a UTF-8 decoded, autoreleased string. When there is no host
// data (nil), an empty NSData is substituted so the result is an empty string.
- (NSString *)_web_hostString
{
    NSData *data = [self _web_hostData];
    if (!data) {
        data = [NSData data];
    }
    // Decode the guarded local rather than calling _web_hostData again: a second call
    // would bypass the nil substitution above and repeat the component extraction.
    return [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
}
703 @implementation NSString (WebNSURLExtras)
// Returns YES when the receiver contains none of the things that would make
// _web_userVisibleString change it: bytes <= 0x20 or 0x7f, %-escapes that encode a byte
// above 0x7f, or an "xn--" (IDN) label prefix.
- (BOOL)_web_isUserVisibleURL
{
    BOOL valid = YES;

    // Get a UTF-8 C string, using the stack buffer when the string fits.
    char static_buffer[1024];
    const char *p;
    BOOL success = CFStringGetCString((CFStringRef)self, static_buffer, 1023, kCFStringEncodingUTF8);
    if (success) {
        p = static_buffer;
    } else {
        p = [self UTF8String];
    }

    int length = strlen(p);

    // check for characters <= 0x20 or >=0x7f, %-escape sequences of %7f, and xn--, these
    // are the things that will lead _web_userVisibleString to actually change things.
    int i;
    for (i = 0; i < length; i++) {
        unsigned char c = p[i];
        // escape control characters, space, and delete
        if (c <= 0x20 || c == 0x7f) {
            valid = NO;
            break;
        } else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
            unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
            if (u > 0x7f) {
                valid = NO;
                break;
            }
            // Skip the two hex digits of the escape.
            i += 2;
        } else {
            // Check for "xn--" in an efficient, non-case-sensitive, way.
            // p is never advanced, so the preceding characters must be addressed relative
            // to i; indexing p[-3]..p[-1] would read before the start of the buffer.
            if (c == '-' && i >= 3 && (p[i - 3] | 0x20) == 'x' && (p[i - 2] | 0x20) == 'n' && p[i - 1] == '-') {
                valid = NO;
                break;
            }
        }
    }

    return valid;
}
// YES when the string begins with "javascript:" (compared case-insensitively).
- (BOOL)_webkit_isJavaScriptURL
{
    BOOL hasJavaScriptScheme = [self _web_hasCaseInsensitivePrefix:@"javascript:"];
    return hasJavaScriptScheme;
}
// Replaces percent escapes in the receiver, interpreting the escaped bytes as UTF-8.
// NOTE(review): the return statement is not visible in this excerpt.
// -stringByReplacingPercentEscapesUsingEncoding: can return nil, so presumably there is a
// fallback to the receiver — confirm.
755 - (NSString *)_webkit_stringByReplacingValidPercentEscapes
757 NSString *s = [self stringByReplacingPercentEscapesUsingEncoding:NSUTF8StringEncoding];
// For a javascript: URL string, returns the script text after the scheme with valid
// percent escapes replaced.
// NOTE(review): the value returned for non-javascript strings is not visible in this
// excerpt — presumably nil; confirm.
761 - (NSString *)_webkit_scriptIfJavaScriptURL
763 if (![self _webkit_isJavaScriptURL]) {
// 11 is the length of the "javascript:" prefix.
766 return [[self substringFromIndex:11] _webkit_stringByReplacingValidPercentEscapes];
// YES when the string ends in '/' and begins with "ftp:" (compared case-insensitively).
// Strings shorter than 5 characters are handled by the length check first.
// NOTE(review): the value returned for short strings is not visible in this excerpt —
// presumably NO; confirm.
769 - (BOOL)_webkit_isFTPDirectoryURL
771 int length = [self length];
772 if (length < 5) { // 5 is length of "ftp:/"
// Safe to index length - 1 because length >= 5 here.
775 unichar lastChar = [self characterAtIndex:length - 1];
776 return lastChar == '/' && [self _web_hasCaseInsensitivePrefix:@"ftp:"];
// Reads a white-list file of script names and sets the matching bits in
// IDNScriptWhiteList. Unknown names and duplicates are logged with NSLog.
// NOTE(review): the handling of a nil filename, a failed fopen, the loop exit conditions,
// fclose and the BOOL result are not visible in this excerpt.
780 static BOOL readIDNScriptWhiteListFile(NSString *filename)
785 FILE *file = fopen([filename fileSystemRepresentation], "r");
790 // Read a word at a time.
791 // Allow comments, starting with # character to the end of the line.
793 // Skip a comment if present.
// The format consumes leading whitespace, a '#', the rest of that line, and the line ending.
794 int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
799 // Read a script name if present.
// Stores at most 32 characters into word and discards any excess characters of the same token.
801 result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
806 // Got a word, map to script code and put it into the array.
807 int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
808 if (script == UCHAR_INVALID_CODE) {
809 NSLog(@"%@: unknown script code: %s", filename, word);
810 } else if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
// 32 script codes per array word: index selects the word, mask the bit within it.
811 size_t index = script / 32;
812 uint32_t mask = 1 << (script % 32);
813 if (IDNScriptWhiteList[index] & mask) {
814 NSLog(@"%@: script code %s is listed twice\n", filename, word);
816 IDNScriptWhiteList[index] |= mask;
// Loads the IDN script white list: tries "IDNScriptWhiteList.txt" in each Library
// directory across all domains, then the copy bundled in the com.apple.WebKit bundle.
// Invoked through pthread_once by allCharactersInIDNScriptWhiteList.
// NOTE(review): what happens after a successful read from a Library directory is not
// visible in this excerpt — presumably the function returns without reading the bundle
// copy; confirm.
824 static void readIDNScriptWhiteList(void)
826 // Read white list from library.
827 NSArray *dirs = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
828 int i, numDirs = [dirs count];
829 for (i = 0; i < numDirs; i++) {
830 NSString *dir = [dirs objectAtIndex:i];
831 if (readIDNScriptWhiteListFile([dir stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"])) {
836 // Fall back on white list inside bundle.
837 NSBundle *bundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
838 readIDNScriptWhiteListFile([bundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"]);
// Checks the scripts of the code points in a UTF-16 buffer against the IDN script white
// list, loading the list on first use through pthread_once.
// NOTE(review): the loop header and every return value are not visible in this excerpt —
// presumably NO on an ICU error, an out-of-range script or a non-white-listed script, and
// YES otherwise; confirm.
841 static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
843 pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);
// U16_NEXT decodes one code point (handling surrogate pairs) and advances i.
848 U16_NEXT(buffer, i, length, c)
849 UErrorCode error = U_ZERO_ERROR;
850 UScriptCode script = uscript_getScript(c, &error);
851 if (error != U_ZERO_ERROR) {
852 ERROR("got ICU error while trying to look at scripts: %d", error);
856 ERROR("got negative number for script code from ICU: %d", script);
// Script codes at or above the limit have no bit in IDNScriptWhiteList.
859 if (script >= USCRIPT_CODE_LIMIT) {
// Same word/bit layout as used when the list is populated.
862 size_t index = script / 32;
863 uint32_t mask = 1 << (script % 32);
864 if (!(IDNScriptWhiteList[index] & mask)) {
// Return value of nil means no mapping is necessary.
// If makeString is NO, then return value is either nil or self to indicate mapping is necessary.
// If makeString is YES, then return value is either nil or the mapped string.
//
// range selects the host name inside the receiver. encode chooses IDN-to-ASCII (YES) or
// IDN-to-Unicode (NO). When encoding, percent escapes inside the host name are replaced
// before the conversion. Decoded names containing characters from scripts outside the IDN
// script white list are treated as needing no mapping.
- (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString
{
    // Host names that do not fit the fixed-size buffers are left alone.
    if (range.length > HOST_NAME_BUFFER_LENGTH) {
        return nil;
    }

    if ([self length] == 0)
        return nil;

    UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH];
    UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH];

    NSString *string = self;
    if (encode && [self rangeOfString:@"%" options:NSLiteralSearch range:range].location != NSNotFound) {
        NSString *substring = [self substringWithRange:range];
        substring = WebCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)substring, CFSTR("")));
        if (substring != nil) {
            // Switch to the unescaped host name before recomputing the range. Without this
            // assignment the range would span the whole receiver, which both maps the wrong
            // text and can overrun sourceBuffer.
            string = substring;
            range = NSMakeRange(0, [string length]);
        }
    }

    int length = range.length;
    [string getCharacters:sourceBuffer range:range];

    UErrorCode error = U_ZERO_ERROR;
    int32_t numCharactersConverted = (encode ? uidna_IDNToASCII : uidna_IDNToUnicode)
        (sourceBuffer, length, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &error);
    if (error != U_ZERO_ERROR) {
        return nil;
    }
    // Identical output means the host name was already in the requested form.
    if (numCharactersConverted == length && memcmp(sourceBuffer, destinationBuffer, length * sizeof(UChar)) == 0) {
        return nil;
    }
    if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, numCharactersConverted)) {
        return nil;
    }
    return makeString ? [NSString stringWithCharacters:destinationBuffer length:numCharactersConverted] : self;
}
// YES when the host name in range would change under IDN decoding.
- (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range
{
    NSString *mapped = [self _web_mapHostNameWithRange:range encode:NO makeString:NO];
    return mapped != nil;
}
// YES when the host name in range would change under IDN encoding.
- (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range
{
    NSString *mapped = [self _web_mapHostNameWithRange:range encode:YES makeString:NO];
    return mapped != nil;
}
// Returns the IDN-decoded form of the host name in range, or nil when no mapping is necessary.
- (NSString *)_web_decodeHostNameWithRange:(NSRange)range
{
    NSString *decoded = [self _web_mapHostNameWithRange:range encode:NO makeString:YES];
    return decoded;
}
// Returns the IDN-encoded form of the host name in range, or nil when no mapping is necessary.
- (NSString *)_web_encodeHostNameWithRange:(NSRange)range
{
    NSString *encoded = [self _web_mapHostNameWithRange:range encode:YES makeString:YES];
    return encoded;
}
// Treats the whole receiver as a host name and returns its IDN-decoded form, or the
// receiver itself when no mapping is necessary.
- (NSString *)_web_decodeHostName
{
    NSRange wholeString = NSMakeRange(0, [self length]);
    NSString *decoded = [self _web_mapHostNameWithRange:wholeString encode:NO makeString:YES];
    if (decoded == nil) {
        return self;
    }
    return decoded;
}
// Treats the whole receiver as a host name and returns its IDN-encoded form, or the
// receiver itself when the mapping helper returns nil (no mapping necessary).
939 - (NSString *)_web_encodeHostName
941 NSString *name = [self _web_mapHostNameWithRange:NSMakeRange(0, [self length]) encode:YES makeString:YES];
942 return name == nil ? self : name;