URLParser: Handle \ in path according to spec
[WebKit-https.git] / Source / WebCore / platform / URLParser.cpp
1 /*
2  * Copyright (C) 2016 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "URLParser.h"
28
29 #include "Logging.h"
30 #include <array>
31 #include <unicode/uidna.h>
32 #include <wtf/HashMap.h>
33 #include <wtf/NeverDestroyed.h>
34 #include <wtf/text/StringBuilder.h>
35 #include <wtf/text/StringHash.h>
36
37 namespace WebCore {
38
// Character classification predicates used throughout the parser. They are templates so that
// the same test can be applied to any integral character type the caller holds.

// True for code points up to and including U+001F.
template<typename CharacterType> static bool isC0Control(CharacterType character)
{
    return character <= 0x0001F;
}

// True for everything isC0Control() accepts, plus U+0020 (space).
template<typename CharacterType> static bool isC0ControlOrSpace(CharacterType character)
{
    if (character == 0x0020)
        return true;
    return isC0Control(character);
}

// True for tab (U+0009), line feed (U+000A) and carriage return (U+000D).
template<typename CharacterType> static bool isTabOrNewline(CharacterType character)
{
    switch (character) {
    case 0x0009:
    case 0x000A:
    case 0x000D:
        return true;
    default:
        return false;
    }
}

// True for C0 controls and for anything above U+007E.
template<typename CharacterType> static bool isInSimpleEncodeSet(CharacterType character)
{
    return character > 0x007E || isC0Control(character);
}

// The simple encode set extended with space and the characters " # < > ? ` { }.
template<typename CharacterType> static bool isInDefaultEncodeSet(CharacterType character)
{
    if (isInSimpleEncodeSet(character))
        return true;
    switch (character) {
    case 0x0020:
    case '"':
    case '#':
    case '<':
    case '>':
    case '?':
    case '`':
    case '{':
    case '}':
        return true;
    default:
        return false;
    }
}

// The default encode set extended with the characters / : ; = @ [ \ ] ^ |.
template<typename CharacterType> static bool isInUserInfoEncodeSet(CharacterType character)
{
    if (isInDefaultEncodeSet(character))
        return true;
    switch (character) {
    case '/':
    case ':':
    case ';':
    case '=':
    case '@':
    case '[':
    case '\\':
    case ']':
    case '^':
    case '|':
        return true;
    default:
        return false;
    }
}

// True for the characters this file treats as invalid inside a domain:
// NUL, tab, LF, CR, space and # % / : ? @ [ \ ].
template<typename CharacterType> static bool isInvalidDomainCharacter(CharacterType character)
{
    switch (character) {
    case 0x0000:
    case 0x0009:
    case 0x000A:
    case 0x000D:
    case 0x0020:
    case '#':
    case '%':
    case '/':
    case ':':
    case '?':
    case '@':
    case '[':
    case '\\':
    case ']':
        return true;
    default:
        return false;
    }
}
46     
47 static bool isWindowsDriveLetter(StringView::CodePoints::Iterator iterator, const StringView::CodePoints::Iterator& end)
48 {
49     if (iterator == end || !isASCIIAlpha(*iterator))
50         return false;
51     ++iterator;
52     if (iterator == end)
53         return false;
54     return *iterator == ':' || *iterator == '|';
55 }
56
57 static bool isWindowsDriveLetter(const StringBuilder& builder, size_t index)
58 {
59     if (builder.length() < index + 2)
60         return false;
61     return isASCIIAlpha(builder[index]) && (builder[index + 1] == ':' || builder[index + 1] == '|');
62 }
63
64 static bool shouldCopyFileURL(StringView::CodePoints::Iterator iterator, const StringView::CodePoints::Iterator end)
65 {
66     if (isWindowsDriveLetter(iterator, end))
67         return true;
68     if (iterator == end)
69         return false;
70     ++iterator;
71     if (iterator == end)
72         return true;
73     ++iterator;
74     if (iterator == end)
75         return true;
76     return *iterator != '/' && *iterator != '\\' && *iterator != '?' && *iterator != '#';
77 }
78
79 static void percentEncode(uint8_t byte, StringBuilder& builder)
80 {
81     builder.append('%');
82     builder.append(upperNibbleToASCIIHexDigit(byte));
83     builder.append(lowerNibbleToASCIIHexDigit(byte));
84 }
85
86 static void utf8PercentEncode(UChar32 codePoint, StringBuilder& builder, bool(*isInCodeSet)(UChar32))
87 {
88     if (isInCodeSet(codePoint)) {
89         uint8_t buffer[U8_MAX_LENGTH];
90         int32_t offset = 0;
91         UBool error = false;
92         U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
93         // FIXME: Check error.
94         for (int32_t i = 0; i < offset; ++i)
95             percentEncode(buffer[i], builder);
96     } else
97         builder.append(codePoint);
98 }
99
// Tells whether a byte of the encoded query has to be percent-encoded. Bytes below 0x21 or
// above 0x7E always are; inside that range only '"', '#', '<' and '>' are.
static bool shouldPercentEncodeQueryByte(uint8_t byte)
{
    if (byte < 0x21 || byte > 0x7E)
        return true;

    switch (byte) {
    case 0x22: // '"'
    case 0x23: // '#'
    case 0x3C: // '<'
    case 0x3E: // '>'
        return true;
    default:
        return false;
    }
}
114
115 static void encodeQuery(const StringBuilder& source, StringBuilder& destination, const TextEncoding& encoding)
116 {
117     // FIXME: It is unclear in the spec what to do when encoding fails. The behavior should be specified and tested.
118     CString encoded = encoding.encode(StringView(source.toStringPreserveCapacity()), URLEncodedEntitiesForUnencodables);
119     const char* data = encoded.data();
120     size_t length = encoded.length();
121     for (size_t i = 0; i < length; ++i) {
122         uint8_t byte = data[i];
123         if (shouldPercentEncodeQueryByte(byte))
124             percentEncode(byte, destination);
125         else
126             destination.append(byte);
127     }
128 }
129
130 static bool isDefaultPort(const String& scheme, uint16_t port)
131 {
132     static NeverDestroyed<HashMap<String, uint16_t>> defaultPorts(HashMap<String, uint16_t>({
133         {"ftp", 21},
134         {"gopher", 70},
135         {"http", 80},
136         {"https", 443},
137         {"ws", 80},
138         {"wss", 443}}));
139     return defaultPorts.get().get(scheme) == port;
140 }
141
142 static bool isSpecialScheme(StringView scheme)
143 {
144     return scheme == "ftp"
145         || scheme == "file"
146         || scheme == "gopher"
147         || scheme == "http"
148         || scheme == "https"
149         || scheme == "ws"
150         || scheme == "wss";
151 }
152
153 static StringView bufferView(const StringBuilder& builder, unsigned length)
154 {
155     ASSERT(builder.length() >= length);
156     if (builder.is8Bit())
157         return StringView(builder.characters8(), length);
158     return StringView(builder.characters16(), length);
159 }
160
161 enum class URLParser::URLPart {
162     SchemeEnd,
163     UserStart,
164     UserEnd,
165     PasswordEnd,
166     HostEnd,
167     PortEnd,
168     PathAfterLastSlash,
169     PathEnd,
170     QueryEnd,
171     FragmentEnd,
172 };
173
174 size_t URLParser::urlLengthUntilPart(const URL& url, URLPart part)
175 {
176     switch (part) {
177     case URLPart::FragmentEnd:
178         return url.m_fragmentEnd;
179     case URLPart::QueryEnd:
180         return url.m_queryEnd;
181     case URLPart::PathEnd:
182         return url.m_pathEnd;
183     case URLPart::PathAfterLastSlash:
184         return url.m_pathAfterLastSlash;
185     case URLPart::PortEnd:
186         return url.m_portEnd;
187     case URLPart::HostEnd:
188         return url.m_hostEnd;
189     case URLPart::PasswordEnd:
190         return url.m_passwordEnd;
191     case URLPart::UserEnd:
192         return url.m_userEnd;
193     case URLPart::UserStart:
194         return url.m_userStart;
195     case URLPart::SchemeEnd:
196         return url.m_schemeEnd;
197     }
198     ASSERT_NOT_REACHED();
199     return 0;
200 }
201     
202 void URLParser::copyURLPartsUntil(const URL& base, URLPart part)
203 {
204     m_buffer.clear();
205     m_buffer.append(base.m_string.substring(0, urlLengthUntilPart(base, part)));
206     switch (part) {
207     case URLPart::FragmentEnd:
208         m_url.m_fragmentEnd = base.m_fragmentEnd;
209         FALLTHROUGH;
210     case URLPart::QueryEnd:
211         m_url.m_queryEnd = base.m_queryEnd;
212         FALLTHROUGH;
213     case URLPart::PathEnd:
214         m_url.m_pathEnd = base.m_pathEnd;
215         FALLTHROUGH;
216     case URLPart::PathAfterLastSlash:
217         m_url.m_pathAfterLastSlash = base.m_pathAfterLastSlash;
218         FALLTHROUGH;
219     case URLPart::PortEnd:
220         m_url.m_portEnd = base.m_portEnd;
221         FALLTHROUGH;
222     case URLPart::HostEnd:
223         m_url.m_hostEnd = base.m_hostEnd;
224         FALLTHROUGH;
225     case URLPart::PasswordEnd:
226         m_url.m_passwordEnd = base.m_passwordEnd;
227         FALLTHROUGH;
228     case URLPart::UserEnd:
229         m_url.m_userEnd = base.m_userEnd;
230         FALLTHROUGH;
231     case URLPart::UserStart:
232         m_url.m_userStart = base.m_userStart;
233         FALLTHROUGH;
234     case URLPart::SchemeEnd:
235         m_url.m_isValid = base.m_isValid;
236         m_url.m_protocolIsInHTTPFamily = base.m_protocolIsInHTTPFamily;
237         m_url.m_schemeEnd = base.m_schemeEnd;
238     }
239     m_urlIsSpecial = isSpecialScheme(bufferView(m_buffer, m_url.m_schemeEnd));
240 }
241
// The two hex digits that follow '%' in a percent-encoded '.'. The second digit is stored in
// lower case; callers lower-case the input before comparing against it. The pointer itself is
// const so the table cannot be re-pointed by accident.
static const char* const dotASCIICode = "2e";
243
244 static bool isPercentEncodedDot(StringView::CodePoints::Iterator c, const StringView::CodePoints::Iterator& end)
245 {
246     if (c == end)
247         return false;
248     if (*c != '%')
249         return false;
250     ++c;
251     if (c == end)
252         return false;
253     if (*c != dotASCIICode[0])
254         return false;
255     ++c;
256     if (c == end)
257         return false;
258     return toASCIILower(*c) == dotASCIICode[1];
259 }
260
261 static bool isSingleDotPathSegment(StringView::CodePoints::Iterator c, const StringView::CodePoints::Iterator& end)
262 {
263     if (c == end)
264         return false;
265     if (*c == '.') {
266         ++c;
267         return c == end || *c == '/' || *c == '\\' || *c == '?' || *c == '#';
268     }
269     if (*c != '%')
270         return false;
271     ++c;
272     if (c == end || *c != dotASCIICode[0])
273         return false;
274     ++c;
275     if (c == end)
276         return false;
277     if (toASCIILower(*c) == dotASCIICode[1]) {
278         ++c;
279         return c == end || *c == '/' || *c == '\\' || *c == '?' || *c == '#';
280     }
281     return false;
282 }
283     
284 static bool isDoubleDotPathSegment(StringView::CodePoints::Iterator c, const StringView::CodePoints::Iterator& end)
285 {
286     if (c == end)
287         return false;
288     if (*c == '.') {
289         ++c;
290         return isSingleDotPathSegment(c, end);
291     }
292     if (*c != '%')
293         return false;
294     ++c;
295     if (c == end || *c != dotASCIICode[0])
296         return false;
297     ++c;
298     if (c == end)
299         return false;
300     if (toASCIILower(*c) == dotASCIICode[1]) {
301         ++c;
302         return isSingleDotPathSegment(c, end);
303     }
304     return false;
305 }
306
307 static void consumeSingleDotPathSegment(StringView::CodePoints::Iterator& c, const StringView::CodePoints::Iterator end)
308 {
309     ASSERT(isSingleDotPathSegment(c, end));
310     if (*c == '.') {
311         ++c;
312         if (c != end) {
313             if (*c == '/' || *c == '\\')
314                 ++c;
315             else
316                 ASSERT(*c == '?' || *c == '#');
317         }
318     } else {
319         ASSERT(*c == '%');
320         ++c;
321         ASSERT(*c == dotASCIICode[0]);
322         ++c;
323         ASSERT(toASCIILower(*c) == dotASCIICode[1]);
324         ++c;
325         if (c != end) {
326             if (*c == '/' || *c == '\\')
327                 ++c;
328             else
329                 ASSERT(*c == '?' || *c == '#');
330         }
331     }
332 }
333
334 static void consumeDoubleDotPathSegment(StringView::CodePoints::Iterator& c, const StringView::CodePoints::Iterator end)
335 {
336     ASSERT(isDoubleDotPathSegment(c, end));
337     if (*c == '.')
338         ++c;
339     else {
340         ASSERT(*c == '%');
341         ++c;
342         ASSERT(*c == dotASCIICode[0]);
343         ++c;
344         ASSERT(toASCIILower(*c) == dotASCIICode[1]);
345         ++c;
346     }
347     consumeSingleDotPathSegment(c, end);
348 }
349
350 void URLParser::popPath()
351 {
352     if (m_url.m_pathAfterLastSlash > m_url.m_portEnd + 1) {
353         m_url.m_pathAfterLastSlash--;
354         if (m_buffer[m_url.m_pathAfterLastSlash] == '/')
355             m_url.m_pathAfterLastSlash--;
356         while (m_url.m_pathAfterLastSlash > m_url.m_portEnd && m_buffer[m_url.m_pathAfterLastSlash] != '/')
357             m_url.m_pathAfterLastSlash--;
358         m_url.m_pathAfterLastSlash++;
359     }
360     m_buffer.resize(m_url.m_pathAfterLastSlash);
361 }
362
363 URL URLParser::failure(const String& input)
364 {
365     URL url;
366     url.m_isValid = false;
367     url.m_protocolIsInHTTPFamily = false;
368     url.m_schemeEnd = 0;
369     url.m_userStart = 0;
370     url.m_userEnd = 0;
371     url.m_passwordEnd = 0;
372     url.m_hostEnd = 0;
373     url.m_portEnd = 0;
374     url.m_pathAfterLastSlash = 0;
375     url.m_pathEnd = 0;
376     url.m_queryEnd = 0;
377     url.m_fragmentEnd = 0;
378     url.m_string = input;
379     return url;
380 }
381
382 URL URLParser::parse(const String& input, const URL& base, const TextEncoding& encoding)
383 {
384     LOG(URLParser, "Parsing URL <%s> base <%s>", input.utf8().data(), base.string().utf8().data());
385     m_url = { };
386     m_buffer.clear();
387     m_buffer.reserveCapacity(input.length());
388     
389     // FIXME: We shouldn't need to allocate another buffer for this.
390     StringBuilder queryBuffer;
391
392     auto codePoints = StringView(input).codePoints();
393     auto c = codePoints.begin();
394     auto end = codePoints.end();
395     auto authorityOrHostBegin = codePoints.begin();
396     while (c != end && isC0ControlOrSpace(*c))
397         ++c;
398     
399     enum class State : uint8_t {
400         SchemeStart,
401         Scheme,
402         NoScheme,
403         SpecialRelativeOrAuthority,
404         PathOrAuthority,
405         Relative,
406         RelativeSlash,
407         SpecialAuthoritySlashes,
408         SpecialAuthorityIgnoreSlashes,
409         AuthorityOrHost,
410         Host,
411         File,
412         FileSlash,
413         FileHost,
414         PathStart,
415         Path,
416         CannotBeABaseURLPath,
417         Query,
418         Fragment,
419     };
420
421 #define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, buffer length %d", x, *c, m_buffer.length())
422 #define LOG_FINAL_STATE(x) LOG(URLParser, "Final State: %s", x)
423
424     State state = State::SchemeStart;
425     while (c != end) {
426         if (isTabOrNewline(*c)) {
427             ++c;
428             continue;
429         }
430
431         switch (state) {
432         case State::SchemeStart:
433             LOG_STATE("SchemeStart");
434             if (isASCIIAlpha(*c)) {
435                 m_buffer.append(toASCIILower(*c));
436                 ++c;
437                 state = State::Scheme;
438             } else
439                 state = State::NoScheme;
440             break;
441         case State::Scheme:
442             LOG_STATE("Scheme");
443             if (isASCIIAlphanumeric(*c) || *c == '+' || *c == '-' || *c == '.')
444                 m_buffer.append(toASCIILower(*c));
445             else if (*c == ':') {
446                 m_url.m_schemeEnd = m_buffer.length();
447                 StringView urlScheme = bufferView(m_buffer, m_url.m_schemeEnd);
448                 m_url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https";
449                 if (urlScheme == "file") {
450                     m_urlIsSpecial = true;
451                     state = State::File;
452                     m_buffer.append(':');
453                     ++c;
454                     break;
455                 }
456                 m_buffer.append(':');
457                 if (isSpecialScheme(urlScheme)) {
458                     m_urlIsSpecial = true;
459                     if (base.protocol() == urlScheme)
460                         state = State::SpecialRelativeOrAuthority;
461                     else
462                         state = State::SpecialAuthoritySlashes;
463                 } else {
464                     m_url.m_userStart = m_buffer.length();
465                     m_url.m_userEnd = m_url.m_userStart;
466                     m_url.m_passwordEnd = m_url.m_userStart;
467                     m_url.m_hostEnd = m_url.m_userStart;
468                     m_url.m_portEnd = m_url.m_userStart;
469                     auto maybeSlash = c;
470                     ++maybeSlash;
471                     if (maybeSlash != end && *maybeSlash == '/') {
472                         m_buffer.append('/');
473                         m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
474                         state = State::PathOrAuthority;
475                         ++c;
476                         ASSERT(*c == '/');
477                     } else {
478                         m_url.m_pathAfterLastSlash = m_url.m_userStart;
479                         state = State::CannotBeABaseURLPath;
480                     }
481                     ++c;
482                     break;
483                 }
484             } else {
485                 m_buffer.clear();
486                 state = State::NoScheme;
487                 c = codePoints.begin();
488                 break;
489             }
490             ++c;
491             while (c != end && isTabOrNewline(*c))
492                 ++c;
493             if (c == end) {
494                 m_buffer.clear();
495                 state = State::NoScheme;
496                 c = codePoints.begin();
497             }
498             break;
499         case State::NoScheme:
500             LOG_STATE("NoScheme");
501             if (base.isNull()) {
502                 if (*c == '#') {
503                     copyURLPartsUntil(base, URLPart::QueryEnd);
504                     state = State::Fragment;
505                     ++c;
506                 } else
507                     return failure(input);
508             } else if (base.protocol() == "file") {
509                 copyURLPartsUntil(base, URLPart::SchemeEnd);
510                 m_buffer.append(':');
511                 state = State::File;
512             } else
513                 state = State::Relative;
514             break;
515         case State::SpecialRelativeOrAuthority:
516             LOG_STATE("SpecialRelativeOrAuthority");
517             if (*c == '/') {
518                 m_buffer.append('/');
519                 ++c;
520                 while (c != end && isTabOrNewline(*c))
521                     ++c;
522                 if (c == end)
523                     return failure(input);
524                 if (*c == '/') {
525                     m_buffer.append('/');
526                     state = State::SpecialAuthorityIgnoreSlashes;
527                     ++c;
528                 }
529             } else
530                 state = State::Relative;
531             break;
532         case State::PathOrAuthority:
533             LOG_STATE("PathOrAuthority");
534             if (*c == '/') {
535                 m_buffer.append('/');
536                 m_url.m_userStart = m_buffer.length();
537                 state = State::AuthorityOrHost;
538                 ++c;
539                 authorityOrHostBegin = c;
540             } else
541                 state = State::Path;
542             break;
543         case State::Relative:
544             LOG_STATE("Relative");
545             switch (*c) {
546             case '/':
547             case '\\':
548                 state = State::RelativeSlash;
549                 ++c;
550                 break;
551             case '?':
552                 copyURLPartsUntil(base, URLPart::PathEnd);
553                 m_buffer.append('?');
554                 state = State::Query;
555                 ++c;
556                 break;
557             case '#':
558                 copyURLPartsUntil(base, URLPart::QueryEnd);
559                 m_buffer.append('#');
560                 state = State::Fragment;
561                 ++c;
562                 break;
563             default:
564                 copyURLPartsUntil(base, URLPart::PathAfterLastSlash);
565                 state = State::Path;
566                 break;
567             }
568             break;
569         case State::RelativeSlash:
570             LOG_STATE("RelativeSlash");
571             if (*c == '/' || *c == '\\') {
572                 ++c;
573                 copyURLPartsUntil(base, URLPart::SchemeEnd);
574                 m_buffer.append("://");
575                 state = State::SpecialAuthorityIgnoreSlashes;
576             } else {
577                 copyURLPartsUntil(base, URLPart::PortEnd);
578                 m_buffer.append('/');
579                 m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
580                 state = State::Path;
581             }
582             break;
583         case State::SpecialAuthoritySlashes:
584             LOG_STATE("SpecialAuthoritySlashes");
585             m_buffer.append("//");
586             if (*c == '/') {
587                 ++c;
588                 while (c != end && isTabOrNewline(*c))
589                     ++c;
590                 if (c == end)
591                     return failure(input);
592                 if (*c == '/')
593                     ++c;
594             }
595             state = State::SpecialAuthorityIgnoreSlashes;
596             break;
597         case State::SpecialAuthorityIgnoreSlashes:
598             LOG_STATE("SpecialAuthorityIgnoreSlashes");
599             if (*c == '/' || *c == '\\') {
600                 m_buffer.append('/');
601                 ++c;
602             }
603             m_url.m_userStart = m_buffer.length();
604             state = State::AuthorityOrHost;
605             authorityOrHostBegin = c;
606             break;
607         case State::AuthorityOrHost:
608             LOG_STATE("AuthorityOrHost");
609             {
610                 if (*c == '@') {
611                     parseAuthority(authorityOrHostBegin, c);
612                     ++c;
613                     while (c != end && isTabOrNewline(*c))
614                         ++c;
615                     authorityOrHostBegin = c;
616                     state = State::Host;
617                     break;
618                 }
619                 bool isSlash = *c == '/' || (m_urlIsSpecial && *c == '\\');
620                 if (isSlash || *c == '?' || *c == '#') {
621                     m_url.m_userEnd = m_buffer.length();
622                     m_url.m_passwordEnd = m_url.m_userEnd;
623                     if (!parseHost(authorityOrHostBegin, c))
624                         return failure(input);
625                     if (!isSlash) {
626                         m_buffer.append('/');
627                         m_url.m_pathAfterLastSlash = m_buffer.length();
628                     }
629                     state = State::Path;
630                     break;
631                 }
632                 ++c;
633             }
634             break;
635         case State::Host:
636             LOG_STATE("Host");
637             if (*c == '/' || *c == '?' || *c == '#') {
638                 if (!parseHost(authorityOrHostBegin, c))
639                     return failure(input);
640                 state = State::Path;
641                 break;
642             }
643             ++c;
644             break;
645         case State::File:
646             LOG_STATE("File");
647             switch (*c) {
648             case '/':
649             case '\\':
650                 m_buffer.append('/');
651                 state = State::FileSlash;
652                 ++c;
653                 break;
654             case '?':
655                 if (!base.isNull() && base.protocolIs("file"))
656                     copyURLPartsUntil(base, URLPart::PathEnd);
657                 m_buffer.append("///?");
658                 m_url.m_userStart = m_buffer.length() - 2;
659                 m_url.m_userEnd = m_url.m_userStart;
660                 m_url.m_passwordEnd = m_url.m_userStart;
661                 m_url.m_hostEnd = m_url.m_userStart;
662                 m_url.m_portEnd = m_url.m_userStart;
663                 m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
664                 m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
665                 state = State::Query;
666                 ++c;
667                 break;
668             case '#':
669                 if (!base.isNull() && base.protocolIs("file"))
670                     copyURLPartsUntil(base, URLPart::QueryEnd);
671                 m_buffer.append("///#");
672                 m_url.m_userStart = m_buffer.length() - 2;
673                 m_url.m_userEnd = m_url.m_userStart;
674                 m_url.m_passwordEnd = m_url.m_userStart;
675                 m_url.m_hostEnd = m_url.m_userStart;
676                 m_url.m_portEnd = m_url.m_userStart;
677                 m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
678                 m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
679                 m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
680                 state = State::Fragment;
681                 ++c;
682                 break;
683             default:
684                 if (!base.isNull() && base.protocolIs("file") && shouldCopyFileURL(c, end))
685                     copyURLPartsUntil(base, URLPart::PathAfterLastSlash);
686                 else {
687                     m_buffer.append("///");
688                     m_url.m_userStart = m_buffer.length() - 1;
689                     m_url.m_userEnd = m_url.m_userStart;
690                     m_url.m_passwordEnd = m_url.m_userStart;
691                     m_url.m_hostEnd = m_url.m_userStart;
692                     m_url.m_portEnd = m_url.m_userStart;
693                     m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
694                 }
695                 state = State::Path;
696                 break;
697             }
698             break;
699         case State::FileSlash:
700             LOG_STATE("FileSlash");
701             if (*c == '/' || *c == '\\') {
702                 ++c;
703                 m_buffer.append('/');
704                 m_url.m_userStart = m_buffer.length();
705                 m_url.m_userEnd = m_url.m_userStart;
706                 m_url.m_passwordEnd = m_url.m_userStart;
707                 m_url.m_hostEnd = m_url.m_userStart;
708                 m_url.m_portEnd = m_url.m_userStart;
709                 authorityOrHostBegin = c;
710                 state = State::FileHost;
711                 break;
712             }
713             if (!base.isNull() && base.protocol() == "file") {
714                 String basePath = base.path();
715                 auto basePathCodePoints = StringView(basePath).codePoints();
716                 if (basePath.length() >= 2 && isWindowsDriveLetter(basePathCodePoints.begin(), basePathCodePoints.end())) {
717                     m_buffer.append(basePath[0]);
718                     m_buffer.append(basePath[1]);
719                 }
720                 state = State::Path;
721                 break;
722             }
723             m_buffer.append("//");
724             m_url.m_userStart = m_buffer.length() - 1;
725             m_url.m_userEnd = m_url.m_userStart;
726             m_url.m_passwordEnd = m_url.m_userStart;
727             m_url.m_hostEnd = m_url.m_userStart;
728             m_url.m_portEnd = m_url.m_userStart;
729             m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
730             state = State::Path;
731             break;
732         case State::FileHost:
733             LOG_STATE("FileHost");
734             if (*c == '/' || *c == '\\' || *c == '?' || *c == '#') {
735                 if (isWindowsDriveLetter(m_buffer, m_url.m_portEnd + 1)) {
736                     state = State::Path;
737                     break;
738                 }
739                 if (authorityOrHostBegin == c) {
740                     ASSERT(m_buffer[m_buffer.length() - 1] == '/');
741                     if (*c == '?') {
742                         m_buffer.append("/?");
743                         m_url.m_pathAfterLastSlash = m_buffer.length() - 1;
744                         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
745                         state = State::Query;
746                         ++c;
747                         break;
748                     }
749                     if (*c == '#') {
750                         m_buffer.append("/#");
751                         m_url.m_pathAfterLastSlash = m_buffer.length() - 1;
752                         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
753                         m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
754                         state = State::Fragment;
755                         ++c;
756                         break;
757                     }
758                     state = State::Path;
759                     break;
760                 }
761                 if (!parseHost(authorityOrHostBegin, c))
762                     return failure(input);
763                 
764                 // FIXME: Don't allocate a new string for this comparison.
765                 if (m_buffer.toString().substring(m_url.m_passwordEnd) == "localhost")  {
766                     m_buffer.resize(m_url.m_passwordEnd);
767                     m_url.m_hostEnd = m_buffer.length();
768                     m_url.m_portEnd = m_url.m_hostEnd;
769                 }
770                 
771                 state = State::PathStart;
772                 break;
773             }
774             ++c;
775             break;
776         case State::PathStart:
777             LOG_STATE("PathStart");
778             if (*c != '/' && *c != '\\')
779                 ++c;
780             state = State::Path;
781             break;
782         case State::Path:
783             LOG_STATE("Path");
784             if (*c == '/' || (m_urlIsSpecial && *c == '\\')) {
785                 m_buffer.append('/');
786                 m_url.m_pathAfterLastSlash = m_buffer.length();
787                 ++c;
788                 break;
789             }
790             if (m_buffer.length() && m_buffer[m_buffer.length() - 1] == '/') {
791                 if (isDoubleDotPathSegment(c, end)) {
792                     consumeDoubleDotPathSegment(c, end);
793                     popPath();
794                     break;
795                 }
796                 if (m_buffer[m_buffer.length() - 1] == '/' && isSingleDotPathSegment(c, end)) {
797                     consumeSingleDotPathSegment(c, end);
798                     break;
799                 }
800             }
801             if (*c == '?') {
802                 m_url.m_pathEnd = m_buffer.length();
803                 state = State::Query;
804                 break;
805             }
806             if (*c == '#') {
807                 m_url.m_pathEnd = m_buffer.length();
808                 m_url.m_queryEnd = m_url.m_pathEnd;
809                 state = State::Fragment;
810                 break;
811             }
812             if (isPercentEncodedDot(c, end)) {
813                 m_buffer.append('.');
814                 ASSERT(*c == '%');
815                 ++c;
816                 ASSERT(*c == dotASCIICode[0]);
817                 ++c;
818                 ASSERT(toASCIILower(*c) == dotASCIICode[1]);
819                 ++c;
820                 break;
821             }
822             utf8PercentEncode(*c, m_buffer, isInDefaultEncodeSet);
823             ++c;
824             break;
825         case State::CannotBeABaseURLPath:
826             LOG_STATE("CannotBeABaseURLPath");
827             if (*c == '?') {
828                 m_url.m_pathEnd = m_buffer.length();
829                 state = State::Query;
830             } else if (*c == '#') {
831                 m_url.m_pathEnd = m_buffer.length();
832                 m_url.m_queryEnd = m_url.m_pathEnd;
833                 state = State::Fragment;
834             } else {
835                 m_buffer.append(*c);
836                 ++c;
837             }
838             break;
839         case State::Query:
840             LOG_STATE("Query");
841             if (*c == '#') {
842                 encodeQuery(queryBuffer, m_buffer, encoding);
843                 m_url.m_queryEnd = m_buffer.length();
844                 state = State::Fragment;
845                 break;
846             }
847             queryBuffer.append(*c);
848             ++c;
849             break;
850         case State::Fragment:
851             LOG_STATE("Fragment");
852             m_buffer.append(*c);
853             ++c;
854             break;
855         }
856     }
857
858     switch (state) {
859     case State::SchemeStart:
860         LOG_FINAL_STATE("SchemeStart");
861         if (!m_buffer.length() && !base.isNull())
862             return base;
863         return failure(input);
864     case State::Scheme:
865         LOG_FINAL_STATE("Scheme");
866         break;
867     case State::NoScheme:
868         LOG_FINAL_STATE("NoScheme");
869         break;
870     case State::SpecialRelativeOrAuthority:
871         LOG_FINAL_STATE("SpecialRelativeOrAuthority");
872         break;
873     case State::PathOrAuthority:
874         LOG_FINAL_STATE("PathOrAuthority");
875         break;
876     case State::Relative:
877         LOG_FINAL_STATE("Relative");
878         copyURLPartsUntil(base, URLPart::FragmentEnd);
879         break;
880     case State::RelativeSlash:
881         LOG_FINAL_STATE("RelativeSlash");
882         copyURLPartsUntil(base, URLPart::PortEnd);
883         m_buffer.append('/');
884         m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
885         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
886         m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
887         m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
888         break;
889     case State::SpecialAuthoritySlashes:
890         LOG_FINAL_STATE("SpecialAuthoritySlashes");
891         break;
892     case State::SpecialAuthorityIgnoreSlashes:
893         LOG_FINAL_STATE("SpecialAuthorityIgnoreSlashes");
894         return failure(input);
895     case State::AuthorityOrHost:
896         LOG_FINAL_STATE("AuthorityOrHost");
897         m_url.m_userEnd = m_buffer.length();
898         m_url.m_passwordEnd = m_url.m_userEnd;
899         FALLTHROUGH;
900     case State::Host:
901         if (state == State::Host)
902             LOG_FINAL_STATE("Host");
903         if (!parseHost(authorityOrHostBegin, end))
904             return failure(input);
905         m_buffer.append('/');
906         m_url.m_pathEnd = m_url.m_portEnd + 1;
907         m_url.m_pathAfterLastSlash = m_url.m_pathEnd;
908         m_url.m_queryEnd = m_url.m_pathEnd;
909         m_url.m_fragmentEnd = m_url.m_pathEnd;
910         break;
911     case State::File:
912         LOG_FINAL_STATE("File");
913         if (!base.isNull() && base.protocol() == "file") {
914             copyURLPartsUntil(base, URLPart::QueryEnd);
915             m_buffer.append(':');
916         }
917         m_buffer.append("///");
918         m_url.m_userStart = m_buffer.length() - 1;
919         m_url.m_userEnd = m_url.m_userStart;
920         m_url.m_passwordEnd = m_url.m_userStart;
921         m_url.m_hostEnd = m_url.m_userStart;
922         m_url.m_portEnd = m_url.m_userStart;
923         m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
924         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
925         m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
926         m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
927         break;
928     case State::FileSlash:
929         LOG_FINAL_STATE("FileSlash");
930         m_buffer.append("//");
931         m_url.m_userStart = m_buffer.length() - 1;
932         m_url.m_userEnd = m_url.m_userStart;
933         m_url.m_passwordEnd = m_url.m_userStart;
934         m_url.m_hostEnd = m_url.m_userStart;
935         m_url.m_portEnd = m_url.m_userStart;
936         m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
937         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
938         m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
939         m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
940         break;
941     case State::FileHost:
942         LOG_FINAL_STATE("FileHost");
943         if (authorityOrHostBegin == c) {
944             m_buffer.append('/');
945             m_url.m_userStart = m_buffer.length() - 1;
946             m_url.m_userEnd = m_url.m_userStart;
947             m_url.m_passwordEnd = m_url.m_userStart;
948             m_url.m_hostEnd = m_url.m_userStart;
949             m_url.m_portEnd = m_url.m_userStart;
950             m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
951             m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
952             m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
953             m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
954             break;
955         }
956
957         m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
958         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
959         m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
960         m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
961         if (!parseHost(authorityOrHostBegin, c))
962             return failure(input);
963         
964         // FIXME: Don't allocate a new string for this comparison.
965         if (m_buffer.toString().substring(m_url.m_passwordEnd) == "localhost")  {
966             m_buffer.resize(m_url.m_passwordEnd);
967             m_url.m_hostEnd = m_buffer.length();
968             m_url.m_portEnd = m_url.m_hostEnd;
969             m_buffer.append('/');
970             m_url.m_pathAfterLastSlash = m_url.m_hostEnd + 1;
971             m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
972             m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
973             m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
974         }
975         break;
976     case State::PathStart:
977         LOG_FINAL_STATE("PathStart");
978         break;
979     case State::Path:
980         LOG_FINAL_STATE("Path");
981         m_url.m_pathEnd = m_buffer.length();
982         m_url.m_queryEnd = m_url.m_pathEnd;
983         m_url.m_fragmentEnd = m_url.m_pathEnd;
984         break;
985     case State::CannotBeABaseURLPath:
986         LOG_FINAL_STATE("CannotBeABaseURLPath");
987         m_url.m_pathEnd = m_buffer.length();
988         m_url.m_queryEnd = m_url.m_pathEnd;
989         m_url.m_fragmentEnd = m_url.m_pathEnd;
990         break;
991     case State::Query:
992         LOG_FINAL_STATE("Query");
993         encodeQuery(queryBuffer, m_buffer, encoding);
994         m_url.m_queryEnd = m_buffer.length();
995         m_url.m_fragmentEnd = m_url.m_queryEnd;
996         break;
997     case State::Fragment:
998         LOG_FINAL_STATE("Fragment");
999         m_url.m_fragmentEnd = m_buffer.length();
1000         break;
1001     }
1002
1003     m_url.m_string = m_buffer.toString();
1004     m_url.m_isValid = true;
1005     LOG(URLParser, "Parsed URL <%s>", m_url.m_string.utf8().data());
1006     return m_url;
1007 }
1008
1009 void URLParser::parseAuthority(StringView::CodePoints::Iterator& iterator, const StringView::CodePoints::Iterator& end)
1010 {
1011     if (iterator == end) {
1012         m_url.m_userEnd = m_buffer.length();
1013         m_url.m_passwordEnd = m_url.m_userEnd;
1014         return;
1015     }
1016     for (; iterator != end; ++iterator) {
1017         if (*iterator == ':') {
1018             ++iterator;
1019             m_url.m_userEnd = m_buffer.length();
1020             if (iterator == end) {
1021                 m_url.m_passwordEnd = m_url.m_userEnd;
1022                 if (m_url.m_userEnd > m_url.m_userStart)
1023                     m_buffer.append('@');
1024                 return;
1025             }
1026             m_buffer.append(':');
1027             break;
1028         }
1029         m_buffer.append(*iterator);
1030     }
1031     for (; iterator != end; ++iterator)
1032         m_buffer.append(*iterator);
1033     m_url.m_passwordEnd = m_buffer.length();
1034     if (!m_url.m_userEnd)
1035         m_url.m_userEnd = m_url.m_passwordEnd;
1036     m_buffer.append('@');
1037 }
1038
1039 static void serializeIPv4(uint32_t address, StringBuilder& buffer)
1040 {
1041     buffer.appendNumber(address >> 24);
1042     buffer.append('.');
1043     buffer.appendNumber((address >> 16) & 0xFF);
1044     buffer.append('.');
1045     buffer.appendNumber((address >> 8) & 0xFF);
1046     buffer.append('.');
1047     buffer.appendNumber(address & 0xFF);
1048 }
1049     
// Returns how many consecutive zero pieces address contains starting at index begin
// (0 when the piece at begin is non-zero or begin is not below 8).
static size_t zeroSequenceLength(const std::array<uint16_t, 8>& address, size_t begin)
{
    size_t cursor = begin;
    while (cursor < 8 && !address[cursor])
        ++cursor;
    return cursor - begin;
}
1059
1060 static Optional<size_t> findLongestZeroSequence(const std::array<uint16_t, 8>& address)
1061 {
1062     Optional<size_t> longest;
1063     size_t longestLength = 0;
1064     for (size_t i = 0; i < 8; i++) {
1065         size_t length = zeroSequenceLength(address, i);
1066         if (length) {
1067             if (length > 1 && (!longest || longestLength < length)) {
1068                 longest = i;
1069                 longestLength = length;
1070             }
1071             i += length;
1072         }
1073     }
1074     return longest;
1075 }
1076     
1077 static void serializeIPv6Piece(uint16_t piece, StringBuilder& buffer)
1078 {
1079     bool printed = false;
1080     if (auto nibble0 = piece >> 12) {
1081         buffer.append(lowerNibbleToLowercaseASCIIHexDigit(nibble0));
1082         printed = true;
1083     }
1084     auto nibble1 = piece >> 8 & 0xF;
1085     if (printed || nibble1) {
1086         buffer.append(lowerNibbleToLowercaseASCIIHexDigit(nibble1));
1087         printed = true;
1088     }
1089     auto nibble2 = piece >> 4 & 0xF;
1090     if (printed || nibble2)
1091         buffer.append(lowerNibbleToLowercaseASCIIHexDigit(nibble2));
1092     buffer.append(lowerNibbleToLowercaseASCIIHexDigit(piece & 0xF));
1093 }
1094
1095 static void serializeIPv6(std::array<uint16_t, 8> address, StringBuilder& buffer)
1096 {
1097     buffer.append('[');
1098     auto compressPointer = findLongestZeroSequence(address);
1099     for (size_t piece = 0; piece < 8; piece++) {
1100         if (compressPointer && compressPointer.value() == piece) {
1101             ASSERT(!address[piece]);
1102             if (piece)
1103                 buffer.append(':');
1104             else
1105                 buffer.append("::");
1106             while (piece < 8 && !address[piece])
1107                 piece++;
1108             if (piece == 8)
1109                 break;
1110         }
1111         serializeIPv6Piece(address[piece], buffer);
1112         if (piece < 7)
1113             buffer.append(':');
1114     }
1115     buffer.append(']');
1116 }
1117
1118 static Optional<uint32_t> parseIPv4Number(StringView::CodePoints::Iterator& iterator, const StringView::CodePoints::Iterator& end)
1119 {
1120     // FIXME: Check for overflow.
1121     enum class State : uint8_t {
1122         UnknownBase,
1123         Decimal,
1124         OctalOrHex,
1125         Octal,
1126         Hex,
1127     };
1128     State state = State::UnknownBase;
1129     uint32_t value = 0;
1130     while (iterator != end) {
1131         if (*iterator == '.') {
1132             ++iterator;
1133             return value;
1134         }
1135         switch (state) {
1136         case State::UnknownBase:
1137             if (*iterator == '0') {
1138                 ++iterator;
1139                 state = State::OctalOrHex;
1140                 break;
1141             }
1142             state = State::Decimal;
1143             break;
1144         case State::OctalOrHex:
1145             if (*iterator == 'x' || *iterator == 'X') {
1146                 ++iterator;
1147                 state = State::Hex;
1148                 break;
1149             }
1150             state = State::Octal;
1151             break;
1152         case State::Decimal:
1153             if (*iterator < '0' || *iterator > '9')
1154                 return Nullopt;
1155             value *= 10;
1156             value += *iterator - '0';
1157             ++iterator;
1158             break;
1159         case State::Octal:
1160             if (*iterator < '0' || *iterator > '7')
1161                 return Nullopt;
1162             value *= 8;
1163             value += *iterator - '0';
1164             ++iterator;
1165             break;
1166         case State::Hex:
1167             if (!isASCIIHexDigit(*iterator))
1168                 return Nullopt;
1169             value *= 16;
1170             value += toASCIIHexValue(*iterator);
1171             ++iterator;
1172             break;
1173         }
1174     }
1175     return value;
1176 }
1177
1178 static uint64_t pow256(size_t exponent)
1179 {
1180     RELEASE_ASSERT(exponent <= 4);
1181     uint64_t values[5] = {1, 256, 256 * 256, 256 * 256 * 256, 256ull * 256 * 256 * 256 };
1182     return values[exponent];
1183 }
1184
1185 static Optional<uint32_t> parseIPv4Host(StringView::CodePoints::Iterator iterator, const StringView::CodePoints::Iterator& end)
1186 {
1187     Vector<uint32_t, 4> items;
1188     items.reserveInitialCapacity(4);
1189     while (iterator != end) {
1190         if (items.size() >= 4)
1191             return Nullopt;
1192         if (auto item = parseIPv4Number(iterator, end))
1193             items.append(item.value());
1194         else
1195             return Nullopt;
1196     }
1197     if (!items.size() || items.size() > 4)
1198         return Nullopt;
1199     for (size_t i = 0; i < items.size() - 2; i++) {
1200         if (items[i] > 255)
1201             return Nullopt;
1202     }
1203     if (items[items.size() - 1] >= pow256(5 - items.size()))
1204         return Nullopt;
1205     for (auto item : items) {
1206         if (item > 255)
1207             return Nullopt;
1208     }
1209     uint32_t ipv4 = items.takeLast();
1210     for (size_t counter = 0; counter < items.size(); ++counter)
1211         ipv4 += items[counter] * pow256(3 - counter);
1212     return ipv4;
1213 }
1214
1215 static Optional<std::array<uint16_t, 8>> parseIPv6Host(StringView::CodePoints::Iterator c, StringView::CodePoints::Iterator end)
1216 {
1217     if (c == end)
1218         return Nullopt;
1219
1220     std::array<uint16_t, 8> address = {{0, 0, 0, 0, 0, 0, 0, 0}};
1221     size_t piecePointer = 0;
1222     Optional<size_t> compressPointer;
1223
1224     if (*c == ':') {
1225         ++c;
1226         if (c == end)
1227             return Nullopt;
1228         if (*c != ':')
1229             return Nullopt;
1230         ++c;
1231         ++piecePointer;
1232         compressPointer = piecePointer;
1233     }
1234     
1235     while (c != end) {
1236         if (piecePointer == 8)
1237             return Nullopt;
1238         if (*c == ':') {
1239             if (compressPointer)
1240                 return Nullopt;
1241             ++c;
1242             ++piecePointer;
1243             compressPointer = piecePointer;
1244             continue;
1245         }
1246         uint16_t value = 0;
1247         for (size_t length = 0; length < 4; length++) {
1248             if (c == end)
1249                 break;
1250             if (!isASCIIHexDigit(*c))
1251                 break;
1252             value = value * 0x10 + toASCIIHexValue(*c);
1253             ++c;
1254         }
1255         address[piecePointer++] = value;
1256         if (c == end)
1257             break;
1258         if (*c != ':')
1259             return Nullopt;
1260         ++c;
1261     }
1262     
1263     if (c != end) {
1264         if (piecePointer > 6)
1265             return Nullopt;
1266         size_t dotsSeen = 0;
1267         while (c != end) {
1268             Optional<uint16_t> value;
1269             if (!isASCIIDigit(*c))
1270                 return Nullopt;
1271             while (isASCIIDigit(*c)) {
1272                 auto number = *c - '0';
1273                 if (!value)
1274                     value = number;
1275                 else if (!value.value())
1276                     return Nullopt;
1277                 else
1278                     value = value.value() * 10 + number;
1279                 ++c;
1280                 if (c == end)
1281                     return Nullopt;
1282                 if (value.value() > 255)
1283                     return Nullopt;
1284             }
1285             if (dotsSeen < 3 && *c != '.')
1286                 return Nullopt;
1287             address[piecePointer] = address[piecePointer] * 0x100 + value.valueOr(0);
1288             if (dotsSeen == 1 || dotsSeen == 3)
1289                 piecePointer++;
1290             if (c != end)
1291                 ++c;
1292             if (dotsSeen == 3 && c != end)
1293                 return Nullopt;
1294             dotsSeen++;
1295         }
1296     }
1297     if (compressPointer) {
1298         size_t swaps = piecePointer - compressPointer.value();
1299         piecePointer = 7;
1300         while (swaps)
1301             std::swap(address[piecePointer--], address[compressPointer.value() + swaps-- - 1]);
1302     } else if (piecePointer != 8)
1303         return Nullopt;
1304     return address;
1305 }
1306
1307 static String percentDecode(const String& input)
1308 {
1309     StringBuilder output;
1310     RELEASE_ASSERT(input.is8Bit());
1311     const LChar* inputBytes = input.characters8();
1312     size_t length = input.length();
1313     
1314     for (size_t i = 0; i < length; ++i) {
1315         uint8_t byte = inputBytes[i];
1316         if (byte != '%')
1317             output.append(byte);
1318         else if (i < length - 2) {
1319             if (isASCIIHexDigit(inputBytes[i + 1]) && isASCIIHexDigit(inputBytes[i + 2])) {
1320                 output.append(toASCIIHexValue(inputBytes[i + 1], inputBytes[i + 2]));
1321                 i += 2;
1322             } else
1323                 output.append(byte);
1324         } else
1325             output.append(byte);
1326     }
1327     return output.toStringPreserveCapacity();
1328 }
1329
1330 static bool containsOnlyASCII(const String& string)
1331 {
1332     if (string.is8Bit())
1333         return charactersAreAllASCII(string.characters8(), string.length());
1334     return charactersAreAllASCII(string.characters16(), string.length());
1335 }
1336
1337 static Optional<String> domainToASCII(const String& domain)
1338 {
1339     const unsigned hostnameBufferLength = 2048;
1340
1341     if (containsOnlyASCII(domain)) {
1342         if (domain.is8Bit())
1343             return domain;
1344         Vector<LChar, hostnameBufferLength> buffer;
1345         size_t length = domain.length();
1346         buffer.reserveInitialCapacity(length);
1347         for (size_t i = 0; i < length; ++i)
1348             buffer.append(domain[i]);
1349         return String(buffer.data(), length);
1350     }
1351     
1352     UChar hostnameBuffer[hostnameBufferLength];
1353     UErrorCode error = U_ZERO_ERROR;
1354     
1355     int32_t numCharactersConverted = uidna_IDNToASCII(StringView(domain).upconvertedCharacters(), domain.length(), hostnameBuffer, hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, nullptr, &error);
1356
1357     if (error == U_ZERO_ERROR) {
1358         LChar buffer[hostnameBufferLength];
1359         for (int32_t i = 0; i < numCharactersConverted; ++i) {
1360             ASSERT(isASCII(hostnameBuffer[i]));
1361             buffer[i] = hostnameBuffer[i];
1362         }
1363         return String(buffer, numCharactersConverted);
1364     }
1365
1366     // FIXME: Check for U_BUFFER_OVERFLOW_ERROR and retry with an allocated buffer.
1367     return Nullopt;
1368 }
1369
1370 static bool hasInvalidDomainCharacter(const String& asciiDomain)
1371 {
1372     RELEASE_ASSERT(asciiDomain.is8Bit());
1373     const LChar* characters = asciiDomain.characters8();
1374     for (size_t i = 0; i < asciiDomain.length(); ++i) {
1375         if (isInvalidDomainCharacter(characters[i]))
1376             return true;
1377     }
1378     return false;
1379 }
1380
1381 bool URLParser::parsePort(StringView::CodePoints::Iterator& iterator, const StringView::CodePoints::Iterator& end)
1382 {
1383     uint32_t port = 0;
1384     if (iterator == end) {
1385         m_url.m_portEnd = m_buffer.length();
1386         return true;
1387     }
1388     m_buffer.append(':');
1389     for (; iterator != end; ++iterator) {
1390         if (isTabOrNewline(*iterator))
1391             continue;
1392         if (isASCIIDigit(*iterator)) {
1393             port = port * 10 + *iterator - '0';
1394             if (port > std::numeric_limits<uint16_t>::max())
1395                 return false;
1396         } else
1397             return false;
1398     }
1399     
1400     // FIXME: This shouldn't need a String allocation.
1401     String scheme = m_buffer.toStringPreserveCapacity().substring(0, m_url.m_schemeEnd);
1402     if (isDefaultPort(scheme, port)) {
1403         ASSERT(m_buffer[m_buffer.length() - 1] == ':');
1404         m_buffer.resize(m_buffer.length() - 1);
1405     } else
1406         m_buffer.appendNumber(port);
1407
1408     m_url.m_portEnd = m_buffer.length();
1409     return true;
1410 }
1411
1412 bool URLParser::parseHost(StringView::CodePoints::Iterator& iterator, const StringView::CodePoints::Iterator& end)
1413 {
1414     if (iterator == end)
1415         return false;
1416     if (*iterator == '[') {
1417         ++iterator;
1418         auto ipv6End = iterator;
1419         while (ipv6End != end && *ipv6End != ']')
1420             ++ipv6End;
1421         if (auto address = parseIPv6Host(iterator, ipv6End)) {
1422             serializeIPv6(address.value(), m_buffer);
1423             m_url.m_hostEnd = m_buffer.length();
1424             if (ipv6End != end) {
1425                 ++ipv6End;
1426                 if (ipv6End != end && *ipv6End == ':') {
1427                     ++ipv6End;
1428                     return parsePort(ipv6End, end);
1429                 }
1430                 m_url.m_portEnd = m_buffer.length();
1431                 return true;
1432             }
1433             return true;
1434         }
1435     }
1436
1437     // FIXME: We probably don't need to make so many buffers and String copies.
1438     StringBuilder utf8Encoded;
1439     for (; iterator != end; ++iterator) {
1440         if (isTabOrNewline(*iterator))
1441             continue;
1442         if (*iterator == ':')
1443             break;
1444         uint8_t buffer[U8_MAX_LENGTH];
1445         int32_t offset = 0;
1446         UBool error = false;
1447         U8_APPEND(buffer, offset, U8_MAX_LENGTH, *iterator, error);
1448         // FIXME: Check error.
1449         utf8Encoded.append(buffer, offset);
1450     }
1451     String percentDecoded = percentDecode(utf8Encoded.toStringPreserveCapacity());
1452     RELEASE_ASSERT(percentDecoded.is8Bit());
1453     String domain = String::fromUTF8(percentDecoded.characters8(), percentDecoded.length());
1454     auto asciiDomain = domainToASCII(domain);
1455     if (!asciiDomain || hasInvalidDomainCharacter(asciiDomain.value()))
1456         return false;
1457     
1458     auto asciiDomainCodePoints = StringView(asciiDomain.value()).codePoints();
1459     if (auto address = parseIPv4Host(asciiDomainCodePoints.begin(), asciiDomainCodePoints.end())) {
1460         serializeIPv4(address.value(), m_buffer);
1461         m_url.m_hostEnd = m_buffer.length();
1462         if (iterator == end) {
1463             m_url.m_portEnd = m_buffer.length();
1464             return true;
1465         }
1466         ++iterator;
1467         return parsePort(iterator, end);
1468     }
1469     
1470     m_buffer.append(asciiDomain.value());
1471     m_url.m_hostEnd = m_buffer.length();
1472     if (iterator != end) {
1473         ASSERT(*iterator == ':');
1474         ++iterator;
1475         while (iterator != end && isTabOrNewline(*iterator))
1476             ++iterator;
1477         return parsePort(iterator, end);
1478     }
1479     m_url.m_portEnd = m_buffer.length();
1480     return true;
1481 }
1482
1483 bool URLParser::allValuesEqual(const URL& a, const URL& b)
1484 {
1485     LOG(URLParser, "%d %d %d %d %d %d %d %d %d %d %d %d %s\n%d %d %d %d %d %d %d %d %d %d %d %d %s",
1486         a.m_isValid,
1487         a.m_protocolIsInHTTPFamily,
1488         a.m_schemeEnd,
1489         a.m_userStart,
1490         a.m_userEnd,
1491         a.m_passwordEnd,
1492         a.m_hostEnd,
1493         a.m_portEnd,
1494         a.m_pathAfterLastSlash,
1495         a.m_pathEnd,
1496         a.m_queryEnd,
1497         a.m_fragmentEnd,
1498         a.m_string.utf8().data(),
1499         b.m_isValid,
1500         b.m_protocolIsInHTTPFamily,
1501         b.m_schemeEnd,
1502         b.m_userStart,
1503         b.m_userEnd,
1504         b.m_passwordEnd,
1505         b.m_hostEnd,
1506         b.m_portEnd,
1507         b.m_pathAfterLastSlash,
1508         b.m_pathEnd,
1509         b.m_queryEnd,
1510         b.m_fragmentEnd,
1511         b.m_string.utf8().data());
1512
1513     return a.m_string == b.m_string
1514         && a.m_isValid == b.m_isValid
1515         && a.m_protocolIsInHTTPFamily == b.m_protocolIsInHTTPFamily
1516         && a.m_schemeEnd == b.m_schemeEnd
1517         && a.m_userStart == b.m_userStart
1518         && a.m_userEnd == b.m_userEnd
1519         && a.m_passwordEnd == b.m_passwordEnd
1520         && a.m_hostEnd == b.m_hostEnd
1521         && a.m_portEnd == b.m_portEnd
1522         && a.m_pathAfterLastSlash == b.m_pathAfterLastSlash
1523         && a.m_pathEnd == b.m_pathEnd
1524         && a.m_queryEnd == b.m_queryEnd
1525         && a.m_fragmentEnd == b.m_fragmentEnd;
1526 }
1527
1528 static bool urlParserEnabled = false;
1529
1530 void URLParser::setEnabled(bool enabled)
1531 {
1532     urlParserEnabled = enabled;
1533 }
1534
1535 bool URLParser::enabled()
1536 {
1537     return urlParserEnabled;
1538 }
1539
1540 } // namespace WebCore