2 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 * Copyright (C) 2011 Apple Inc. All Rights Reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of Apple Inc. ("Apple") nor the names of
18 * its contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
25 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "HTTPParsers.h"
36 #include "ContentSecurityPolicy.h"
37 #include <wtf/DateMath.h>
38 #include <wtf/text/CString.h>
39 #include <wtf/text/StringBuilder.h>
40 #include <wtf/text/WTFString.h>
41 #include <wtf/unicode/CharacterNames.h>
47 // true if there is more to parse, after incrementing pos past whitespace.
48 // Note: Might return pos == str.length()
49 static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta)
51 unsigned len = str.length();
53 if (fromHttpEquivMeta) {
54 while (pos < len && str[pos] <= ' ')
57 while (pos < len && (str[pos] == '\t' || str[pos] == ' '))
64 // Returns true if the function can match the whole token (case insensitive)
65 // incrementing pos on match, otherwise leaving pos unchanged.
66 // Note: Might return pos == str.length()
67 static inline bool skipToken(const String& str, unsigned& pos, const char* token)
69 unsigned len = str.length();
70 unsigned current = pos;
72 while (current < len && *token) {
73 if (toASCIILower(str[current]) != *token++)
85 // True if the expected equals sign is seen and there is more to follow.
86 static inline bool skipEquals(const String& str, unsigned &pos)
88 return skipWhiteSpace(str, pos, false) && str[pos++] == '=' && skipWhiteSpace(str, pos, false);
91 // True if a value present, incrementing pos to next space or semicolon, if any.
92 // Note: might return pos == str.length().
93 static inline bool skipValue(const String& str, unsigned& pos)
96 unsigned len = str.length();
98 if (str[pos] == ' ' || str[pos] == '\t' || str[pos] == ';')
105 // See RFC 7230, Section 3.2.3.
106 bool isValidHTTPHeaderValue(const String& value)
109 if (c == ' ' || c == '\t')
111 c = value[value.length() - 1];
112 if (c == ' ' || c == '\t')
114 for (unsigned i = 0; i < value.length(); ++i) {
116 if (c == 0x7F || c > 0xFF || (c < 0x20 && c != '\t'))
122 // See RFC 7230, Section 3.2.6.
123 bool isValidHTTPToken(const String& value)
127 for (unsigned i = 0; i < value.length(); ++i) {
129 if (c <= 0x20 || c >= 0x7F
130 || c == '(' || c == ')' || c == '<' || c == '>' || c == '@'
131 || c == ',' || c == ';' || c == ':' || c == '\\' || c == '"'
132 || c == '/' || c == '[' || c == ']' || c == '?' || c == '='
133 || c == '{' || c == '}')
139 static const size_t maxInputSampleSize = 128;
140 static String trimInputSample(const char* p, size_t length)
142 String s = String(p, std::min<size_t>(length, maxInputSampleSize));
143 if (length > maxInputSampleSize)
144 s.append(horizontalEllipsis);
148 ContentDispositionType contentDispositionType(const String& contentDisposition)
150 if (contentDisposition.isEmpty())
151 return ContentDispositionNone;
153 Vector<String> parameters;
154 contentDisposition.split(';', parameters);
156 String dispositionType = parameters[0];
157 dispositionType.stripWhiteSpace();
159 if (equalIgnoringCase(dispositionType, "inline"))
160 return ContentDispositionInline;
162 // Some broken sites just send bogus headers like
164 // Content-Disposition: ; filename="file"
165 // Content-Disposition: filename="file"
166 // Content-Disposition: name="file"
168 // without a disposition token... screen those out.
169 if (!isValidHTTPToken(dispositionType))
170 return ContentDispositionNone;
172 // We have a content-disposition of "attachment" or unknown.
173 // RFC 2183, section 2.8 says that an unknown disposition
174 // value should be treated as "attachment"
175 return ContentDispositionAttachment;
178 bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
180 unsigned len = refresh.length();
183 if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
186 while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
189 if (pos == len) { // no URL
192 delay = refresh.stripWhiteSpace().toDouble(&ok);
196 delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
201 skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
202 unsigned urlStartPos = pos;
203 if (refresh.find("url", urlStartPos, false) == urlStartPos) {
205 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
206 if (refresh[urlStartPos] == '=') {
208 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
210 urlStartPos = pos; // e.g. "Refresh: 0; url.html"
213 unsigned urlEndPos = len;
215 if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
216 UChar quotationMark = refresh[urlStartPos];
218 while (urlEndPos > urlStartPos) {
220 if (refresh[urlEndPos] == quotationMark)
224 // https://bugs.webkit.org/show_bug.cgi?id=27868
225 // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
226 // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
227 // after the opening quote instead.
228 if (urlEndPos == urlStartPos)
232 url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
237 double parseDate(const String& value)
239 return parseDateFromNullTerminatedCharacters(value.utf8().data());
242 // FIXME: This function doesn't comply with RFC 6266.
243 // For example, this function doesn't handle the interaction between " and ;
244 // that arises from quoted-string, nor does this function properly unquote
245 // attribute values. Further this function appears to process parameter names
246 // in a case-sensitive manner. (There are likely other bugs as well.)
247 String filenameFromHTTPContentDisposition(const String& value)
249 Vector<String> keyValuePairs;
250 value.split(';', keyValuePairs);
252 unsigned length = keyValuePairs.size();
253 for (unsigned i = 0; i < length; i++) {
254 size_t valueStartPos = keyValuePairs[i].find('=');
255 if (valueStartPos == notFound)
258 String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace();
260 if (key.isEmpty() || key != "filename")
263 String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace();
265 // Remove quotes if there are any
266 if (value[0] == '\"')
267 value = value.substring(1, value.length() - 2);
275 String extractMIMETypeFromMediaType(const String& mediaType)
277 StringBuilder mimeType;
278 unsigned length = mediaType.length();
279 mimeType.reserveCapacity(length);
280 for (unsigned i = 0; i < length; i++) {
281 UChar c = mediaType[i];
286 // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
287 // type header field, Content-Type. In such cases, the media type string passed here may contain
288 // the multiple values separated by commas. For now, this code ignores text after the first comma,
289 // which prevents it from simply failing to parse such types altogether. Later for better
290 // compatibility we could consider using the first or last valid MIME type instead.
291 // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
295 // FIXME: The following is not correct. RFC 2616 allows linear white space before and
296 // after the MIME type, but not within the MIME type itself. And linear white space
297 // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline.
298 // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
299 if (isSpaceOrNewline(c))
305 if (mimeType.length() == length)
307 return mimeType.toString();
310 String extractCharsetFromMediaType(const String& mediaType)
312 unsigned int pos, len;
313 findCharsetInMediaType(mediaType, pos, len);
314 return mediaType.substring(pos, len);
317 void findCharsetInMediaType(const String& mediaType, unsigned int& charsetPos, unsigned int& charsetLen, unsigned int start)
323 unsigned length = mediaType.length();
325 while (pos < length) {
326 pos = mediaType.find("charset", pos, false);
327 if (pos == notFound || pos == 0) {
332 // is what we found a beginning of a word?
333 if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') {
341 while (pos != length && mediaType[pos] <= ' ')
344 if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
347 while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
350 // we don't handle spaces within quoted parameter values, because charset names cannot have any
351 unsigned endpos = pos;
352 while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
356 charsetLen = endpos - pos;
361 ContentSecurityPolicy::ReflectedXSSDisposition parseXSSProtectionHeader(const String& header, String& failureReason, unsigned& failurePosition, String& reportURL)
363 DEPRECATED_DEFINE_STATIC_LOCAL(String, failureReasonInvalidToggle, (ASCIILiteral("expected 0 or 1")));
364 DEPRECATED_DEFINE_STATIC_LOCAL(String, failureReasonInvalidSeparator, (ASCIILiteral("expected semicolon")));
365 DEPRECATED_DEFINE_STATIC_LOCAL(String, failureReasonInvalidEquals, (ASCIILiteral("expected equals sign")));
366 DEPRECATED_DEFINE_STATIC_LOCAL(String, failureReasonInvalidMode, (ASCIILiteral("invalid mode directive")));
367 DEPRECATED_DEFINE_STATIC_LOCAL(String, failureReasonInvalidReport, (ASCIILiteral("invalid report directive")));
368 DEPRECATED_DEFINE_STATIC_LOCAL(String, failureReasonDuplicateMode, (ASCIILiteral("duplicate mode directive")));
369 DEPRECATED_DEFINE_STATIC_LOCAL(String, failureReasonDuplicateReport, (ASCIILiteral("duplicate report directive")));
370 DEPRECATED_DEFINE_STATIC_LOCAL(String, failureReasonInvalidDirective, (ASCIILiteral("unrecognized directive")));
374 if (!skipWhiteSpace(header, pos, false))
375 return ContentSecurityPolicy::ReflectedXSSUnset;
377 if (header[pos] == '0')
378 return ContentSecurityPolicy::AllowReflectedXSS;
380 if (header[pos++] != '1') {
381 failureReason = failureReasonInvalidToggle;
382 return ContentSecurityPolicy::ReflectedXSSInvalid;
385 ContentSecurityPolicy::ReflectedXSSDisposition result = ContentSecurityPolicy::FilterReflectedXSS;
386 bool modeDirectiveSeen = false;
387 bool reportDirectiveSeen = false;
390 // At end of previous directive: consume whitespace, semicolon, and whitespace.
391 if (!skipWhiteSpace(header, pos, false))
394 if (header[pos++] != ';') {
395 failureReason = failureReasonInvalidSeparator;
396 failurePosition = pos;
397 return ContentSecurityPolicy::ReflectedXSSInvalid;
400 if (!skipWhiteSpace(header, pos, false))
403 // At start of next directive.
404 if (skipToken(header, pos, "mode")) {
405 if (modeDirectiveSeen) {
406 failureReason = failureReasonDuplicateMode;
407 failurePosition = pos;
408 return ContentSecurityPolicy::ReflectedXSSInvalid;
410 modeDirectiveSeen = true;
411 if (!skipEquals(header, pos)) {
412 failureReason = failureReasonInvalidEquals;
413 failurePosition = pos;
414 return ContentSecurityPolicy::ReflectedXSSInvalid;
416 if (!skipToken(header, pos, "block")) {
417 failureReason = failureReasonInvalidMode;
418 failurePosition = pos;
419 return ContentSecurityPolicy::ReflectedXSSInvalid;
421 result = ContentSecurityPolicy::BlockReflectedXSS;
422 } else if (skipToken(header, pos, "report")) {
423 if (reportDirectiveSeen) {
424 failureReason = failureReasonDuplicateReport;
425 failurePosition = pos;
426 return ContentSecurityPolicy::ReflectedXSSInvalid;
428 reportDirectiveSeen = true;
429 if (!skipEquals(header, pos)) {
430 failureReason = failureReasonInvalidEquals;
431 failurePosition = pos;
432 return ContentSecurityPolicy::ReflectedXSSInvalid;
434 size_t startPos = pos;
435 if (!skipValue(header, pos)) {
436 failureReason = failureReasonInvalidReport;
437 failurePosition = pos;
438 return ContentSecurityPolicy::ReflectedXSSInvalid;
440 reportURL = header.substring(startPos, pos - startPos);
441 failurePosition = startPos; // If later semantic check deems unacceptable.
443 failureReason = failureReasonInvalidDirective;
444 failurePosition = pos;
445 return ContentSecurityPolicy::ReflectedXSSInvalid;
451 ContentTypeOptionsDisposition parseContentTypeOptionsHeader(const String& header)
453 if (header.stripWhiteSpace().lower() == "nosniff")
454 return ContentTypeOptionsNosniff;
455 return ContentTypeOptionsNone;
459 String extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
461 size_t spacePos = statusLine.find(' ');
462 // Remove status code from the status line.
463 spacePos = statusLine.find(' ', spacePos + 1);
464 return statusLine.substring(spacePos + 1);
467 XFrameOptionsDisposition parseXFrameOptionsHeader(const String& header)
469 XFrameOptionsDisposition result = XFrameOptionsNone;
471 if (header.isEmpty())
474 Vector<String> headers;
475 header.split(',', headers);
477 for (size_t i = 0; i < headers.size(); i++) {
478 String currentHeader = headers[i].stripWhiteSpace();
479 XFrameOptionsDisposition currentValue = XFrameOptionsNone;
480 if (equalIgnoringCase(currentHeader, "deny"))
481 currentValue = XFrameOptionsDeny;
482 else if (equalIgnoringCase(currentHeader, "sameorigin"))
483 currentValue = XFrameOptionsSameOrigin;
484 else if (equalIgnoringCase(currentHeader, "allowall"))
485 currentValue = XFrameOptionsAllowAll;
487 currentValue = XFrameOptionsInvalid;
489 if (result == XFrameOptionsNone)
490 result = currentValue;
491 else if (result != currentValue)
492 return XFrameOptionsConflict;
497 bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
499 // The format of "Range" header is defined in RFC 2616 Section 14.35.1.
500 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
501 // We don't support multiple range requests.
503 rangeOffset = rangeEnd = rangeSuffixLength = -1;
505 // The "bytes" unit identifier should be present.
506 static const char bytesStart[] = "bytes=";
507 if (!range.startsWith(bytesStart, false))
509 String byteRange = range.substring(sizeof(bytesStart) - 1);
511 // The '-' character needs to be present.
512 int index = byteRange.find('-');
516 // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
520 String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace();
522 long long value = suffixLengthString.toInt64Strict(&ok);
524 rangeSuffixLength = value;
528 // Otherwise, the first-byte-position and the last-byte-position are provied.
532 String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
534 long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
538 String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace();
539 long long lastBytePos = -1;
540 if (!lastBytePosStr.isEmpty()) {
541 lastBytePos = lastBytePosStr.toInt64Strict(&ok);
546 if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos))
549 rangeOffset = firstBytePos;
550 rangeEnd = lastBytePos;
554 // HTTP/1.1 - RFC 2616
555 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1
556 // Request-Line = Method SP Request-URI SP HTTP-Version CRLF
557 size_t parseHTTPRequestLine(const char* data, size_t length, String& failureReason, String& method, String& url, HTTPVersion& httpVersion)
561 httpVersion = Unknown;
563 const char* space1 = 0;
564 const char* space2 = 0;
566 size_t consumedLength;
568 for (p = data, consumedLength = 0; consumedLength < length; p++, consumedLength++) {
574 } else if (*p == '\n')
578 // Haven't finished header line.
579 if (consumedLength == length) {
580 failureReason = "Incomplete Request Line";
584 // RequestLine does not contain 3 parts.
585 if (!space1 || !space2) {
586 failureReason = "Request Line does not appear to contain: <Method> <Url> <HTTPVersion>.";
590 // The line must end with "\r\n".
591 const char* end = p + 1;
592 if (*(end - 2) != '\r') {
593 failureReason = "Request line does not end with CRLF";
598 method = String(data, space1 - data); // For length subtract 1 for space, but add 1 for data being the first character.
601 url = String(space1 + 1, space2 - space1 - 1); // For length subtract 1 for space.
604 String httpVersionString(space2 + 1, end - space2 - 3); // For length subtract 1 for space, and 2 for "\r\n".
605 if (httpVersionString.length() != 8 || !httpVersionString.startsWith("HTTP/1."))
606 httpVersion = Unknown;
607 else if (httpVersionString[7] == '0')
608 httpVersion = HTTP_1_0;
609 else if (httpVersionString[7] == '1')
610 httpVersion = HTTP_1_1;
612 httpVersion = Unknown;
617 size_t parseHTTPHeader(const char* start, size_t length, String& failureReason, String& nameStr, String& valueStr, bool strict)
619 const char* p = start;
620 const char* end = start + length;
627 for (; p < end; p++) {
630 if (name.isEmpty()) {
631 if (p + 1 < end && *(p + 1) == '\n')
632 return (p + 2) - start;
633 failureReason = "CR doesn't follow LF at " + trimInputSample(p, end - p);
636 failureReason = "Unexpected CR in name at " + trimInputSample(name.data(), name.size());
639 failureReason = "Unexpected LF in name at " + trimInputSample(name.data(), name.size());
653 for (; p < end && *p == 0x20; p++) { }
655 for (; p < end; p++) {
661 failureReason = "Unexpected LF in value at " + trimInputSample(value.data(), value.size());
668 if (*p == '\r' || (!strict && *p == '\n')) {
673 if (p >= end || (strict && *p != '\n')) {
674 failureReason = "CR doesn't follow LF after value at " + trimInputSample(p, end - p);
677 nameStr = String::fromUTF8(name.data(), name.size());
678 valueStr = String::fromUTF8(value.data(), value.size());
679 if (nameStr.isNull()) {
680 failureReason = "Invalid UTF-8 sequence in header name";
683 if (valueStr.isNull()) {
684 failureReason = "Invalid UTF-8 sequence in header value";
690 size_t parseHTTPRequestBody(const char* data, size_t length, Vector<unsigned char>& body)
693 body.append(data, length);