2011-06-02 Jay Civelli <jcivelli@chromium.org>
author jcivelli@chromium.org <jcivelli@chromium.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 2 Jun 2011 21:59:29 +0000 (21:59 +0000)
committer jcivelli@chromium.org <jcivelli@chromium.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 2 Jun 2011 21:59:29 +0000 (21:59 +0000)
        Reviewed by Adam Barth.

        Added a method to generate RFC 2822 compliant date strings.
        https://bugs.webkit.org/show_bug.cgi?id=7169

        * wtf/DateMath.cpp:
        (WTF::twoDigitStringFromNumber):
        (WTF::makeRFC2822DateString):
        * wtf/DateMath.h:
2011-06-02  Jay Civelli  <jcivelli@chromium.org>

        Reviewed by Adam Barth.

        Adding MHTML generation support to MHTMLArchive.
        https://bugs.webkit.org/show_bug.cgi?id=7169

        * loader/archive/mhtml/MHTMLArchive.cpp:
        (WebCore::generateRandomBoundary):
        (WebCore::replaceNonPrintableCharacters):
        (WebCore::MHTMLArchive::generateMHTMLData):
        * loader/archive/mhtml/MHTMLArchive.h:
        * page/PageSerializer.cpp:
        (WebCore::PageSerializer::serializeFrame):
        (WebCore::PageSerializer::serializeCSSStyleSheet):
        * platform/SharedBuffer.cpp:
        (WebCore::SharedBuffer::append):
        * platform/SharedBuffer.h:
2011-06-02  Jay Civelli  <jcivelli@chromium.org>

        Reviewed by Adam Barth.

        Adding MHTML generation support to MHTMLArchive.
        https://bugs.webkit.org/show_bug.cgi?id=7169

        * public/WebPageSerializer.h:
        * src/WebPageSerializer.cpp:
        (WebKit::WebPageSerializer::serializeToMHTML):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@87958 268f45cc-cd09-0410-ab3c-d52691b4dbfc

12 files changed:
Source/JavaScriptCore/ChangeLog [changed mode: 0644->0755]
Source/JavaScriptCore/wtf/DateMath.cpp
Source/JavaScriptCore/wtf/DateMath.h
Source/WebCore/ChangeLog [changed mode: 0644->0755]
Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp
Source/WebCore/loader/archive/mhtml/MHTMLArchive.h
Source/WebCore/page/PageSerializer.cpp
Source/WebCore/platform/SharedBuffer.cpp
Source/WebCore/platform/SharedBuffer.h
Source/WebKit/chromium/ChangeLog [changed mode: 0644->0755]
Source/WebKit/chromium/public/WebPageSerializer.h
Source/WebKit/chromium/src/WebPageSerializer.cpp

old mode 100644 (file)
new mode 100755 (executable)
index c7de564..8ec5057
@@ -1,3 +1,15 @@
+2011-06-02  Jay Civelli  <jcivelli@chromium.org>
+
+        Reviewed by Adam Barth.
+
+        Added a method to generate RFC 2822 compliant date strings.
+        https://bugs.webkit.org/show_bug.cgi?id=7169
+
+        * wtf/DateMath.cpp:
+        (WTF::twoDigitStringFromNumber):
+        (WTF::makeRFC2822DateString):
+        * wtf/DateMath.h:
+
 2011-06-02  Alexis Menard  <alexis.menard@openbossa.org>
 
         Reviewed by Andreas Kling.
index 70c0cf4..9b15ae1 100644 (file)
@@ -90,7 +90,7 @@
 #include <limits>
 #include <stdint.h>
 #include <time.h>
-
+#include <wtf/text/StringBuilder.h>
 
 #if HAVE(ERRNO_H)
 #include <errno.h>
@@ -180,6 +180,14 @@ static inline double msToDays(double ms)
     return floor(ms / msPerDay);
 }
 
+// Formats |number| (expected to be in [0, 99]) as a two-character decimal
+// string, left-padding single-digit values with a '0'.
+static String twoDigitStringFromNumber(int number)
+{
+    ASSERT(number >= 0 && number < 100);
+    String digits = String::number(number);
+    if (number < 10)
+        return makeString("0", digits);
+    return digits;
+}
+
 int msToYear(double ms)
 {
     int approxYear = static_cast<int>(floor(ms / (msPerDay * 365.2425)) + 1970);
@@ -1028,6 +1036,34 @@ double timeClip(double t)
         return NaN;
     return trunc(t);
 }
+
+// Builds a date-time string in the RFC 2822 format, for instance
+// "Thu, 2 Jun 2011 21:59:29 +0000".
+// See http://tools.ietf.org/html/rfc2822#section-3.3 for more information.
+// dayOfWeek and month are used as indexes into weekdayName and monthName;
+// utcOffset is the offset from UTC expressed in minutes.
+String makeRFC2822DateString(unsigned dayOfWeek, unsigned day, unsigned month, unsigned year, unsigned hours, unsigned minutes, unsigned seconds, int utcOffset)
+{
+    StringBuilder stringBuilder;
+
+    // Date part: "<weekday>, <day> <month> <year> ".
+    stringBuilder.append(weekdayName[dayOfWeek]);
+    stringBuilder.append(", ");
+    stringBuilder.append(String::number(day));
+    stringBuilder.append(" ");
+    stringBuilder.append(monthName[month]);
+    stringBuilder.append(" ");
+    stringBuilder.append(String::number(year));
+    stringBuilder.append(" ");
+
+    // Time part: "hh:mm:ss ", each field zero-padded to two digits.
+    stringBuilder.append(twoDigitStringFromNumber(hours));
+    stringBuilder.append(':');
+    stringBuilder.append(twoDigitStringFromNumber(minutes));
+    stringBuilder.append(':');
+    stringBuilder.append(twoDigitStringFromNumber(seconds));
+    stringBuilder.append(' ');
+
+    // Zone part: sign followed by "hhmm". RFC 2822 reserves "-0000" for date-times that carry
+    // no information about the local time zone, so a zero offset has to be written as "+0000".
+    stringBuilder.append(utcOffset >= 0 ? "+" : "-");
+    int absoluteUTCOffset = abs(utcOffset);
+    stringBuilder.append(twoDigitStringFromNumber(absoluteUTCOffset / 60));
+    stringBuilder.append(twoDigitStringFromNumber(absoluteUTCOffset % 60));
+
+    return stringBuilder.toString();
+}
 } // namespace WTF
 
 #if USE(JSC)
index 2ac284e..cc2e842 100644 (file)
@@ -51,6 +51,7 @@
 #include <wtf/Noncopyable.h>
 #include <wtf/OwnArrayPtr.h>
 #include <wtf/PassOwnArrayPtr.h>
+#include <wtf/text/WTFString.h>
 #include <wtf/UnusedParam.h>
 
 namespace WTF {
@@ -61,6 +62,8 @@ int equivalentYearForDST(int year);
 double parseES5DateFromNullTerminatedCharacters(const char* dateString);
 double parseDateFromNullTerminatedCharacters(const char* dateString);
 double timeClip(double);
+// dayOfWeek: [0, 6] with 0 being Monday (unlike tm_wday, where 0 is Sunday), day: [1, 31], month: [0, 11], year: ex: 2011, hours: [0, 23], minutes: [0, 59], seconds: [0, 59], utcOffset: offset from UTC in minutes, [-720, 720].
+String makeRFC2822DateString(unsigned dayOfWeek, unsigned day, unsigned month, unsigned year, unsigned hours, unsigned minutes, unsigned seconds, int utcOffset);
 
 inline double jsCurrentTime()
 {
@@ -106,6 +109,7 @@ using WTF::msPerSecond;
 using WTF::msToYear;
 using WTF::secondsPerMinute;
 using WTF::parseDateFromNullTerminatedCharacters;
+using WTF::makeRFC2822DateString;
 using WTF::calculateUTCOffset;
 using WTF::calculateDSTOffset;
 
old mode 100644 (file)
new mode 100755 (executable)
index 8f21ab4..a3ede10
@@ -1,3 +1,22 @@
+2011-06-02  Jay Civelli  <jcivelli@chromium.org>
+
+        Reviewed by Adam Barth.
+
+        Adding MHTML generation support to MHTMLArchive.
+        https://bugs.webkit.org/show_bug.cgi?id=7169
+
+        * loader/archive/mhtml/MHTMLArchive.cpp:
+        (WebCore::generateRandomBoundary):
+        (WebCore::replaceNonPrintableCharacters):
+        (WebCore::MHTMLArchive::generateMHTMLData):
+        * loader/archive/mhtml/MHTMLArchive.h:
+        * page/PageSerializer.cpp:
+        (WebCore::PageSerializer::serializeFrame):
+        (WebCore::PageSerializer::serializeCSSStyleSheet):
+        * platform/SharedBuffer.cpp:
+        (WebCore::SharedBuffer::append):
+        * platform/SharedBuffer.h:
+
 2011-06-02  Simon Fraser  <simon.fraser@apple.com>
 
         Reviewed by Andreas Kling.
index 0d33237..ccef644 100644 (file)
 #if ENABLE(MHTML)
 #include "MHTMLArchive.h"
 
+#include "Base64.h"
+#include "CryptographicallyRandomNumber.h"
+#include "DateMath.h"
+#include "Document.h"
+#include "Frame.h"
 #include "MHTMLParser.h"
-#include "MIMEHeader.h"
+#include "MIMETypeRegistry.h"
+#include "Page.h"
+#include "PageSerializer.h"
+#include "QuotedPrintable.h"
+#include "SharedBuffer.h"
+#include <time.h>
+#include <wtf/StdLibExtras.h>
+#include <wtf/text/StringBuilder.h>
+
+#if HAVE(SYS_TIME_H)
+#include <sys/time.h>
+#endif
+
 
 namespace WebCore {
 
+const char* const quotedPrintable = "quoted-printable";
+const char* const base64 = "base64";
+
+// Builds a multipart boundary resembling the ones produced by IE/UnMHT
+// (ex: ----=_NextPart_000_001B_01CC157B.96F808A0): a fixed prefix followed by
+// the hex digits of 10 random bytes, with a '_' inserted before the third byte
+// and a '.' inserted before the seventh.
+static String generateRandomBoundary()
+{
+    const size_t randomByteCount = 10;
+    const size_t underscorePosition = 2;
+    const size_t dotPosition = 6;
+
+    char randomBytes[randomByteCount];
+    cryptographicallyRandomValues(&randomBytes, randomByteCount);
+
+    StringBuilder boundary;
+    boundary.append("----=_NextPart_000_");
+    for (size_t byteIndex = 0; byteIndex < randomByteCount; ++byteIndex) {
+        if (byteIndex == dotPosition)
+            boundary.append('.');
+        else if (byteIndex == underscorePosition)
+            boundary.append('_');
+
+        // Each byte contributes two hex digits, low nibble first.
+        boundary.append(lowerNibbleToASCIIHexDigit(randomBytes[byteIndex]));
+        boundary.append(upperNibbleToASCIIHexDigit(randomBytes[byteIndex]));
+    }
+    return boundary.toString();
+}
+
+// Returns a copy of |text| in which every character that is not printable
+// ASCII has been replaced with '?'.
+static String replaceNonPrintableCharacters(const String& text)
+{
+    StringBuilder sanitized;
+    for (size_t index = 0; index < text.length(); ++index) {
+        if (!isASCIIPrintable(text[index])) {
+            sanitized.append('?');
+            continue;
+        }
+        sanitized.append(text[index]);
+    }
+    return sanitized.toString();
+}
+
 MHTMLArchive::MHTMLArchive()
 {
 }
@@ -71,5 +122,90 @@ PassRefPtr<MHTMLArchive> MHTMLArchive::create(const KURL& url, SharedBuffer* dat
     return mainArchive.release();
 }
 
+// Serializes |page| and packages the resulting resources into a single MHTML
+// (multipart/related) document: a top-level header, then one MIME part per
+// resource (each introduced by the boundary), then the closing boundary.
+// Resources whose MIME type is a supported JavaScript or non-image type are
+// quoted-printable encoded; all other resources are base64 encoded.
+PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page)
+{
+    Vector<PageSerializer::Resource> resources;
+    PageSerializer pageSerializer(&resources);
+    pageSerializer.serialize(page);
+
+    String boundary = generateRandomBoundary();
+    String endOfResourceBoundary = makeString("--", boundary, "\r\n");
+
+    String dateString;
+    time_t localTime = time(0);
+    tm localTM;
+    getLocalTime(&localTime, &localTM);
+    // tm_wday counts days since Sunday (0 = Sunday) whereas makeRFC2822DateString() expects 0 to be Monday.
+    unsigned dayOfWeek = (localTM.tm_wday + 6) % 7;
+    // calculateUTCOffset() is converted from milliseconds to the minutes makeRFC2822DateString() expects.
+    dateString = makeRFC2822DateString(dayOfWeek, localTM.tm_mday, localTM.tm_mon, 1900 + localTM.tm_year, localTM.tm_hour, localTM.tm_min, localTM.tm_sec, calculateUTCOffset() / (1000 * 60));
+
+    // Top-level MIME header.
+    StringBuilder stringBuilder;
+    stringBuilder.append("From: <Saved by WebKit>\r\n");
+    stringBuilder.append("Subject: ");
+    // We replace characters that are not printable ASCII with '?' characters to match IE's behavior.
+    stringBuilder.append(replaceNonPrintableCharacters(page->mainFrame()->document()->title()));
+    stringBuilder.append("\r\nDate: ");
+    stringBuilder.append(dateString);
+    stringBuilder.append("\r\nMIME-Version: 1.0\r\n");
+    stringBuilder.append("Content-Type: multipart/related;\r\n");
+    stringBuilder.append("\ttype=\"");
+    stringBuilder.append(page->mainFrame()->document()->suggestedMIMEType());
+    stringBuilder.append("\";\r\n");
+    stringBuilder.append("\tboundary=\"");
+    stringBuilder.append(boundary);
+    stringBuilder.append("\"\r\n\r\n");
+
+    // We use utf8() below instead of ascii() as ascii() replaces CRLFs with ?? (the builder only contains ASCII characters at this point).
+    ASSERT(stringBuilder.toString().containsOnlyASCII());
+    CString asciiString = stringBuilder.toString().utf8();
+    RefPtr<SharedBuffer> mhtmlData = SharedBuffer::create();
+    mhtmlData->append(asciiString.data(), asciiString.length());
+
+    for (size_t i = 0; i < resources.size(); ++i) {
+        const PageSerializer::Resource& resource = resources[i];
+
+        // Per-resource MIME part header.
+        stringBuilder.clear();
+        stringBuilder.append(endOfResourceBoundary);
+        stringBuilder.append("Content-Type: ");
+        stringBuilder.append(resource.mimeType);
+
+        const char* contentEncoding = MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType) ? quotedPrintable : base64;
+        stringBuilder.append("\r\nContent-Transfer-Encoding: ");
+        stringBuilder.append(contentEncoding);
+        stringBuilder.append("\r\nContent-Location: ");
+        stringBuilder.append(resource.url);
+        stringBuilder.append("\r\n\r\n");
+
+        asciiString = stringBuilder.toString().utf8();
+        mhtmlData->append(asciiString.data(), asciiString.length());
+
+        // FIXME: ideally we would encode the content as a stream without having to fetch it all.
+        const char* data = resource.data->data();
+        size_t dataLength = resource.data->size();
+        Vector<char> encodedData;
+        if (!strcmp(contentEncoding, quotedPrintable)) {
+            quotedPrintableEncode(data, dataLength, encodedData);
+            mhtmlData->append(encodedData.data(), encodedData.size());
+            mhtmlData->append("\r\n", 2);
+        } else {
+            ASSERT(!strcmp(contentEncoding, base64));
+            // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
+            base64Encode(data, dataLength, encodedData);
+            // Emit the base64 text in lines of at most 76 characters, each terminated by CRLF.
+            const size_t maximumLineLength = 76;
+            size_t index = 0;
+            size_t encodedDataLength = encodedData.size();
+            do {
+                size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
+                mhtmlData->append(encodedData.data() + index, lineLength);
+                mhtmlData->append("\r\n", 2);
+                index += maximumLineLength;
+            } while (index < encodedDataLength);
+        }
+    }
+
+    // Closing boundary that terminates the multipart document.
+    asciiString = makeString("--", boundary, "--\r\n").utf8();
+    mhtmlData->append(asciiString.data(), asciiString.length());
+
+    return mhtmlData.release();
+}
+
 }
 #endif
index 1b31dc3..86fe1c7 100644 (file)
@@ -38,6 +38,8 @@
 namespace WebCore {
 
 class MHTMLParser;
+class Page;
+class SharedBuffer;
 
 class MHTMLArchive : public Archive {
 public:
@@ -46,6 +48,8 @@ public:
     static PassRefPtr<MHTMLArchive> create();
     static PassRefPtr<MHTMLArchive> create(const KURL&, SharedBuffer*);
 
+    static PassRefPtr<SharedBuffer> generateMHTMLData(Page*);
+
 private:
     friend class MHTMLParser;
     MHTMLArchive();
index d56926f..e173879 100644 (file)
@@ -206,8 +206,12 @@ void PageSerializer::serializeFrame(Frame* frame)
 
     Vector<Node*> nodes;
     SerializerMarkupAccumulator accumulator(this, document, &nodes);
-    TextEncoding textEncoding(TextEncoding(document->charset()));
-    ASSERT(textEncoding.isValid());
+    TextEncoding textEncoding(document->charset());
+    CString data;
+    if (!textEncoding.isValid()) {
+        // FIXME: iframes used as images trigger this. We should deal with them correctly.
+        return;
+    }
     String text = accumulator.serializeNodes(document->documentElement(), 0, IncludeNode);
     CString frameHTML = textEncoding.encode(text.characters(), text.length(), EntitiesForUnencodables);
     m_resources->append(Resource(url, document->suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
@@ -274,7 +278,7 @@ void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet* styleSheet, const KUR
 
     if (url.isValid() && !m_resourceURLs.contains(url)) {
         // FIXME: We should check whether a charset has been specified and if none was found add one.
-        TextEncoding textEncoding = TextEncoding(styleSheet->charset());
+        TextEncoding textEncoding(styleSheet->charset());
         ASSERT(textEncoding.isValid());
         String textString = cssText.toString();
         CString text = textEncoding.encode(textString.characters(), textString.length(), EntitiesForUnencodables);
index 0900ae4..ad8618e 100644 (file)
@@ -117,6 +117,16 @@ const char* SharedBuffer::data() const
     return buffer().data();
 }
 
+// Appends the content of |data| to this buffer by repeatedly fetching a chunk
+// with getSomeData() until it reports that no bytes are left.
+void SharedBuffer::append(SharedBuffer* data)
+{
+    const char* segment;
+    size_t position = 0;
+    for (;;) {
+        size_t length = data->getSomeData(segment, position);
+        if (!length)
+            break;
+        append(segment, length);
+        position += length;
+    }
+}
+
 void SharedBuffer::append(const char* data, unsigned length)
 {
     ASSERT(!m_purgeableBuffer);
index 4f1d629..68d993a 100644 (file)
@@ -84,6 +84,7 @@ public:
 
     bool isEmpty() const { return !size(); }
 
+    void append(SharedBuffer*);
     void append(const char*, unsigned);
     void clear();
     const char* platformData() const;
old mode 100644 (file)
new mode 100755 (executable)
index fdbd3a6..da1e878
@@ -1,3 +1,14 @@
+2011-06-02  Jay Civelli  <jcivelli@chromium.org>
+
+        Reviewed by Adam Barth.
+
+        Adding MHTML generation support to MHTMLArchive.
+        https://bugs.webkit.org/show_bug.cgi?id=7169
+
+        * public/WebPageSerializer.h:
+        * src/WebPageSerializer.cpp:
+        (WebKit::WebPageSerializer::serializeToMHTML):
+
 2011-06-02  Dimitri Glazkov  <dglazkov@chromium.org>
 
         Reviewed by Darin Adler.
index a108a62..e2db07b 100644 (file)
@@ -58,6 +58,9 @@ public:
     // Note that this also strips-out any script tag or link to JavaScript.
     WEBKIT_API static void serialize(WebView*, WebVector<Resource>*);
 
+    // Serializes the WebView contents to a MHTML representation.
+    WEBKIT_API static WebCString serializeToMHTML(WebView*);
+
     // IMPORTANT:
     // The API below is an older implementation of a pageserialization that
     // will be removed soon.
index 232e211..6df84bb 100644 (file)
@@ -39,6 +39,7 @@
 #include "HTMLInputElement.h"
 #include "HTMLNames.h"
 #include "KURL.h"
+#include "MHTMLArchive.h"
 #include "PageSerializer.h"
 #include "Vector.h"
 
@@ -200,6 +201,13 @@ void WebPageSerializer::serialize(WebView* view, WebVector<WebPageSerializer::Re
     *resourcesParam = result;         
 }
 
+// Generates the MHTML data for the page shown in |view| and returns a copy of
+// it as a WebCString.
+WebCString WebPageSerializer::serializeToMHTML(WebView* view)
+{
+    WebViewImpl* viewImpl = static_cast<WebViewImpl*>(view);
+    RefPtr<SharedBuffer> mhtmlData = MHTMLArchive::generateMHTMLData(viewImpl->page());
+    // FIXME: we are copying all the data here. Ideally we would have a WebSharedData().
+    return WebCString(mhtmlData->data(), mhtmlData->size());
+}
+
 bool WebPageSerializer::serialize(WebFrame* frame,
                                   bool recursive,
                                   WebPageSerializerClient* client,