Reviewed by Tim.
[WebKit-https.git] / WebKit / Misc / WebNSAttributedStringExtras.m
1 /*
2  * Copyright (C) 2005 Apple Computer, Inc.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1.  Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer. 
10  * 2.  Redistributions in binary form must reproduce the above copyright
11  *     notice, this list of conditions and the following disclaimer in the
12  *     documentation and/or other materials provided with the distribution. 
13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14  *     its contributors may be used to endorse or promote products derived
15  *     from this software without specific prior written permission. 
16  *
17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23     * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #import "WebNSAttributedStringExtras.h"
30
31 #import "DOMRangeInternal.h"
32 #import "WebDataSourcePrivate.h"
33 #import "WebFrame.h"
34 #import "WebFrameBridge.h"
35 #import "WebFrameInternal.h"
36 #import <WebCore/csshelper.h>
37 #import <WebCore/BlockExceptions.h>
38 #import <WebCore/Document.h>
39 #import <WebCore/Element.h>
40 #import <WebCore/FontData.h>
41 #import <WebCore/FrameLoader.h>
42 #import <WebCore/FrameMac.h>
43 #import <WebCore/HTMLNames.h>
44 #import <WebCore/Image.h>
45 #import <WebCore/InlineTextBox.h>
46 #import <WebCore/KURL.h>
47 #import <WebCore/Range.h>
48 #import <WebCore/RenderImage.h>
49 #import <WebCore/RenderListItem.h>
50 #import <WebCore/RenderObject.h>
51 #import <WebCore/RenderStyle.h>
52 #import <WebCore/RenderText.h>
53 #import <WebCore/Text.h>
54
55 using namespace WebCore;
56 using namespace HTMLNames;
57
// Unicode code points appended to list item text as markers for the
// DISC, SQUARE and CIRCLE list style types respectively.
#define BULLET_CHAR 0x2022 // U+2022 BULLET
#define SQUARE_CHAR 0x25AA // U+25AA BLACK SMALL SQUARE
#define CIRCLE_CHAR 0x25E6 // U+25E6 WHITE BULLET
61
// Character range, within the attributed string being built, that a single
// list item occupies. Used to apply a paragraph style to the item afterwards.
struct ListItemInfo {
    // Length of the result string at the moment the <li> was entered.
    unsigned start;
    // Length of the result string at the moment the <li> was left; stays 0
    // until then.
    unsigned end;
};
66
// Returns the nearest <ul> or <ol> element at or above |item| (|item| itself
// counts), or 0 if there is none or |item| is null.
//
// The walk is done on Node pointers so that a non-element ancestor (such as
// the document node at the top of the tree) is never treated as an Element;
// only the node that matched a list tag is cast.
static Element* listParent(Element* item)
{
    for (Node* node = item; node; node = node->parentNode()) {
        if (node->hasTagName(ulTag) || node->hasTagName(olTag))
            return static_cast<Element*>(node);
    }
    return 0;
}
76
// If |e| is a text node that is the very first content of a list item --
// i.e. every node on the path from |e| up to an <li> is the first child of
// its parent -- returns that <li> node. Returns 0 otherwise, including when
// |e| is not a text node.
static Node* isTextFirstInListItem(Node* e)
{
    if (!e->isTextNode())
        return 0;

    Node* child = e;
    for (Node* ancestor = child->parentNode(); ancestor; child = ancestor, ancestor = ancestor->parentNode()) {
        // Something precedes us at this level, so we are not "first".
        if (ancestor->firstChild() != child)
            break;
        if (ancestor->hasTagName(liTag))
            return ancestor;
    }
    return 0;
}
92
// Returns an autoreleased file wrapper holding the image data for the image
// element |e|, or nil if none can be produced.
//
// The wrapper is first looked up by the element's completed "src" URL through
// the data source of the frame that owns the element's document. If that
// yields nothing, a TIFF representation of the renderer's cached image is
// wrapped instead (skipping error images), with the preferred filename
// "image.tiff".
//
// NOTE(review): the renderer is assumed to be a RenderImage because callers
// only pass <img> elements -- confirm if other callers are added.
static NSFileWrapper *fileWrapperForElement(Element* e)
{
    NSFileWrapper *wrapper = nil;
    BEGIN_BLOCK_OBJC_EXCEPTIONS;
    
    const AtomicString& attr = e->getAttribute(srcAttr);
    if (!attr.isEmpty()) {
        NSURL *URL = KURL(e->document()->completeURL(attr.deprecatedString())).getNSURL();
        wrapper = [[kit(e->document()->frame()) dataSource] _fileWrapperForURL:URL];
    }
    if (!wrapper) {
        RenderImage* renderer = static_cast<RenderImage*>(e->renderer());
        // Guard every link of the chain: an element without a renderer, a
        // renderer without a cached image, or a cached image without decoded
        // image data simply produces no wrapper.
        if (renderer && renderer->cachedImage() && !renderer->cachedImage()->isErrorImage()
            && renderer->cachedImage()->image()) {
            NSData *tiffData = (NSData *)(renderer->cachedImage()->image()->getTIFFRepresentation());
            if (tiffData) {
                // Autorelease right away so that nothing leaks if a later
                // message raises an exception.
                wrapper = [[[NSFileWrapper alloc] initRegularFileWithContents:tiffData] autorelease];
                [wrapper setPreferredFilename:@"image.tiff"];
            }
        }
    }

    return wrapper;

    END_BLOCK_OBJC_EXCEPTIONS;

    return nil;
}
118
119 @implementation NSAttributedString (WebKitExtras)
120
// Returns an attributed string equal to the receiver with every
// NSAttachmentCharacter removed. When the receiver contains no attachment
// characters the receiver itself is returned; otherwise the result is an
// autoreleased mutable copy.
- (NSAttributedString *)_web_attributedStringByStrippingAttachmentCharacters
{
    // This code was originally copied from NSTextView
    static NSString *attachmentCharString = nil;
    if (!attachmentCharString) {
        unichar attachmentChar = NSAttachmentCharacter;
        attachmentCharString = [[NSString alloc] initWithCharacters:&attachmentChar length:1];
    }
    
    NSRange attachmentRange = [[self string] rangeOfString:attachmentCharString];
    if (attachmentRange.location == NSNotFound || attachmentRange.length == 0)
        return self;
    
    NSMutableAttributedString *newAttributedString = [[self mutableCopyWithZone:NULL] autorelease];
    while (attachmentRange.location != NSNotFound && attachmentRange.length > 0) {
        [newAttributedString replaceCharactersInRange:attachmentRange withString:@""];
        
        // Everything before the character just removed has already been
        // scanned, so resume the search at that position instead of
        // rescanning the string from the beginning.
        NSString *remainingString = [newAttributedString string];
        NSUInteger resumeLocation = attachmentRange.location;
        NSRange searchRange = NSMakeRange(resumeLocation, [remainingString length] - resumeLocation);
        attachmentRange = [remainingString rangeOfString:attachmentCharString options:0 range:searchRange];
    }
    return newAttributedString;
}
150
151 // FIXME: Use WebCore::TextIterator to iterate text runs.
152
// Builds an attributed string for the DOM content covered by |range|.
//
// Nodes are visited in document order from the range's start node up to (not
// including) range->pastEndNode(). For nodes that have a renderer:
//   - text nodes contribute their text with the renderer's font and its
//     (non-transparent) foreground and background colors; when the style
//     collapses white space, the text is taken from the renderer's inline
//     text boxes so that collapsed white space becomes at most one space;
//   - <a> elements are remembered so that NSLinkAttributeName can be applied
//     to their range once the element is left;
//   - <br>, list, block and heading/paragraph elements emit newlines, <li>
//     elements emit a tab-delimited marker, and <img> elements become text
//     attachments.
// After the walk each recorded list item range receives a paragraph style
// whose tab stops position the marker and the item text.
//
// Returns nil if |range| is null, has invalid boundary points or has no start
// node; otherwise an autoreleased attributed string. The nil return after
// END_BLOCK_OBJC_EXCEPTIONS is presumably reached when an Objective-C
// exception is raised inside the guarded region.
+ (NSAttributedString *)_web_attributedStringFromRange:(Range*)range
{
    ListItemInfo info;
    ExceptionCode ec = 0; // dummy variable -- we ignore DOM exceptions
    NSMutableAttributedString *result;
    BEGIN_BLOCK_OBJC_EXCEPTIONS;

    if (!range || !range->boundaryPointsValid())
        return nil;
    
    Node* firstNode = range->startNode();
    if (!firstNode)
        return nil;
    Node* pastEndNode = range->pastEndNode();
    
    int startOffset = range->startOffset(ec);
    int endOffset = range->endOffset(ec);
    Node* endNode = range->endContainer(ec);

    result = [[[NSMutableAttributedString alloc] init] autorelease];
    
    // Output state carried from node to node.
    bool hasNewLine = true;                         // the output currently ends at a line start
    bool addedSpace = true;                         // the last character emitted was white space
    NSAttributedString *pendingStyledSpace = nil;   // owned; a collapsed space waiting to be emitted
    bool hasParagraphBreak = true;                  // an extra paragraph newline was just emitted
    const Element *linkStartNode = 0;               // the <a> element currently being traversed
    unsigned linkStartLocation = 0;                 // where that link's text starts in |result|
    Vector<Element*> listItems;                     // <li> elements seen, parallel to listItemLocations
    Vector<ListItemInfo> listItemLocations;
    float maxMarkerWidth = 0;
    
    Node *currentNode = firstNode;
    
    // If the first item is the entire text of a list item, use the list item node as the start of the 
    // selection, not the text node.  The user's intent was probably to select the list.
    if (currentNode->isTextNode() && startOffset == 0) {
        Node *startListNode = isTextFirstInListItem(firstNode);
        if (startListNode){
            firstNode = startListNode;
            currentNode = firstNode;
        }
    }
    
    while (currentNode && currentNode != pastEndNode) {
        RenderObject *renderer = currentNode->renderer();
        if (renderer) {
            RenderStyle *style = renderer->style();
            NSFont *font = style->font().primaryFont()->getNSFont();
            bool needSpace = pendingStyledSpace != nil;
            if (currentNode->isTextNode()) {
                if (hasNewLine) {
                    // At the start of a line any pending space is dropped.
                    addedSpace = true;
                    needSpace = false;
                    [pendingStyledSpace release];
                    pendingStyledSpace = nil;
                    hasNewLine = false;
                }
                DeprecatedString text;
                DeprecatedString str = currentNode->nodeValue().deprecatedString();
                // -1 means "not restricted by the range in this node".
                int start = (currentNode == firstNode) ? startOffset : -1;
                int end = (currentNode == endNode) ? endOffset : -1;
                if (renderer->isText()) {
                    if (!style->collapseWhiteSpace()) {
                        if (needSpace && !addedSpace) {
                            if (text.isEmpty() && linkStartLocation == [result length])
                                ++linkStartLocation;
                            [result appendAttributedString:pendingStyledSpace];
                        }
                        int runStart = (start == -1) ? 0 : start;
                        int runEnd = (end == -1) ? str.length() : end;
                        text += str.mid(runStart, runEnd-runStart);
                        [pendingStyledSpace release];
                        pendingStyledSpace = nil;
                        // Only look at the last character when there is one; runEnd is 0 when
                        // the range ends at offset 0 of this node.
                        addedSpace = runEnd > 0 && u_charDirection(str[runEnd - 1].unicode()) == U_WHITE_SPACE_NEUTRAL;
                    }
                    else {
                        RenderText* textObj = static_cast<RenderText*>(renderer);
                        if (!textObj->firstTextBox() && str.length() > 0 && !addedSpace) {
                            // We have no runs, but we do have a length.  This means we must be
                            // whitespace that collapsed away at the end of a line.
                            text += ' ';
                            addedSpace = true;
                        }
                        else {
                            addedSpace = false;
                            for (InlineTextBox* box = textObj->firstTextBox(); box; box = box->nextTextBox()) {
                                int runStart = (start == -1) ? box->m_start : start;
                                int runEnd = (end == -1) ? box->m_start + box->m_len : end;
                                if (runEnd > box->m_start + box->m_len)
                                    runEnd = box->m_start + box->m_len;
                                if (runStart >= box->m_start &&
                                    runStart < box->m_start + box->m_len) {
                                    if (box == textObj->firstTextBox() && box->m_start == runStart && runStart > 0)
                                        needSpace = true; // collapsed space at the start
                                    if (needSpace && !addedSpace) {
                                        if (pendingStyledSpace != nil) {
                                            if (text.isEmpty() && linkStartLocation == [result length])
                                                ++linkStartLocation;
                                            [result appendAttributedString:pendingStyledSpace];
                                        } else
                                            text += ' ';
                                    }
                                    // The range can end before this box starts (for example inside
                                    // leading collapsed white space); never pass a negative length
                                    // to mid().
                                    int runLength = runEnd > runStart ? runEnd - runStart : 0;
                                    DeprecatedString runText = str.mid(runStart, runLength);
                                    runText.replace('\n', ' ');
                                    text += runText;
                                    int nextRunStart = box->nextTextBox() ? box->nextTextBox()->m_start : str.length(); // collapsed space between runs or at the end
                                    needSpace = nextRunStart > runEnd;
                                    [pendingStyledSpace release];
                                    pendingStyledSpace = nil;
                                    addedSpace = runEnd > 0 && u_charDirection(str[runEnd - 1].unicode()) == U_WHITE_SPACE_NEUTRAL;
                                    start = -1;
                                }
                                if (end != -1 && runEnd >= end)
                                    break;
                            }
                        }
                    }
                }
                
                text.replace('\\', renderer->backslashAsCurrencySymbol());
    
                if (text.length() > 0 || needSpace) {
                    NSMutableDictionary *attrs = [[NSMutableDictionary alloc] init];
                    [attrs setObject:font forKey:NSFontAttributeName];
                    if (style && style->color().isValid() && style->color().alpha() != 0)
                        [attrs setObject:nsColor(style->color()) forKey:NSForegroundColorAttributeName];
                    if (style && style->backgroundColor().isValid() && style->backgroundColor().alpha() != 0)
                        [attrs setObject:nsColor(style->backgroundColor()) forKey:NSBackgroundColorAttributeName];

                    if (text.length() > 0) {
                        hasParagraphBreak = false;
                        NSAttributedString *partialString = [[NSAttributedString alloc] initWithString:text.getNSString() attributes:attrs];
                        [result appendAttributedString: partialString];                
                        [partialString release];
                    }

                    if (needSpace) {
                        // Remember the trailing collapsed space with this run's styling; it is
                        // emitted only if more content follows on the same line.
                        [pendingStyledSpace release];
                        pendingStyledSpace = [[NSAttributedString alloc] initWithString:@" " attributes:attrs];
                    }

                    [attrs release];
                }
            } else {
                // This is our simple HTML -> ASCII transformation:
                DeprecatedString text;
                if (currentNode->hasTagName(aTag)) {
                    // Note the start of the <a> element.  We will add the NSLinkAttributeName
                    // attribute to the attributed string when navigating to the next sibling 
                    // of this node.
                    linkStartLocation = [result length];
                    linkStartNode = static_cast<Element*>(currentNode);
                } else if (currentNode->hasTagName(brTag)) {
                    text += "\n";
                    hasNewLine = true;
                } else if (currentNode->hasTagName(liTag)) {
                    DeprecatedString listText;
                    Element *itemParent = listParent(static_cast<Element*>(currentNode));
                    
                    if (!hasNewLine)
                        listText += '\n';
                    hasNewLine = true;

                    listItems.append(static_cast<Element*>(currentNode));
                    info.start = [result length];
                    info.end = 0;
                    listItemLocations.append (info);
                    
                    listText += '\t';
                    if (itemParent && renderer->isListItem()) {
                        RenderListItem* listRenderer = static_cast<RenderListItem*>(renderer);

                        maxMarkerWidth = MAX([font pointSize], maxMarkerWidth);
                        switch(style->listStyleType()) {
                            case DISC:
                                listText += ((DeprecatedChar)BULLET_CHAR);
                                break;
                            case CIRCLE:
                                listText += ((DeprecatedChar)CIRCLE_CHAR);
                                break;
                            case SQUARE:
                                listText += ((DeprecatedChar)SQUARE_CHAR);
                                break;
                            case LNONE:
                                break;
                            default: {
                                DeprecatedString marker = listRenderer->markerStringValue();
                                listText += marker;
                                // Use AppKit metrics.  Will be rendered by AppKit.
                                float markerWidth = [marker.getNSString() sizeWithAttributes:[NSDictionary dictionaryWithObject:font forKey:NSFontAttributeName]].width;
                                maxMarkerWidth = MAX(markerWidth, maxMarkerWidth);
                            }
                        }

                        listText += ' ';
                        listText += '\t';

                        // Same attribute rules as for text runs: fully transparent colors
                        // are not recorded.
                        NSMutableDictionary *attrs = [[NSMutableDictionary alloc] init];
                        [attrs setObject:font forKey:NSFontAttributeName];
                        if (style && style->color().isValid() && style->color().alpha() != 0)
                            [attrs setObject:nsColor(style->color()) forKey:NSForegroundColorAttributeName];
                        if (style && style->backgroundColor().isValid() && style->backgroundColor().alpha() != 0)
                            [attrs setObject:nsColor(style->backgroundColor()) forKey:NSBackgroundColorAttributeName];

                        NSAttributedString *partialString = [[NSAttributedString alloc] initWithString:listText.getNSString() attributes:attrs];
                        [attrs release];
                        [result appendAttributedString: partialString];                
                        [partialString release];
                    }
                } else if (currentNode->hasTagName(olTag) || currentNode->hasTagName(ulTag)) {
                    if (!hasNewLine)
                        text += "\n";
                    hasNewLine = true;
                } else if (currentNode->hasTagName(blockquoteTag)
                        || currentNode->hasTagName(ddTag)
                        || currentNode->hasTagName(divTag)
                        || currentNode->hasTagName(dlTag)
                        || currentNode->hasTagName(dtTag)
                        || currentNode->hasTagName(hrTag)
                        || currentNode->hasTagName(listingTag)
                        || currentNode->hasTagName(preTag)
                        || currentNode->hasTagName(tdTag)
                        || currentNode->hasTagName(thTag)) {
                    if (!hasNewLine)
                        text += '\n';
                    hasNewLine = true;
                } else if (currentNode->hasTagName(h1Tag)
                        || currentNode->hasTagName(h2Tag)
                        || currentNode->hasTagName(h3Tag)
                        || currentNode->hasTagName(h4Tag)
                        || currentNode->hasTagName(h5Tag)
                        || currentNode->hasTagName(h6Tag)
                        || currentNode->hasTagName(pTag)
                        || currentNode->hasTagName(trTag)) {
                    if (!hasNewLine)
                        text += '\n';
                    
                    // In certain cases, emit a paragraph break.
                    int bottomMargin = renderer->collapsedMarginBottom();
                    int fontSize = style->fontDescription().computedPixelSize();
                    if (bottomMargin * 2 >= fontSize) {
                        if (!hasParagraphBreak) {
                            text += '\n';
                            hasParagraphBreak = true;
                        }
                    }
                    
                    hasNewLine = true;
                }
                else if (currentNode->hasTagName(imgTag)) {
                    if (pendingStyledSpace != nil) {
                        if (linkStartLocation == [result length])
                            ++linkStartLocation;
                        [result appendAttributedString:pendingStyledSpace];
                        [pendingStyledSpace release];
                        pendingStyledSpace = nil;
                    }
                    NSFileWrapper *fileWrapper = fileWrapperForElement(static_cast<Element*>(currentNode));
                    NSTextAttachment *attachment = [[NSTextAttachment alloc] initWithFileWrapper:fileWrapper];
                    NSAttributedString *iString = [NSAttributedString attributedStringWithAttachment:attachment];
                    [result appendAttributedString: iString];
                    [attachment release];
                }

                // Most elements produce no text of their own; skip the allocation then.
                if (!text.isEmpty()) {
                    NSAttributedString *partialString = [[NSAttributedString alloc] initWithString:text.getNSString()];
                    [result appendAttributedString: partialString];
                    [partialString release];
                }
            }
        }

        // Advance in document order: first child, else next sibling, else climb until an
        // ancestor has a next sibling.  Every element left while climbing is "closed" here.
        Node *nextNode = currentNode->firstChild();
        if (!nextNode)
            nextNode = currentNode->nextSibling();

        while (!nextNode && currentNode->parentNode()) {
            DeprecatedString text;
            currentNode = currentNode->parentNode();
            if (currentNode == pastEndNode)
                break;
            nextNode = currentNode->nextSibling();

            if (currentNode->hasTagName(aTag)) {
                // End of a <a> element.  Create an attributed string NSLinkAttributeName attribute
                // for the range of the link.  Note that we create the attributed string from the DOM, which
                // will have corrected any illegally nested <a> elements.
                if (linkStartNode && currentNode == linkStartNode) {
                    String href = parseURL(linkStartNode->getAttribute(hrefAttr));
                    KURL kURL = linkStartNode->document()->frame()->loader()->completeURL(href.deprecatedString());
                    
                    NSURL *URL = kURL.getNSURL();
                    NSRange tempRange = { linkStartLocation, [result length]-linkStartLocation }; // workaround for 4213314
                    [result addAttribute:NSLinkAttributeName value:URL range:tempRange];
                    linkStartNode = 0;
                }
            }
            else if (currentNode->hasTagName(olTag) || currentNode->hasTagName(ulTag)) {
                if (!hasNewLine)
                    text += '\n';
                hasNewLine = true;
            } else if (currentNode->hasTagName(liTag)) {
                // Record where this list item's text ends.
                unsigned i, count = listItems.size();
                for (i = 0; i < count; i++){
                    if (listItems[i] == currentNode){
                        listItemLocations[i].end = [result length];
                        break;
                    }
                }
                if (!hasNewLine)
                    text += '\n';
                hasNewLine = true;
            } else if (currentNode->hasTagName(blockquoteTag) ||
                       currentNode->hasTagName(ddTag) ||
                       currentNode->hasTagName(divTag) ||
                       currentNode->hasTagName(dlTag) ||
                       currentNode->hasTagName(dtTag) ||
                       currentNode->hasTagName(hrTag) ||
                       currentNode->hasTagName(listingTag) ||
                       currentNode->hasTagName(preTag) ||
                       currentNode->hasTagName(tdTag) ||
                       currentNode->hasTagName(thTag)) {
                if (!hasNewLine)
                    text += '\n';
                hasNewLine = true;
            } else if (currentNode->hasTagName(pTag) ||
                       currentNode->hasTagName(trTag) ||
                       currentNode->hasTagName(h1Tag) ||
                       currentNode->hasTagName(h2Tag) ||
                       currentNode->hasTagName(h3Tag) ||
                       currentNode->hasTagName(h4Tag) ||
                       currentNode->hasTagName(h5Tag) ||
                       currentNode->hasTagName(h6Tag)) {
                if (!hasNewLine)
                    text += '\n';
                // An extra newline is needed at the start, not the end, of these types of tags,
                // so don't add another here.
                hasNewLine = true;
            }
            
            if (!text.isEmpty()) {
                NSAttributedString *partialString = [[NSAttributedString alloc] initWithString:text.getNSString()];
                [result appendAttributedString:partialString];
                [partialString release];
            }
        }

        currentNode = nextNode;
    }
    
    [pendingStyledSpace release];
    
    // Apply paragraph styles from outside in.  This ensures that nested lists correctly
    // override their parent's paragraph style.
    {
        unsigned i, count = listItems.size();
        Element *e;

#ifdef POSITION_LIST
        Node *containingBlock;
        int containingBlockX, containingBlockY;
        
        // Determine the position of the outermost containing block.  All paragraph
        // styles and tabs should be relative to this position.  So, the horizontal position of 
        // each item in the list (in the resulting attributed string) will be relative to position 
        // of the outermost containing block.
        if (count > 0){
            containingBlock = firstNode;
            while (containingBlock->renderer()->isInline()){
                containingBlock = containingBlock->parentNode();
            }
            containingBlock->renderer()->absolutePosition(containingBlockX, containingBlockY);
        }
#endif
        
        for (i = 0; i < count; i++){
            e = listItems[i];
            info = listItemLocations[i];
            
            // A list item that was never closed (the range ended inside it) extends to
            // the end of the result.
            if (info.end < info.start)
                info.end = [result length];
                
            RenderObject *r = e->renderer();
            RenderStyle *style = r->style();

            int rx;
            NSFont *font = style->font().primaryFont()->getNSFont();
            float pointSize = [font pointSize];

#ifdef POSITION_LIST
            int ry;
            r->absolutePosition(rx, ry);
            rx -= containingBlockX;
            
            // Ensure that the text is indented at least enough to allow for the markers.
            rx = MAX(rx, (int)maxMarkerWidth);
#else
            rx = (int)MAX(maxMarkerWidth, pointSize);
#endif

            // The bullet text will be right aligned at the first tab marker, followed
            // by a space, followed by the list item text.  The space is arbitrarily
            // picked as pointSize*2/3.  The space on the first line of the text item
            // is established by a left aligned tab, on subsequent lines it's established
            // by the head indent.
            NSMutableParagraphStyle *mps = [[NSMutableParagraphStyle alloc] init];
            [mps setFirstLineHeadIndent: 0];
            [mps setHeadIndent: rx];
            [mps setTabStops:[NSArray arrayWithObjects:
                        [[[NSTextTab alloc] initWithType:NSRightTabStopType location:rx-(pointSize*2/3)] autorelease],
                        [[[NSTextTab alloc] initWithType:NSLeftTabStopType location:rx] autorelease],
                        nil]];
            NSRange tempRange = { info.start, info.end-info.start }; // workaround for 4213314
            [result addAttribute:NSParagraphStyleAttributeName value:mps range:tempRange];
            [mps release];
        }
    }

    return result;

    END_BLOCK_OBJC_EXCEPTIONS;

    return nil;
}
573
574 @end