Use a 1-byte enum class for TextDirection
[WebKit-https.git] / Source / WebCore / platform / text / BidiResolver.h
1 /*
2  * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
3  * Copyright (C) 2003-2017 Apple Inc. All rights reserved.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public License
16  * along with this library; see the file COPYING.LIB.  If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  *
20  */
21
22 #pragma once
23
24 #include "BidiContext.h"
25 #include "BidiRunList.h"
26 #include "WritingMode.h"
27 #include <wtf/HashMap.h>
28 #include <wtf/Noncopyable.h>
29 #include <wtf/Vector.h>
30
31 namespace WebCore {
32
33 class RenderObject;
34
35 template<typename Iterator> class WhitespaceCollapsingState {
36 public:
37     void reset()
38     {
39         m_transitions.clear();
40         m_currentTransition = 0;
41     }
42     
43     void startIgnoringSpaces(const Iterator& transition)
44     {
45         ASSERT(!(m_transitions.size() % 2));
46         m_transitions.append(transition);
47     }
48
49     void stopIgnoringSpaces(const Iterator& transition)
50     {
51         ASSERT(m_transitions.size() % 2);
52         m_transitions.append(transition);
53     }
54
55     // When ignoring spaces, this needs to be called for objects that need line boxes such as RenderInlines or
56     // hard line breaks to ensure that they're not ignored.
57     void ensureLineBoxInsideIgnoredSpaces(RenderObject& renderer)
58     {
59         Iterator transition(0, &renderer, 0);
60         stopIgnoringSpaces(transition);
61         startIgnoringSpaces(transition);
62     }
63
64     void decrementTransitionAt(size_t index)
65     {
66         m_transitions[index].fastDecrement();
67     }
68
69     const Vector<Iterator>& transitions() { return m_transitions; }
70     size_t numTransitions() const { return m_transitions.size(); }
71     size_t currentTransition() const { return m_currentTransition; }
72     void setCurrentTransition(size_t currentTransition) { m_currentTransition = currentTransition; }
73     void incrementCurrentTransition() { ++m_currentTransition; }
74     void decrementNumTransitions() { m_transitions.shrink(m_transitions.size() - 1); }
75     bool betweenTransitions() const { return m_currentTransition % 2; }
76 private:
77     Vector<Iterator> m_transitions;
78     size_t m_currentTransition { 0 };
79 };
80
81 // The BidiStatus at a given position (typically the end of a line) can
82 // be cached and then used to restart bidi resolution at that position.
83 struct BidiStatus {
84     BidiStatus() = default;
85
86     // Creates a BidiStatus representing a new paragraph root with a default direction.
87     // Uses TextDirection as it only has two possibilities instead of UCharDirection which has at least 19.
88     BidiStatus(TextDirection direction, bool isOverride)
89         : eor(direction == TextDirection::LTR ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT)
90         , lastStrong(eor)
91         , last(eor)
92         , context(BidiContext::create(direction == TextDirection::LTR ? 0 : 1, eor, isOverride))
93     {
94     }
95
96     BidiStatus(UCharDirection eor, UCharDirection lastStrong, UCharDirection last, RefPtr<BidiContext>&& context)
97         : eor(eor)
98         , lastStrong(lastStrong)
99         , last(last)
100         , context(WTFMove(context))
101     {
102     }
103
104     UCharDirection eor { U_OTHER_NEUTRAL };
105     UCharDirection lastStrong { U_OTHER_NEUTRAL };
106     UCharDirection last { U_OTHER_NEUTRAL };
107     RefPtr<BidiContext> context;
108 };
109
110 struct BidiEmbedding {
111     BidiEmbedding(UCharDirection direction, BidiEmbeddingSource source)
112         : direction(direction)
113         , source(source)
114     {
115     }
116
117     UCharDirection direction;
118     BidiEmbeddingSource source;
119 };
120
121 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
122 {
123     return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
124 }
125
126 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
127 {
128     return !(status1 == status2);
129 }
130
131 struct BidiCharacterRun {
132     WTF_MAKE_FAST_ALLOCATED;
133 public:
134     BidiCharacterRun(unsigned start, unsigned stop, BidiContext* context, UCharDirection direction)
135         : m_start(start)
136         , m_stop(stop)
137         , m_override(context->override())
138     {
139         if (direction == U_OTHER_NEUTRAL)
140             direction = context->dir();
141
142         m_level = context->level();
143
144         // add level of run (cases I1 & I2)
145         if (m_level % 2) {
146             if (direction == U_LEFT_TO_RIGHT || direction == U_ARABIC_NUMBER || direction == U_EUROPEAN_NUMBER)
147                 m_level++;
148         } else {
149             if (direction == U_RIGHT_TO_LEFT)
150                 m_level++;
151             else if (direction == U_ARABIC_NUMBER || direction == U_EUROPEAN_NUMBER)
152                 m_level += 2;
153         }
154     }
155
156     ~BidiCharacterRun()
157     {
158         // Delete the linked list in a loop to prevent destructor recursion.
159         auto next = WTFMove(m_next);
160         while (next)
161             next = WTFMove(next->m_next);
162     }
163
164     unsigned start() const { return m_start; }
165     unsigned stop() const { return m_stop; }
166     unsigned char level() const { return m_level; }
167     bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
168     bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
169
170     BidiCharacterRun* next() const { return m_next.get(); }
171     std::unique_ptr<BidiCharacterRun> takeNext() { return WTFMove(m_next); }
172     void setNext(std::unique_ptr<BidiCharacterRun>&& next) { m_next = WTFMove(next); }
173
174 private:
175     std::unique_ptr<BidiCharacterRun> m_next;
176
177 public:
178     unsigned m_start;
179     unsigned m_stop;
180     unsigned char m_level;
181     bool m_override : 1;
182     bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit.
183 };
184
// Optional argument to createBidiRunsForLine(). Any value other than
// NoVisualOverride makes it emit the line as a single run with a forced direction.
enum VisualDirectionOverride {
    NoVisualOverride, // Default: no forced direction.
    VisualLeftToRightOverride, // The run's direction is forced to U_LEFT_TO_RIGHT.
    VisualRightToLeftOverride, // The run's direction is forced to U_RIGHT_TO_LEFT and the run list is reversed.
};
190
191 // BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm
192 // http://unicode.org/reports/tr9
193 template<typename Iterator, typename Run, typename DerivedClass> class BidiResolverBase {
194     WTF_MAKE_NONCOPYABLE(BidiResolverBase);
195 public:
196     const Iterator& position() const { return m_current; }
197     void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; }
198     void setPosition(const Iterator& position, unsigned nestedIsolatedCount)
199     {
200         m_current = position;
201         m_nestedIsolateCount = nestedIsolatedCount;
202     }
203
204     void increment() { static_cast<DerivedClass&>(*this).incrementInternal(); }
205
206     BidiContext* context() const { return m_status.context.get(); }
207     void setContext(RefPtr<BidiContext>&& context) { m_status.context = WTFMove(context); }
208
209     void setLastDir(UCharDirection lastDir) { m_status.last = lastDir; }
210     void setLastStrongDir(UCharDirection lastStrongDir) { m_status.lastStrong = lastStrongDir; }
211     void setEorDir(UCharDirection eorDir) { m_status.eor = eorDir; }
212
213     UCharDirection dir() const { return m_direction; }
214     void setDir(UCharDirection direction) { m_direction = direction; }
215
216     const BidiStatus& status() const { return m_status; }
217     void setStatus(BidiStatus status) { m_status = status; }
218
219     WhitespaceCollapsingState<Iterator>& whitespaceCollapsingState() { return m_whitespaceCollapsingState; }
220
221     // The current algorithm handles nested isolates one layer of nesting at a time.
222     // But when we layout each isolated span, we will walk into (and ignore) all
223     // child isolated spans.
224     void enterIsolate() { m_nestedIsolateCount++; }
225     void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; }
226     bool inIsolate() const { return m_nestedIsolateCount; }
227
228     void embed(UCharDirection, BidiEmbeddingSource);
229     bool commitExplicitEmbedding();
230
231     void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false);
232
233     BidiRunList<Run>& runs() { return m_runs; }
234
235     // FIXME: This used to be part of deleteRuns() but was a layering violation.
236     // It's unclear if this is still needed.
237     void markCurrentRunEmpty() { m_emptyRun = true; }
238
239     void setWhitespaceCollapsingTransitionForIsolatedRun(Run&, size_t);
240     unsigned whitespaceCollapsingTransitionForIsolatedRun(Run&);
241
242 protected:
243     BidiResolverBase() = default;
244
245     // FIXME: Instead of InlineBidiResolvers subclassing this method, we should
246     // pass in some sort of Traits object which knows how to create runs for appending.
247     void appendRun() { static_cast<DerivedClass&>(*this).appendRunInternal(); }
248     bool needsContinuePastEnd() const { return static_cast<const DerivedClass&>(*this).needsContinuePastEndInternal(); }
249
250     Iterator m_current;
251     // sor and eor are "start of run" and "end of run" respectively and correpond
252     // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7
253     Iterator m_sor; // Points to the first character in the current run.
254     Iterator m_eor; // Points to the last character in the current run.
255     Iterator m_last;
256     BidiStatus m_status;
257     UCharDirection m_direction { U_OTHER_NEUTRAL };
258     Iterator endOfLine;
259     bool m_reachedEndOfLine { false };
260     Iterator m_lastBeforeET; // Before a U_EUROPEAN_NUMBER_TERMINATOR
261     bool m_emptyRun { true };
262
263     // FIXME: This should not belong to the resolver, but rather be passed
264     // into createBidiRunsForLine by the caller.
265     BidiRunList<Run> m_runs;
266
267     WhitespaceCollapsingState<Iterator> m_whitespaceCollapsingState;
268
269     unsigned m_nestedIsolateCount { 0 };
270     HashMap<Run*, unsigned> m_whitespaceCollapsingTransitionForIsolatedRun;
271
272 private:
273     void raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to);
274     void lowerExplicitEmbeddingLevel(UCharDirection from);
275     void checkDirectionInLowerRaiseEmbeddingLevel();
276
277     void updateStatusLastFromCurrentDirection(UCharDirection);
278     void reorderRunsFromLevels();
279     void incrementInternal() { m_current.increment(); }
280     void appendRunInternal();
281     bool needsContinuePastEndInternal() const { return true; }
282
283     Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence;
284 };
285
// Resolver that adds nothing to BidiResolverBase: it passes itself as DerivedClass
// and declares no hooks of its own, so the base class's incrementInternal(),
// appendRunInternal() and needsContinuePastEndInternal() are the ones used.
template<typename Iterator, typename Run>
class BidiResolver : public BidiResolverBase<Iterator, Run, BidiResolver<Iterator, Run>> {
};
289
// Resolver variant that additionally keeps a list of IsolateRun entries.
// It declares its own incrementInternal(), appendRunInternal() and
// needsContinuePastEndInternal(), which BidiResolverBase reaches through
// static_cast<DerivedClass&>; their definitions are not in this part of the file.
template<typename Iterator, typename Run, typename IsolateRun>
class BidiResolverWithIsolate : public BidiResolverBase<Iterator, Run, BidiResolverWithIsolate<Iterator, Run, IsolateRun>> {
public:
    // Asserts that no isolated runs remain (see the definition below the class).
    ~BidiResolverWithIsolate();

    void incrementInternal();
    void appendRunInternal();
    bool needsContinuePastEndInternal() const;
    // Mutable access to the collected isolated runs.
    Vector<IsolateRun>& isolatedRuns() { return m_isolatedRuns; }

private:
    Vector<IsolateRun> m_isolatedRuns;
};
303
// Destructor: only checks (in assertion-enabled builds) that the isolated-run
// list was emptied before the resolver goes away.
template<typename Iterator, typename Run, typename IsolateRun>
inline BidiResolverWithIsolate<Iterator, Run, IsolateRun>::~BidiResolverWithIsolate()
{
    // The owner of this resolver should have handled the isolated runs.
    ASSERT(m_isolatedRuns.isEmpty());
}
310
311 template<typename Iterator, typename Run, typename DerivedClass>
312 void BidiResolverBase<Iterator, Run, DerivedClass>::appendRunInternal()
313 {
314     if (!m_emptyRun && !m_eor.atEnd()) {
315         unsigned startOffset = m_sor.offset();
316         unsigned endOffset = m_eor.offset();
317
318         if (!endOfLine.atEnd() && endOffset >= endOfLine.offset()) {
319             m_reachedEndOfLine = true;
320             endOffset = endOfLine.offset();
321         }
322
323         if (endOffset >= startOffset)
324             m_runs.appendRun(std::make_unique<Run>(startOffset, endOffset + 1, context(), m_direction));
325
326         m_eor.increment();
327         m_sor = m_eor;
328     }
329
330     m_direction = U_OTHER_NEUTRAL;
331     m_status.eor = U_OTHER_NEUTRAL;
332 }
333
334 template<typename Iterator, typename Run, typename DerivedClass>
335 void BidiResolverBase<Iterator, Run, DerivedClass>::embed(UCharDirection dir, BidiEmbeddingSource source)
336 {
337     // Isolated spans compute base directionality during their own UBA run.
338     // Do not insert fake embed characters once we enter an isolated span.
339     ASSERT(!inIsolate());
340
341     ASSERT(dir == U_POP_DIRECTIONAL_FORMAT || dir == U_LEFT_TO_RIGHT_EMBEDDING || dir == U_LEFT_TO_RIGHT_OVERRIDE || dir == U_RIGHT_TO_LEFT_EMBEDDING || dir == U_RIGHT_TO_LEFT_OVERRIDE);
342     m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source));
343 }
344
345 template<typename Iterator, typename Run, typename DerivedClass>
346 void BidiResolverBase<Iterator, Run, DerivedClass>::checkDirectionInLowerRaiseEmbeddingLevel()
347 {
348     ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd());
349     ASSERT(m_status.last != U_DIR_NON_SPACING_MARK
350         && m_status.last != U_BOUNDARY_NEUTRAL
351         && m_status.last != U_RIGHT_TO_LEFT_EMBEDDING
352         && m_status.last != U_LEFT_TO_RIGHT_EMBEDDING
353         && m_status.last != U_RIGHT_TO_LEFT_OVERRIDE 
354         && m_status.last != U_LEFT_TO_RIGHT_OVERRIDE
355         && m_status.last != U_POP_DIRECTIONAL_FORMAT);
356     if (m_direction == U_OTHER_NEUTRAL)
357         m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
358 }
359
360 template<typename Iterator, typename Run, typename DerivedClass>
361 void BidiResolverBase<Iterator, Run, DerivedClass>::lowerExplicitEmbeddingLevel(UCharDirection from)
362 {
363     if (!m_emptyRun && m_eor != m_last) {
364         checkDirectionInLowerRaiseEmbeddingLevel();
365         // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
366         if (from == U_LEFT_TO_RIGHT) {
367             // bidi.sor ... bidi.eor ... bidi.last L
368             if (m_status.eor == U_EUROPEAN_NUMBER) {
369                 if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
370                     m_direction = U_EUROPEAN_NUMBER;
371                     appendRun();
372                 }
373             } else if (m_status.eor == U_ARABIC_NUMBER) {
374                 m_direction = U_ARABIC_NUMBER;
375                 appendRun();
376             } else if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
377                 appendRun();
378                 m_direction = U_LEFT_TO_RIGHT;
379             }
380         } else if (m_status.eor == U_EUROPEAN_NUMBER || m_status.eor == U_ARABIC_NUMBER || m_status.lastStrong == U_LEFT_TO_RIGHT) {
381             appendRun();
382             m_direction = U_RIGHT_TO_LEFT;
383         }
384         m_eor = m_last;
385     }
386
387     appendRun();
388     m_emptyRun = true;
389
390     // sor for the new run is determined by the higher level (rule X10)
391     setLastDir(from);
392     setLastStrongDir(from);
393     m_eor = Iterator();
394 }
395
396 template<typename Iterator, typename Run, typename DerivedClass>
397 void BidiResolverBase<Iterator, Run, DerivedClass>::raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to)
398 {
399     if (!m_emptyRun && m_eor != m_last) {
400         checkDirectionInLowerRaiseEmbeddingLevel();
401         // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
402         if (to == U_LEFT_TO_RIGHT) {
403             // bidi.sor ... bidi.eor ... bidi.last L
404             if (m_status.eor == U_EUROPEAN_NUMBER) {
405                 if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
406                     m_direction = U_EUROPEAN_NUMBER;
407                     appendRun();
408                 }
409             } else if (m_status.eor == U_ARABIC_NUMBER) {
410                 m_direction = U_ARABIC_NUMBER;
411                 appendRun();
412             } else if (m_status.lastStrong != U_LEFT_TO_RIGHT && from == U_LEFT_TO_RIGHT) {
413                 appendRun();
414                 m_direction = U_LEFT_TO_RIGHT;
415             }
416         } else if (m_status.eor == U_ARABIC_NUMBER
417             || (m_status.eor == U_EUROPEAN_NUMBER && (m_status.lastStrong != U_LEFT_TO_RIGHT || from == U_RIGHT_TO_LEFT))
418             || (m_status.eor != U_EUROPEAN_NUMBER && m_status.lastStrong == U_LEFT_TO_RIGHT && from == U_RIGHT_TO_LEFT)) {
419             appendRun();
420             m_direction = U_RIGHT_TO_LEFT;
421         }
422         m_eor = m_last;
423     }
424
425     appendRun();
426     m_emptyRun = true;
427
428     setLastDir(to);
429     setLastStrongDir(to);
430     m_eor = Iterator();
431 }
432
433 template<typename Iterator, typename Run, typename DerivedClass>
434 bool BidiResolverBase<Iterator, Run, DerivedClass>::commitExplicitEmbedding()
435 {
436     // When we're "inIsolate()" we're resolving the parent context which
437     // ignores (skips over) the isolated content, including embedding levels.
438     // We should never accrue embedding levels while skipping over isolated content.
439     ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty());
440
441     auto fromLevel = context()->level();
442     RefPtr<BidiContext> toContext = context();
443
444     for (auto& embedding : m_currentExplicitEmbeddingSequence) {
445         if (embedding.direction == U_POP_DIRECTIONAL_FORMAT) {
446             if (auto* parentContext = toContext->parent())
447                 toContext = parentContext;
448         } else {
449             UCharDirection direction = (embedding.direction == U_RIGHT_TO_LEFT_EMBEDDING || embedding.direction == U_RIGHT_TO_LEFT_OVERRIDE) ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT;
450             bool override = embedding.direction == U_LEFT_TO_RIGHT_OVERRIDE || embedding.direction == U_RIGHT_TO_LEFT_OVERRIDE;
451             unsigned char level = toContext->level();
452             if (direction == U_RIGHT_TO_LEFT)
453                 level = nextGreaterOddLevel(level);
454             else
455                 level = nextGreaterEvenLevel(level);
456             if (level < 61)
457                 toContext = BidiContext::create(level, direction, override, embedding.source, toContext.get());
458         }
459     }
460
461     auto toLevel = toContext->level();
462
463     if (toLevel > fromLevel)
464         raiseExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT, toLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT);
465     else if (toLevel < fromLevel)
466         lowerExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT);
467
468     setContext(WTFMove(toContext));
469
470     m_currentExplicitEmbeddingSequence.clear();
471
472     return fromLevel != toLevel;
473 }
474
475 template<typename Iterator, typename Run, typename DerivedClass>
476 inline void BidiResolverBase<Iterator, Run, DerivedClass>::updateStatusLastFromCurrentDirection(UCharDirection dirCurrent)
477 {
478     switch (dirCurrent) {
479     case U_EUROPEAN_NUMBER_TERMINATOR:
480         if (m_status.last != U_EUROPEAN_NUMBER)
481             m_status.last = U_EUROPEAN_NUMBER_TERMINATOR;
482         break;
483     case U_EUROPEAN_NUMBER_SEPARATOR:
484     case U_COMMON_NUMBER_SEPARATOR:
485     case U_SEGMENT_SEPARATOR:
486     case U_WHITE_SPACE_NEUTRAL:
487     case U_OTHER_NEUTRAL:
488         switch (m_status.last) {
489         case U_LEFT_TO_RIGHT:
490         case U_RIGHT_TO_LEFT:
491         case U_RIGHT_TO_LEFT_ARABIC:
492         case U_EUROPEAN_NUMBER:
493         case U_ARABIC_NUMBER:
494             m_status.last = dirCurrent;
495             break;
496         default:
497             m_status.last = U_OTHER_NEUTRAL;
498         }
499         break;
500     case U_DIR_NON_SPACING_MARK:
501     case U_BOUNDARY_NEUTRAL:
502     case U_RIGHT_TO_LEFT_EMBEDDING:
503     case U_LEFT_TO_RIGHT_EMBEDDING:
504     case U_RIGHT_TO_LEFT_OVERRIDE:
505     case U_LEFT_TO_RIGHT_OVERRIDE:
506     case U_POP_DIRECTIONAL_FORMAT:
507         // ignore these
508         break;
509     case U_EUROPEAN_NUMBER:
510         FALLTHROUGH;
511     default:
512         m_status.last = dirCurrent;
513     }
514 }
515
516 template<typename Iterator, typename Run, typename DerivedClass>
517 inline void BidiResolverBase<Iterator, Run, DerivedClass>::reorderRunsFromLevels()
518 {
519     unsigned char levelLow = 128;
520     unsigned char levelHigh = 0;
521     for (Run* run = m_runs.firstRun(); run; run = run->next()) {
522         levelHigh = std::max(run->level(), levelHigh);
523         levelLow = std::min(run->level(), levelLow);
524     }
525
526     // This implements reordering of the line (L2 according to Bidi spec):
527     // http://unicode.org/reports/tr9/#L2
528     // L2. From the highest level found in the text to the lowest odd level on each line,
529     // reverse any contiguous sequence of characters that are at that level or higher.
530
531     // Reversing is only done up to the lowest odd level.
532     if (!(levelLow % 2))
533         levelLow++;
534
535     unsigned count = m_runs.runCount() - 1;
536
537     while (levelHigh >= levelLow) {
538         unsigned i = 0;
539         Run* run = m_runs.firstRun();
540         while (i < count) {
541             for (;i < count && run && run->level() < levelHigh; i++)
542                 run = run->next();
543             unsigned start = i;
544             for (;i <= count && run && run->level() >= levelHigh; i++)
545                 run = run->next();
546             unsigned end = i - 1;
547             m_runs.reverseRuns(start, end);
548         }
549         levelHigh--;
550     }
551 }
552
553 template<typename Iterator, typename Run, typename DerivedClass>
554 void BidiResolverBase<Iterator, Run, DerivedClass>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak)
555 {
556     ASSERT(m_direction == U_OTHER_NEUTRAL);
557
558     if (override != NoVisualOverride) {
559         m_emptyRun = false;
560         m_sor = m_current;
561         m_eor = Iterator();
562         while (m_current != end && !m_current.atEnd()) {
563             m_eor = m_current;
564             increment();
565         }
566         m_direction = override == VisualLeftToRightOverride ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
567         appendRun();
568         m_runs.setLogicallyLastRun(m_runs.lastRun());
569         if (override == VisualRightToLeftOverride && m_runs.runCount())
570             m_runs.reverseRuns(0, m_runs.runCount() - 1);
571         return;
572     }
573
574     m_emptyRun = true;
575
576     m_eor = Iterator();
577
578     m_last = m_current;
579     bool pastEnd = false;
580     BidiResolverBase<Iterator, Run, DerivedClass> stateAtEnd;
581
582     while (true) {
583         UCharDirection dirCurrent;
584         if (pastEnd && (hardLineBreak || m_current.atEnd())) {
585             BidiContext* c = context();
586             if (hardLineBreak) {
587                 // A deviation from the Unicode Bidi Algorithm in order to match
588                 // WinIE and user expectations: hard line breaks reset bidi state
589                 // coming from unicode bidi control characters, but not those from
590                 // DOM nodes with specified directionality
591                 stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts());
592
593                 dirCurrent = stateAtEnd.context()->dir();
594                 stateAtEnd.setEorDir(dirCurrent);
595                 stateAtEnd.setLastDir(dirCurrent);
596                 stateAtEnd.setLastStrongDir(dirCurrent);
597             } else {
598                 while (c->parent())
599                     c = c->parent();
600                 dirCurrent = c->dir();
601             }
602         } else {
603             dirCurrent = m_current.direction();
604             if (context()->override()
605                     && dirCurrent != U_RIGHT_TO_LEFT_EMBEDDING
606                     && dirCurrent != U_LEFT_TO_RIGHT_EMBEDDING
607                     && dirCurrent != U_RIGHT_TO_LEFT_OVERRIDE
608                     && dirCurrent != U_LEFT_TO_RIGHT_OVERRIDE
609                     && dirCurrent != U_POP_DIRECTIONAL_FORMAT)
610                 dirCurrent = context()->dir();
611             else if (dirCurrent == U_DIR_NON_SPACING_MARK)
612                 dirCurrent = m_status.last;
613         }
614
615 #if PLATFORM(WIN)
616         // Our Windows build hasn't updated its headers from ICU 6.1, which doesn't have these symbols.
617         const UCharDirection U_FIRST_STRONG_ISOLATE = static_cast<UCharDirection>(19);
618         const UCharDirection U_LEFT_TO_RIGHT_ISOLATE = static_cast<UCharDirection>(20);
619         const UCharDirection U_RIGHT_TO_LEFT_ISOLATE = static_cast<UCharDirection>(21);
620         const UCharDirection U_POP_DIRECTIONAL_ISOLATE = static_cast<UCharDirection>(22);
621 #endif
622         // We ignore all character directionality while in unicode-bidi: isolate spans.
623         // We'll handle ordering the isolated characters in a second pass.
624         if (inIsolate() || dirCurrent == U_FIRST_STRONG_ISOLATE || dirCurrent == U_LEFT_TO_RIGHT_ISOLATE || dirCurrent == U_RIGHT_TO_LEFT_ISOLATE || dirCurrent == U_POP_DIRECTIONAL_ISOLATE)
625             dirCurrent = U_OTHER_NEUTRAL;
626
627         ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd());
628         switch (dirCurrent) {
629
630         // embedding and overrides (X1-X9 in the Bidi specs)
631         case U_RIGHT_TO_LEFT_EMBEDDING:
632         case U_LEFT_TO_RIGHT_EMBEDDING:
633         case U_RIGHT_TO_LEFT_OVERRIDE:
634         case U_LEFT_TO_RIGHT_OVERRIDE:
635         case U_POP_DIRECTIONAL_FORMAT:
636             embed(dirCurrent, FromUnicode);
637             commitExplicitEmbedding();
638             break;
639
640         // strong types
641         case U_LEFT_TO_RIGHT:
642             switch(m_status.last) {
643                 case U_RIGHT_TO_LEFT:
644                 case U_RIGHT_TO_LEFT_ARABIC:
645                 case U_EUROPEAN_NUMBER:
646                 case U_ARABIC_NUMBER:
647                     if (m_status.last != U_EUROPEAN_NUMBER || m_status.lastStrong != U_LEFT_TO_RIGHT)
648                         appendRun();
649                     break;
650                 case U_LEFT_TO_RIGHT:
651                     break;
652                 case U_EUROPEAN_NUMBER_SEPARATOR:
653                 case U_EUROPEAN_NUMBER_TERMINATOR:
654                 case U_COMMON_NUMBER_SEPARATOR:
655                 case U_BOUNDARY_NEUTRAL:
656                 case U_BLOCK_SEPARATOR:
657                 case U_SEGMENT_SEPARATOR:
658                 case U_WHITE_SPACE_NEUTRAL:
659                 case U_OTHER_NEUTRAL:
660                     if (m_status.eor == U_EUROPEAN_NUMBER) {
661                         if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
662                             // the numbers need to be on a higher embedding level, so let's close that run
663                             m_direction = U_EUROPEAN_NUMBER;
664                             appendRun();
665                             if (context()->dir() != U_LEFT_TO_RIGHT) {
666                                 // the neutrals take the embedding direction, which is R
667                                 m_eor = m_last;
668                                 m_direction = U_RIGHT_TO_LEFT;
669                                 appendRun();
670                             }
671                         }
672                     } else if (m_status.eor == U_ARABIC_NUMBER) {
673                         // Arabic numbers are always on a higher embedding level, so let's close that run
674                         m_direction = U_ARABIC_NUMBER;
675                         appendRun();
676                         if (context()->dir() != U_LEFT_TO_RIGHT) {
677                             // the neutrals take the embedding direction, which is R
678                             m_eor = m_last;
679                             m_direction = U_RIGHT_TO_LEFT;
680                             appendRun();
681                         }
682                     } else if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
683                         //last stuff takes embedding dir
684                         if (context()->dir() == U_RIGHT_TO_LEFT) {
685                             m_eor = m_last; 
686                             m_direction = U_RIGHT_TO_LEFT;
687                         }
688                         appendRun();
689                     }
690                     break;
691                 default:
692                     break;
693             }
694             m_eor = m_current;
695             m_status.eor = U_LEFT_TO_RIGHT;
696             m_status.lastStrong = U_LEFT_TO_RIGHT;
697             m_direction = U_LEFT_TO_RIGHT;
698             break;
699         case U_RIGHT_TO_LEFT_ARABIC:
700         case U_RIGHT_TO_LEFT:
701             switch (m_status.last) {
702                 case U_LEFT_TO_RIGHT:
703                 case U_EUROPEAN_NUMBER:
704                 case U_ARABIC_NUMBER:
705                     appendRun();
706                     FALLTHROUGH;
707                 case U_RIGHT_TO_LEFT:
708                 case U_RIGHT_TO_LEFT_ARABIC:
709                     break;
710                 case U_EUROPEAN_NUMBER_SEPARATOR:
711                 case U_EUROPEAN_NUMBER_TERMINATOR:
712                 case U_COMMON_NUMBER_SEPARATOR:
713                 case U_BOUNDARY_NEUTRAL:
714                 case U_BLOCK_SEPARATOR:
715                 case U_SEGMENT_SEPARATOR:
716                 case U_WHITE_SPACE_NEUTRAL:
717                 case U_OTHER_NEUTRAL:
718                     if (m_status.eor == U_EUROPEAN_NUMBER) {
719                         if (m_status.lastStrong == U_LEFT_TO_RIGHT && context()->dir() == U_LEFT_TO_RIGHT)
720                             m_eor = m_last;
721                         appendRun();
722                     } else if (m_status.eor == U_ARABIC_NUMBER)
723                         appendRun();
724                     else if (m_status.lastStrong == U_LEFT_TO_RIGHT) {
725                         if (context()->dir() == U_LEFT_TO_RIGHT)
726                             m_eor = m_last;
727                         appendRun();
728                     }
729                     break;
730                 default:
731                     break;
732             }
733             m_eor = m_current;
734             m_status.eor = U_RIGHT_TO_LEFT;
735             m_status.lastStrong = dirCurrent;
736             m_direction = U_RIGHT_TO_LEFT;
737             break;
738
739             // weak types:
740
741         case U_EUROPEAN_NUMBER:
742             if (m_status.lastStrong != U_RIGHT_TO_LEFT_ARABIC) {
743                 // if last strong was AL change EN to AN
744                 switch (m_status.last) {
745                     case U_EUROPEAN_NUMBER:
746                     case U_LEFT_TO_RIGHT:
747                         break;
748                     case U_RIGHT_TO_LEFT:
749                     case U_RIGHT_TO_LEFT_ARABIC:
750                     case U_ARABIC_NUMBER:
751                         m_eor = m_last;
752                         appendRun();
753                         m_direction = U_EUROPEAN_NUMBER;
754                         break;
755                     case U_EUROPEAN_NUMBER_SEPARATOR:
756                     case U_COMMON_NUMBER_SEPARATOR:
757                         if (m_status.eor == U_EUROPEAN_NUMBER)
758                             break;
759                         FALLTHROUGH;
760                     case U_EUROPEAN_NUMBER_TERMINATOR:
761                     case U_BOUNDARY_NEUTRAL:
762                     case U_BLOCK_SEPARATOR:
763                     case U_SEGMENT_SEPARATOR:
764                     case U_WHITE_SPACE_NEUTRAL:
765                     case U_OTHER_NEUTRAL:
766                         if (m_status.eor == U_EUROPEAN_NUMBER) {
767                             if (m_status.lastStrong == U_RIGHT_TO_LEFT) {
768                                 // ENs on both sides behave like Rs, so the neutrals should be R.
769                                 // Terminate the EN run.
770                                 appendRun();
771                                 // Make an R run.
772                                 m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last;
773                                 m_direction = U_RIGHT_TO_LEFT;
774                                 appendRun();
775                                 // Begin a new EN run.
776                                 m_direction = U_EUROPEAN_NUMBER;
777                             }
778                         } else if (m_status.eor == U_ARABIC_NUMBER) {
779                             // Terminate the AN run.
780                             appendRun();
781                             if (m_status.lastStrong == U_RIGHT_TO_LEFT || context()->dir() == U_RIGHT_TO_LEFT) {
782                                 // Make an R run.
783                                 m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last;
784                                 m_direction = U_RIGHT_TO_LEFT;
785                                 appendRun();
786                                 // Begin a new EN run.
787                                 m_direction = U_EUROPEAN_NUMBER;
788                             }
789                         } else if (m_status.lastStrong == U_RIGHT_TO_LEFT) {
790                             // Extend the R run to include the neutrals.
791                             m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last;
792                             m_direction = U_RIGHT_TO_LEFT;
793                             appendRun();
794                             // Begin a new EN run.
795                             m_direction = U_EUROPEAN_NUMBER;
796                         }
797                         break;
798                     default:
799                         break;
800                 }
801                 m_eor = m_current;
802                 m_status.eor = U_EUROPEAN_NUMBER;
803                 if (m_direction == U_OTHER_NEUTRAL)
804                     m_direction = U_LEFT_TO_RIGHT;
805                 break;
806             }
807             FALLTHROUGH;
808         case U_ARABIC_NUMBER:
809             dirCurrent = U_ARABIC_NUMBER;
810             switch (m_status.last) {
811                 case U_LEFT_TO_RIGHT:
812                     if (context()->dir() == U_LEFT_TO_RIGHT)
813                         appendRun();
814                     break;
815                 case U_ARABIC_NUMBER:
816                     break;
817                 case U_RIGHT_TO_LEFT:
818                 case U_RIGHT_TO_LEFT_ARABIC:
819                 case U_EUROPEAN_NUMBER:
820                     m_eor = m_last;
821                     appendRun();
822                     break;
823                 case U_COMMON_NUMBER_SEPARATOR:
824                     if (m_status.eor == U_ARABIC_NUMBER)
825                         break;
826                     FALLTHROUGH;
827                 case U_EUROPEAN_NUMBER_SEPARATOR:
828                 case U_EUROPEAN_NUMBER_TERMINATOR:
829                 case U_BOUNDARY_NEUTRAL:
830                 case U_BLOCK_SEPARATOR:
831                 case U_SEGMENT_SEPARATOR:
832                 case U_WHITE_SPACE_NEUTRAL:
833                 case U_OTHER_NEUTRAL:
834                     if (m_status.eor == U_ARABIC_NUMBER
835                         || (m_status.eor == U_EUROPEAN_NUMBER && (m_status.lastStrong == U_RIGHT_TO_LEFT || context()->dir() == U_RIGHT_TO_LEFT))
836                         || (m_status.eor != U_EUROPEAN_NUMBER && m_status.lastStrong == U_LEFT_TO_RIGHT && context()->dir() == U_RIGHT_TO_LEFT)) {
837                         // Terminate the run before the neutrals.
838                         appendRun();
839                         // Begin an R run for the neutrals.
840                         m_direction = U_RIGHT_TO_LEFT;
841                     } else if (m_direction == U_OTHER_NEUTRAL)
842                         m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
843                     m_eor = m_last;
844                     appendRun();
845                     break;
846                 default:
847                     break;
848             }
849             m_eor = m_current;
850             m_status.eor = U_ARABIC_NUMBER;
851             if (m_direction == U_OTHER_NEUTRAL)
852                 m_direction = U_ARABIC_NUMBER;
853             break;
854         case U_EUROPEAN_NUMBER_SEPARATOR:
855         case U_COMMON_NUMBER_SEPARATOR:
856             break;
857         case U_EUROPEAN_NUMBER_TERMINATOR:
858             if (m_status.last == U_EUROPEAN_NUMBER) {
859                 dirCurrent = U_EUROPEAN_NUMBER;
860                 m_eor = m_current;
861                 m_status.eor = dirCurrent;
862             } else if (m_status.last != U_EUROPEAN_NUMBER_TERMINATOR)
863                 m_lastBeforeET = m_emptyRun ? m_eor : m_last;
864             break;
865
866         // boundary neutrals should be ignored
867         case U_BOUNDARY_NEUTRAL:
868             if (m_eor == m_last)
869                 m_eor = m_current;
870             break;
871             // neutrals
872         case U_BLOCK_SEPARATOR:
873             // FIXME: What do we do with newline and paragraph separators that come to here?
874             break;
875         case U_SEGMENT_SEPARATOR:
876             // FIXME: Implement rule L1.
877             break;
878         case U_WHITE_SPACE_NEUTRAL:
879             break;
880         case U_OTHER_NEUTRAL:
881             break;
882         default:
883             break;
884         }
885
886         if (pastEnd && (m_eor == m_current || !needsContinuePastEnd())) {
887             if (!m_reachedEndOfLine) {
888                 m_eor = endOfLine;
889                 switch (m_status.eor) {
890                     case U_LEFT_TO_RIGHT:
891                     case U_RIGHT_TO_LEFT:
892                     case U_ARABIC_NUMBER:
893                         m_direction = m_status.eor;
894                         break;
895                     case U_EUROPEAN_NUMBER:
896                         m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_EUROPEAN_NUMBER;
897                         break;
898                     default:
899                         ASSERT_NOT_REACHED();
900                 }
901                 appendRun();
902             }
903             m_current = end;
904             m_status = stateAtEnd.m_status;
905             m_sor = stateAtEnd.m_sor; 
906             m_eor = stateAtEnd.m_eor;
907             m_last = stateAtEnd.m_last;
908             m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
909             m_lastBeforeET = stateAtEnd.m_lastBeforeET;
910             m_emptyRun = stateAtEnd.m_emptyRun;
911             m_direction = U_OTHER_NEUTRAL;
912             break;
913         }
914
915         updateStatusLastFromCurrentDirection(dirCurrent);
916         m_last = m_current;
917
918         if (m_emptyRun) {
919             m_sor = m_current;
920             m_emptyRun = false;
921         }
922
923         increment();
924         if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
925             bool committed = commitExplicitEmbedding();
926             if (committed && pastEnd) {
927                 m_current = end;
928                 m_status = stateAtEnd.m_status;
929                 m_sor = stateAtEnd.m_sor; 
930                 m_eor = stateAtEnd.m_eor;
931                 m_last = stateAtEnd.m_last;
932                 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
933                 m_lastBeforeET = stateAtEnd.m_lastBeforeET;
934                 m_emptyRun = stateAtEnd.m_emptyRun;
935                 m_direction = U_OTHER_NEUTRAL;
936                 break;
937             }
938         }
939
940         if (!pastEnd && (m_current == end || m_current.atEnd())) {
941             if (m_emptyRun)
942                 break;
943             stateAtEnd.m_status = m_status;
944             stateAtEnd.m_sor = m_sor;
945             stateAtEnd.m_eor = m_eor;
946             stateAtEnd.m_last = m_last;
947             stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine;
948             stateAtEnd.m_lastBeforeET = m_lastBeforeET;
949             stateAtEnd.m_emptyRun = m_emptyRun;
950             endOfLine = m_last;
951             pastEnd = true;
952         }
953     }
954
955     m_runs.setLogicallyLastRun(m_runs.lastRun());
956     reorderRunsFromLevels();
957     endOfLine = Iterator();
958 }
959
960 template<typename Iterator, typename Run, typename DerivedClass>
961 void BidiResolverBase<Iterator, Run, DerivedClass>::setWhitespaceCollapsingTransitionForIsolatedRun(Run& run, size_t transition)
962 {
963     ASSERT(!m_whitespaceCollapsingTransitionForIsolatedRun.contains(&run));
964     m_whitespaceCollapsingTransitionForIsolatedRun.add(&run, transition);
965 }
966
967 template<typename Iterator, typename Run, typename DerivedClass>
968 unsigned BidiResolverBase<Iterator, Run, DerivedClass>::whitespaceCollapsingTransitionForIsolatedRun(Run& run)
969 {
970     return m_whitespaceCollapsingTransitionForIsolatedRun.take(&run);
971 }
972
973 } // namespace WebCore