1 // -*- c-basic-offset: 2 -*-
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2001 Harri Porten (porten@kde.org)
5 * Copyright (C) 2004 Apple Computer, Inc.
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "interpreter.h"
27 #include "operations.h"
29 #include "regexp_object.h"
30 #include "string_object.h"
31 #include "error_object.h"
33 #include "string_object.lut.h"
37 // ------------------------------ StringInstanceImp ----------------------------
// Class metadata for String wrapper objects: the class name is "String" and
// the three remaining ClassInfo fields are left as 0.
39 const ClassInfo StringInstanceImp::info = {"String", 0, 0, 0};
// Constructs a String object whose internal value is the empty string.
// proto - prototype object (NOTE(review): presumably forwarded to the base
//         class initializer; confirm).
41 StringInstanceImp::StringInstanceImp(ObjectImp *proto)
44 setInternalValue(String(""));
// Constructs a String object whose internal value is a String built from
// `string`.
// proto  - prototype object (NOTE(review): presumably forwarded to the base
//          class initializer; confirm).
// string - initial string contents.
47 StringInstanceImp::StringInstanceImp(ObjectImp *proto, const UString &string)
50 setInternalValue(String(string));
// Property lookup for String objects.
//  - "length": the size of the internal value converted to a string.
//  - a name that toArrayIndex() can convert: the UChar at that index of the
//    string value, wrapped in a one-character String.
//  - anything else: delegated to ObjectImp::get.
53 Value StringInstanceImp::get(ExecState *exec, const Identifier &propertyName) const
55 if (propertyName == lengthPropertyName)
56 return Number(internalValue().toString(exec).size());
// toArrayIndex reports through `ok` (presumably whether the name is a valid
// array index -- confirm).
59 const unsigned index = propertyName.toArrayIndex(&ok);
61 const UString s = internalValue().toString(exec);
62 const unsigned length = s.size();
// Build a new UString of length 1 from the single code unit at `index`.
// NOTE(review): `length` is presumably used to bounds-check `index` first.
65 const UChar c = s[index];
66 return String(UString(&c, 1));
// Not a string-specific property: use the generic object lookup.
69 return ObjectImp::get(exec, propertyName);
// Property store for String objects. The "length" name is tested first and
// special-cased (NOTE(review): presumably ignored so that length cannot be
// overwritten -- confirm); every other name is forwarded unchanged to
// ObjectImp::put.
72 void StringInstanceImp::put(ExecState *exec, const Identifier &propertyName, const Value &value, int attr)
74 if (propertyName == lengthPropertyName)
76 ObjectImp::put(exec, propertyName, value, attr);
// Property existence test for String objects. "length" and names that
// convert to an array index are handled here, the latter using the size of
// the internal string value; all other names are delegated to
// ObjectImp::hasProperty.
79 bool StringInstanceImp::hasProperty(ExecState *exec, const Identifier &propertyName) const
81 if (propertyName == lengthPropertyName)
// toArrayIndex reports through `ok` (presumably whether the name is a valid
// array index -- confirm).
85 const unsigned index = propertyName.toArrayIndex(&ok);
// NOTE(review): presumably `index` is compared against `length` to decide
// whether the indexed character exists.
87 const unsigned length = internalValue().toString(exec).size();
92 return ObjectImp::hasProperty(exec, propertyName);
// Property deletion for String objects. "length" is special-cased
// (NOTE(review): presumably reported as not deletable -- confirm); every
// other name is delegated to ObjectImp::deleteProperty.
95 bool StringInstanceImp::deleteProperty(ExecState *exec, const Identifier &propertyName)
97 if (propertyName == lengthPropertyName)
99 return ObjectImp::deleteProperty(exec, propertyName);
102 // ------------------------------ StringPrototypeImp ---------------------------
// Class metadata for String.prototype: class name "String", a pointer to
// StringInstanceImp::info and a pointer to stringTable (presumably the parent
// class info and the static property table generated from the listing below).
103 const ClassInfo StringPrototypeImp::info = {"String", &StringInstanceImp::info, &stringTable, 0};
104 /* Source for string_object.lut.h
105 @begin stringTable 26
106 toString StringProtoFuncImp::ToString DontEnum|Function 0
107 valueOf StringProtoFuncImp::ValueOf DontEnum|Function 0
108 charAt StringProtoFuncImp::CharAt DontEnum|Function 1
109 charCodeAt StringProtoFuncImp::CharCodeAt DontEnum|Function 1
110 concat StringProtoFuncImp::Concat DontEnum|Function 1
111 indexOf StringProtoFuncImp::IndexOf DontEnum|Function 1
112 lastIndexOf StringProtoFuncImp::LastIndexOf DontEnum|Function 1
113 match StringProtoFuncImp::Match DontEnum|Function 1
114 replace StringProtoFuncImp::Replace DontEnum|Function 2
115 search StringProtoFuncImp::Search DontEnum|Function 1
116 slice StringProtoFuncImp::Slice DontEnum|Function 2
117 split StringProtoFuncImp::Split DontEnum|Function 2
118 substr StringProtoFuncImp::Substr DontEnum|Function 2
119 substring StringProtoFuncImp::Substring DontEnum|Function 2
120 toLowerCase StringProtoFuncImp::ToLowerCase DontEnum|Function 0
121 toUpperCase StringProtoFuncImp::ToUpperCase DontEnum|Function 0
122 toLocaleLowerCase StringProtoFuncImp::ToLocaleLowerCase DontEnum|Function 0
123 toLocaleUpperCase StringProtoFuncImp::ToLocaleUpperCase DontEnum|Function 0
125 # Under here: html extension, should only exist if KJS_PURE_ECMA is not defined
126 # I guess we need to generate two hashtables in the .lut.h file, and use #ifdef
127 # to select the right one... TODO. #####
128 big StringProtoFuncImp::Big DontEnum|Function 0
129 small StringProtoFuncImp::Small DontEnum|Function 0
130 blink StringProtoFuncImp::Blink DontEnum|Function 0
131 bold StringProtoFuncImp::Bold DontEnum|Function 0
132 fixed StringProtoFuncImp::Fixed DontEnum|Function 0
133 italics StringProtoFuncImp::Italics DontEnum|Function 0
134 strike StringProtoFuncImp::Strike DontEnum|Function 0
135 sub StringProtoFuncImp::Sub DontEnum|Function 0
136 sup StringProtoFuncImp::Sup DontEnum|Function 0
137 fontcolor StringProtoFuncImp::Fontcolor DontEnum|Function 1
138 fontsize StringProtoFuncImp::Fontsize DontEnum|Function 1
139 anchor StringProtoFuncImp::Anchor DontEnum|Function 1
140 link StringProtoFuncImp::Link DontEnum|Function 1
// Builds the String prototype object on top of StringInstanceImp(objProto)
// and gives it a `length` property of zero with the attributes
// DontDelete|ReadOnly|DontEnum.
144 StringPrototypeImp::StringPrototypeImp(ExecState *exec,
145 ObjectPrototypeImp *objProto)
146 : StringInstanceImp(objProto)
149 // The constructor will be added later, after StringObjectImp has been built
150 putDirect(lengthPropertyName, NumberImp::zero(), DontDelete|ReadOnly|DontEnum);
// Property lookup on String.prototype: resolved through lookupGetFunction
// against stringTable, with StringProtoFuncImp as the function type and
// StringInstanceImp as the parent type for names not in the table
// (NOTE(review): fallback behaviour is defined by lookupGetFunction; confirm).
154 Value StringPrototypeImp::get(ExecState *exec, const Identifier &propertyName) const
156 return lookupGetFunction<StringProtoFuncImp, StringInstanceImp>( exec, propertyName, &stringTable, this );
159 // ------------------------------ StringProtoFuncImp ---------------------------
// Creates one String.prototype function object.
// exec - execution state; its lexical interpreter supplies the built-in
//        function prototype passed to InternalFunctionImp.
// i    - identifier of the operation (NOTE(review): presumably stored as the
//        `id` that call() dispatches on -- confirm).
// len  - value of the function's `length` property, installed as
//        DontDelete|ReadOnly|DontEnum.
161 StringProtoFuncImp::StringProtoFuncImp(ExecState *exec, int i, int len)
162 : InternalFunctionImp(
163 static_cast<FunctionPrototypeImp*>(exec->lexicalInterpreter()->builtinFunctionPrototype().imp())
167 putDirect(lengthPropertyName, len, DontDelete|ReadOnly|DontEnum);
// Reports whether this object supports being called as a function
// (NOTE(review): presumably always true, since call() is implemented -- confirm).
170 bool StringProtoFuncImp::implementsCall() const
// Returns true when the regexp object's "global" property is defined (its
// type is not UndefinedType) and converts to boolean true.
175 static inline bool regExpIsGlobal(RegExpImp *regExp, ExecState *exec)
177 Value globalProperty = regExp->get(exec,"global");
178 return globalProperty.type() != UndefinedType && globalProperty.toBoolean(exec);
// Grows the UString::Range array used while collecting source ranges.
// array    - in/out pointer to the current storage
// count    - number of elements currently in use; that many are copied over
// capacity - in/out allocated size; updated to the new capacity
// NOTE(review): confirm that the old storage is released and that `array` is
// re-pointed at the new storage after the copy.
181 static inline void expandSourceRanges(UString::Range * & array, int& count, int& capacity)
// Growth policy: double the current capacity.
187 newCapacity = capacity * 2;
190 UString::Range *newArray = new UString::Range[newCapacity];
// Element-wise copy of the live entries into the new storage.
191 for (int i = 0; i < count; i++) {
192 newArray[i] = array[i];
197 capacity = newCapacity;
// Stores `range` at array[count], growing the array through
// expandSourceRanges first when one more element would exceed `capacity`.
// NOTE(review): `count` is taken by reference and is presumably incremented
// after the store -- confirm.
201 static void pushSourceRange(UString::Range * & array, int& count, int& capacity, UString::Range range)
203 if (count + 1 > capacity)
204 expandSourceRanges(array, count, capacity);
206 array[count] = range;
// Grows the UString array used while collecting replacement strings.
// array    - in/out pointer to the current storage
// count    - number of elements currently in use; that many are copied over
// capacity - in/out allocated size; updated to the new capacity
// NOTE(review): confirm that the old storage is released and that `array` is
// re-pointed at the new storage after the copy.
210 static inline void expandReplacements(UString * & array, int& count, int& capacity)
// Growth policy: double the current capacity.
216 newCapacity = capacity * 2;
219 UString *newArray = new UString[newCapacity];
// Element-wise copy of the live entries into the new storage.
220 for (int i = 0; i < count; i++) {
221 newArray[i] = array[i];
226 capacity = newCapacity;
// Stores `replacement` at array[count], growing the array through
// expandReplacements first when one more element would exceed `capacity`.
// NOTE(review): `count` is taken by reference and is presumably incremented
// after the store -- confirm.
230 static void pushReplacement(UString * & array, int& count, int& capacity, UString replacement)
232 if (count + 1 > capacity)
233 expandReplacements(array, count, capacity);
235 array[count] = replacement;
// Expands "$" escapes in a replacement string for one regexp match.
//  - "$$" becomes a literal "$".
//  - "$" followed by one character that parses as a number N, with
//    N <= reg->subPatterns(), becomes the text of capture N taken from
//    `source` using the offsets in *ovector (start at [2*N], end at [2*N+1]).
// replacement - replacement template; it is copied, not modified
// source      - the string that was matched against
// ovector     - pointer to the match offset vector
// reg         - the regexp, used only for its sub-pattern count
// Returns the expanded replacement text.
// Limitation: only a single character after "$" is examined, so two-digit
// references such as "$10" are read as "$1" followed by "0".
239 static inline UString substituteBackreferences(const UString &replacement, const UString &source, int **ovector, RegExp *reg)
241 UString substitutedReplacement = replacement;
// Walk every "$" occurrence; `i` is re-seated by find() on each iteration.
245 for (int i = 0; (i = substitutedReplacement.find(UString("$"), i)) != -1; i++) {
246 if (i+1 < substitutedReplacement.size() && substitutedReplacement[i+1] == '$') { // "$$" -> "$"
247 substitutedReplacement = substitutedReplacement.substr(0,i) + "$" + substitutedReplacement.substr(i+2);
250 // Assume number part is one char exactly
251 unsigned long backrefIndex = substitutedReplacement.substr(i+1,1).toULong(&converted, false /* tolerate empty string */);
252 if (converted && backrefIndex <= (unsigned)reg->subPatterns()) {
253 int backrefStart = (*ovector)[2*backrefIndex];
254 int backrefLength = (*ovector)[2*backrefIndex+1] - backrefStart;
// Replace the two-character "$N" with the captured text.
255 substitutedReplacement = substitutedReplacement.substr(0,i)
256 + source.substr(backrefStart, backrefLength)
257 + substitutedReplacement.substr(i+2);
// Skip over the inserted text so it is not rescanned for "$".
258 i += backrefLength - 1; // -1 offsets i++
262 return substitutedReplacement;
// Core of String.prototype.replace.
//  - When `pattern` is an object inheriting from RegExpImp: matches the
//    regexp against `source` (repeatedly if its "global" property is set),
//    records the unmatched source ranges and the backreference-expanded
//    replacement for each match, and joins them with
//    spliceSubstringsWithSeparators.
//  - Otherwise: `pattern` is converted to a string and located with
//    source.find(); the result is the text before the match, the replacement
//    string, and the text after the match.
// exec        - execution state used for conversions
// source      - the string being searched
// pattern     - RegExp object or value converted to a search string
// replacement - value converted to the replacement string with toString
// Returns the resulting String value.
265 static Value replace(ExecState *exec, const UString &source, const Value &pattern, const Value &replacement)
267 if (pattern.type() == ObjectType && pattern.toObject(exec).inherits(&RegExpImp::info)) {
268 RegExpImp* imp = static_cast<RegExpImp *>( pattern.toObject(exec).imp() );
269 RegExp *reg = imp->regExp();
270 bool global = regExpIsGlobal(imp, exec);
// The interpreter's built-in RegExp object receives the match state
// (registerRegexp / setSubPatterns below).
272 RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp().imp());
274 UString replacementString = replacement.toString(exec);
278 int startPosition = 0;
// Two growable arrays, filled in step and handed together to
// spliceSubstringsWithSeparators once matching is finished.
280 UString::Range *sourceRanges = 0;
281 int sourceRangeCount = 0;
282 int sourceRangeCapacity = 0;
283 UString *replacements = 0;
284 int replacementCount = 0;
285 int replacementCapacity = 0;
287 // This is either a loop (if global is set) or a one-way (if not).
289 int **ovector = regExpObj->registerRegexp( reg, source );
290 UString matchString = reg->match(source, startPosition, &matchIndex, ovector);
291 regExpObj->setSubPatterns(reg->subPatterns());
// matchIndex == -1 signals that no (further) match was found.
292 if (matchIndex == -1)
294 int matchLen = matchString.size();
// Keep the source text lying between the previous match and this one.
296 pushSourceRange(sourceRanges, sourceRangeCount, sourceRangeCapacity, UString::Range(lastIndex, matchIndex - lastIndex));
298 UString substitutedReplacement = substituteBackreferences(replacementString, source, ovector, reg);
299 pushReplacement(replacements, replacementCount, replacementCapacity, substitutedReplacement);
// Resume searching right after the matched text.
301 lastIndex = matchIndex + matchLen;
302 startPosition = lastIndex;
304 // special case of empty match
307 if (startPosition > source.size())
// Keep whatever follows the last match.
312 if (lastIndex < source.size())
313 pushSourceRange(sourceRanges, sourceRangeCount, sourceRangeCapacity, UString::Range(lastIndex, source.size() - lastIndex));
315 UString result = source.spliceSubstringsWithSeparators(sourceRanges, sourceRangeCount, replacements, replacementCount);
// Release the temporary arrays (allocated with new[] in the expand helpers).
317 delete [] sourceRanges;
318 delete [] replacements;
320 return String(result);
321 } else { // First arg is a string
322 UString patternString = pattern.toString(exec);
323 int matchPos = source.find(patternString);
324 int matchLen = patternString.size();
325 // Do the replacement
// NOTE(review): returning `source` unchanged is presumably the
// pattern-not-found case (matchPos == -1) -- confirm.
327 return String(source);
329 return String(source.substr(0, matchPos) + replacement.toString(exec) + source.substr(matchPos + matchLen));
// Dispatcher for the String.prototype functions listed in stringTable.
// `id` selects the operation.
//  - toString / valueOf require `thisObj` to inherit from StringInstanceImp;
//    otherwise a TypeError is created and set as the pending exception.
//  - Every other operation is generic and works on `s`, the result of
//    thisObj.toString(exec).
// exec    - execution state, used for conversions and for raising exceptions
// thisObj - the receiver of the call
// args    - call arguments (a0 / a1 below are presumably args[0] / args[1])
// Returns the operation's result value.
334 // ECMA 15.5.4.2 - 15.5.4.20
335 Value StringProtoFuncImp::call(ExecState *exec, Object &thisObj, const List &args)
339 // toString and valueOf are not generic functions.
340 if (id == ToString || id == ValueOf) {
341 if (thisObj.isNull() || !thisObj.inherits(&StringInstanceImp::info)) {
342 Object err = Error::create(exec,TypeError);
343 exec->setException(err);
347 return String(thisObj.internalValue().toString(exec));
// From here on the receiver is only used through its string conversion.
355 UString s = thisObj.toString(exec);
// --- charAt (presumably): one-character substring at dpos when in range ---
367 // Other browsers treat an omitted parameter as 0 rather than NaN.
368 // That doesn't match the ECMA standard, but is needed for site compatibility.
369 dpos = a0.isA(UndefinedType) ? 0 : a0.toInteger(exec);
370 if (dpos >= 0 && dpos < len) // false for NaN
371 u = s.substr(static_cast<int>(dpos), 1);
// --- charCodeAt (presumably): code unit at dpos, NaN otherwise ---
377 // Other browsers treat an omitted parameter as 0 rather than NaN.
378 // That doesn't match the ECMA standard, but is needed for site compatibility.
379 dpos = a0.isA(UndefinedType) ? 0 : a0.toInteger(exec);
380 if (dpos >= 0 && dpos < len) // false for NaN
381 result = Number(s[static_cast<int>(dpos)].unicode());
383 result = Number(NaN);
// --- concat (presumably): append the string form of every argument to s ---
386 ListIterator it = args.begin();
387 for ( ; it != args.end() ; ++it) {
388 s += it->dispatchToString(exec);
// --- indexOf (presumably): forward search for u2 starting at dpos ---
394 u2 = a0.toString(exec);
395 if (a1.type() == UndefinedType)
398 dpos = a1.toInteger(exec);
399 if (dpos >= 0) { // false for NaN
405 result = Number(s.find(u2, static_cast<int>(dpos)));
// --- lastIndexOf (presumably): backward search for u2 starting at dpos ---
408 u2 = a0.toString(exec);
409 d = a1.toNumber(exec);
410 if (a1.type() == UndefinedType || KJS::isNaN(d))
413 dpos = a1.toInteger(exec);
414 if (dpos >= 0) { // false for NaN
420 result = Number(s.rfind(u2, static_cast<int>(dpos)));
// --- match / search: use a0 directly when it is a RegExp object, otherwise
// build a temporary RegExp from its string form (tracked in tmpReg) ---
425 RegExp *reg, *tmpReg = 0;
427 if (a0.isA(ObjectType) && a0.toObject(exec).inherits(&RegExpImp::info))
429 imp = static_cast<RegExpImp *>( a0.toObject(exec).imp() );
434 * ECMA 15.5.4.12 String.prototype.search (regexp)
435 * If regexp is not an object whose [[Class]] property is "RegExp", it is
436 * replaced with the result of the expression new RegExp(regexp).
438 reg = tmpReg = new RegExp(a0.toString(exec), RegExp::None);
440 RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp().imp());
441 int **ovector = regExpObj->registerRegexp(reg, u);
442 UString mstr = reg->match(u, -1, &pos, ovector);
// search: the result is the match position reported by reg->match.
444 result = Number(pos);
447 if ((reg->flags() & RegExp::Global) == 0) {
448 // case without 'g' flag is handled like RegExp.prototype.exec
452 regExpObj->setSubPatterns(reg->subPatterns());
453 result = regExpObj->arrayOfMatches(exec,mstr);
456 // return array of matches
461 list.append(UndefinedImp::staticUndefined);
463 list.append(String(mstr));
// Advance past the match; an empty match still moves forward by one.
465 pos += mstr.isEmpty() ? 1 : mstr.size();
467 mstr = reg->match(u, pos, &pos, ovector);
470 imp->put(exec, "lastIndex", Number(lastIndex), DontDelete|DontEnum);
471 if (list.isEmpty()) {
472 // if there are no matches at all, it's important to return
473 // Null instead of an empty array, because this matches
474 // other browsers and because Null is a false value.
477 result = exec->lexicalInterpreter()->builtinArray().construct(exec, list);
// --- replace: delegated to the file-local replace() helper ---
485 result = replace(exec, s, a0, a1);
487 case Slice: // http://developer.netscape.com/docs/manuals/js/client/jsref/string.htm#1194366
489 // The arg processing is very much like ArrayProtoFunc::Slice
490 double begin = args[0].toInteger(exec);
491 if (begin >= 0) { // false for NaN
496 if (!(begin >= 0)) // true for NaN
500 if (args[1].type() != UndefinedType) {
501 end = args[1].toInteger(exec);
502 if (end >= 0) { // false for NaN
507 if (!(end >= 0)) // true for NaN
511 //printf( "Slicing from %d to %d \n", begin, end );
512 result = String(s.substr(static_cast<int>(begin), static_cast<int>(end-begin)));
// --- split (presumably): the pieces are stored into `res`, a fresh array
// built by the interpreter's Array constructor ---
516 Object constructor = exec->lexicalInterpreter()->builtinArray();
517 Object res = Object::dynamicCast(constructor.construct(exec,List::empty()));
// An omitted limit means "no limit" (largest uint32 value).
521 uint32_t limit = a1.type() == UndefinedType ? 0xFFFFFFFFU : a1.toUInt32(exec);
522 if (a0.type() == ObjectType && Object::dynamicCast(a0).inherits(&RegExpImp::info)) {
523 Object obj0 = Object::dynamicCast(a0);
524 RegExp reg(obj0.get(exec,"source").toString(exec));
525 if (u.isEmpty() && !reg.match(u, 0).isNull()) {
526 // empty string matched by regexp -> empty array
527 res.put(exec,lengthPropertyName, Number(0));
531 while (static_cast<uint32_t>(i) != limit && pos < u.size()) {
532 // TODO: back references
535 UString mstr = reg.match(u, pos, &mpos, &ovector);
536 delete [] ovector; ovector = 0L;
539 pos = mpos + (mstr.isEmpty() ? 1 : mstr.size());
540 if (mpos != p0 || !mstr.isEmpty()) {
541 res.put(exec,i, String(u.substr(p0, mpos-p0)));
542 p0 = mpos + mstr.size();
547 u2 = a0.toString(exec);
550 // empty separator matches empty string -> empty array
// Store the zero length on the result array `res`, exactly as the regexp
// branch above does; the previous unqualified put() targeted this function
// object instead of the array being returned.
551 res.put(exec,lengthPropertyName, Number(0));
554 while (static_cast<uint32_t>(i) != limit && i < u.size()-1)
555 res.put(exec, i++, String(u.substr(p0++, 1)));
558 while (static_cast<uint32_t>(i) != limit && (pos = u.find(u2, p0)) >= 0) {
559 res.put(exec, i, String(u.substr(p0, pos-p0)));
560 p0 = pos + u2.size();
565 // add remaining string, if any
566 if (static_cast<uint32_t>(i) != limit)
567 res.put(exec, i++, String(u.substr(p0)));
568 res.put(exec,lengthPropertyName, Number(i));
// --- substr (presumably): d is the start position, d2 the length ---
572 double d = a0.toInteger(exec);
573 double d2 = a1.toInteger(exec);
574 if (!(d >= 0)) { // true for NaN
576 if (!(d >= 0)) // true for NaN
587 result = String(s.substr(static_cast<int>(d), static_cast<int>(d2)));
// --- substring (presumably): start/end positions, converted with toNumber ---
591 double start = a0.toNumber(exec);
592 double end = a1.toNumber(exec);
593 if (KJS::isNaN(start))
605 if (a1.type() == UndefinedType)
612 result = String(s.substr((int)start, (int)end-(int)start));
616 case ToLocaleLowerCase: // FIXME: To get this 100% right we need to detect Turkish and change I to lowercase i without a dot.
// Per-character conversion of u in place.
618 for (i = 0; i < len; i++)
619 u[i] = u[i].toLower();
623 case ToLocaleUpperCase: // FIXME: To get this 100% right we need to detect Turkish and change i to uppercase I with a dot.
// Per-character conversion of u in place.
625 for (i = 0; i < len; i++)
626 u[i] = u[i].toUpper();
// --- HTML extensions: wrap s in the corresponding markup. Neither s nor the
// a0 attribute value is escaped. ---
629 #ifndef KJS_PURE_ECMA
631 result = String("<big>" + s + "</big>");
634 result = String("<small>" + s + "</small>");
637 result = String("<blink>" + s + "</blink>");
640 result = String("<b>" + s + "</b>");
643 result = String("<tt>" + s + "</tt>");
646 result = String("<i>" + s + "</i>");
649 result = String("<strike>" + s + "</strike>");
652 result = String("<sub>" + s + "</sub>");
655 result = String("<sup>" + s + "</sup>");
658 result = String("<font color=\"" + a0.toString(exec) + "\">" + s + "</font>");
661 result = String("<font size=\"" + a0.toString(exec) + "\">" + s + "</font>");
664 result = String("<a name=\"" + a0.toString(exec) + "\">" + s + "</a>");
667 result = String("<a href=\"" + a0.toString(exec) + "\">" + s + "</a>");
675 // ------------------------------ StringObjectImp ------------------------------
// Builds the global `String` constructor object on top of
// InternalFunctionImp(funcProto) and installs three properties:
//  - `prototype`    -> stringProto (DontEnum|DontDelete|ReadOnly)
//  - `fromCharCode` -> a new StringObjectFuncImp (DontEnum)
//  - `length`       -> 1 (ReadOnly|DontDelete|DontEnum)
677 StringObjectImp::StringObjectImp(ExecState *exec,
678 FunctionPrototypeImp *funcProto,
679 StringPrototypeImp *stringProto)
680 : InternalFunctionImp(funcProto)
683 // ECMA 15.5.3.1 String.prototype
684 putDirect(prototypePropertyName, stringProto, DontEnum|DontDelete|ReadOnly);
// The identifier is a function-local static, so it is built only once.
686 static Identifier fromCharCode("fromCharCode");
687 putDirect(fromCharCode, new StringObjectFuncImp(exec,funcProto), DontEnum);
689 // no. of arguments for constructor
690 putDirect(lengthPropertyName, NumberImp::one(), ReadOnly|DontDelete|DontEnum);
// Reports whether this object can be used with `new`
// (NOTE(review): presumably always true, since construct() is implemented -- confirm).
694 bool StringObjectImp::implementsConstruct() const
// ECMA `new String(...)`: creates a StringInstanceImp whose prototype is the
// interpreter's built-in String prototype. With no arguments the
// one-argument StringInstanceImp constructor is used; otherwise the first
// argument, converted with dispatchToString, becomes the string contents.
// Arguments after the first are not read.
700 Object StringObjectImp::construct(ExecState *exec, const List &args)
702 ObjectImp *proto = exec->lexicalInterpreter()->builtinStringPrototype().imp();
703 if (args.size() == 0)
704 return Object(new StringInstanceImp(proto));
705 return Object(new StringInstanceImp(proto, args.begin()->dispatchToString(exec)));
// Reports whether this object supports being called as a function
// (NOTE(review): presumably always true, since call() is implemented -- confirm).
708 bool StringObjectImp::implementsCall() const
// `String(...)` called as a plain function: returns a String value built
// from v.toString(exec). The receiver is unused (its parameter name is
// commented out).
// NOTE(review): `v` is presumably the first argument, with a separate path
// for an empty argument list -- confirm.
714 Value StringObjectImp::call(ExecState *exec, Object &/*thisObj*/, const List &args)
720 return String(v.toString(exec));
724 // ------------------------------ StringObjectFuncImp --------------------------
// Function object behind String.fromCharCode: built on
// InternalFunctionImp(funcProto), with a `length` property of 1 that is
// DontDelete|ReadOnly|DontEnum.
726 // ECMA 15.5.3.2 fromCharCode()
727 StringObjectFuncImp::StringObjectFuncImp(ExecState *exec, FunctionPrototypeImp *funcProto)
728 : InternalFunctionImp(funcProto)
731 putDirect(lengthPropertyName, NumberImp::one(), DontDelete|ReadOnly|DontEnum);
// Reports whether this object supports being called as a function
// (NOTE(review): presumably always true, since call() is implemented -- confirm).
734 bool StringObjectFuncImp::implementsCall() const
739 Value StringObjectFuncImp::call(ExecState *exec, Object &/*thisObj*/, const List &args)
743 UChar *buf = new UChar[args.size()];
745 ListIterator it = args.begin();
746 while (it != args.end()) {
747 unsigned short u = it->toUInt16(exec);
751 s = UString(buf, args.size(), false);