+2011-09-12 Michael Saboff <msaboff@apple.com>
+
+ Update RegExp and related classes to use 8 bit strings when available
+ https://bugs.webkit.org/show_bug.cgi?id=67337
+
+ Modified both the Yarr interpreter and JIT to handle 8 bit subject strings.
+ The code paths are triggered by the UString::is8Bit() method which currently
+ returns false. Implemented JIT changes for all current architectures.
+ Tested X86_64 and ARM v7.
+
+ This includes some code that will likely change as we complete the
+ 8 bit string changes. This includes the way the raw buffer pointers
+ are accessed as well as replacing the CharAccess class with a
+ string iterator returned from UString.
+
+ Fixed build breakage in testRegExp.cpp due to globalObject construction
+ changes.
+
+ Reviewed by Gavin Barraclough.
+
+ * JavaScriptCore.exp:
+ * JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def:
+ * testRegExp.cpp:
+ (GlobalObject::finishCreation):
+ (GlobalObject::GlobalObject):
+ * assembler/ARMAssembler.cpp:
+ (JSC::ARMAssembler::baseIndexTransfer32):
+ * assembler/ARMAssembler.h:
+ * assembler/ARMv7Assembler.h:
+ (JSC::ARMv7Assembler::ubfx):
+ (JSC::ARMv7Assembler::ARMInstructionFormatter::twoWordOp12Reg40Imm3Reg4Imm20Imm5):
+ * assembler/MacroAssemblerARM.h:
+ (JSC::MacroAssemblerARM::load8):
+ (JSC::MacroAssemblerARM::branch8):
+ (JSC::MacroAssemblerARM::branch16):
+ * assembler/MacroAssemblerARMv7.h:
+ (JSC::MacroAssemblerARMv7::load8):
+ (JSC::MacroAssemblerARMv7::branch16):
+ (JSC::MacroAssemblerARMv7::branch8):
+ * assembler/MacroAssemblerMIPS.h:
+ (JSC::MacroAssemblerMIPS::load8):
+ (JSC::MacroAssemblerMIPS::branch8):
+ (JSC::MacroAssemblerMIPS::branch16):
+ * assembler/MacroAssemblerSH4.h:
+ (JSC::MacroAssemblerSH4::load8):
+ (JSC::MacroAssemblerSH4::branch8):
+ (JSC::MacroAssemblerSH4::branch16):
+ * assembler/MacroAssemblerX86Common.h:
+ (JSC::MacroAssemblerX86Common::load8):
+ (JSC::MacroAssemblerX86Common::branch16):
+ (JSC::MacroAssemblerX86Common::branch8):
+ * assembler/SH4Assembler.h:
+ (JSC::SH4Assembler::extub):
+ (JSC::SH4Assembler::printInstr):
+ * assembler/X86Assembler.h:
+ (JSC::X86Assembler::cmpw_ir):
+ (JSC::X86Assembler::movzbl_mr):
+ * runtime/RegExp.cpp:
+ (JSC::RegExp::compile):
+ (JSC::RegExp::compileIfNecessary):
+ (JSC::RegExp::match):
+ (JSC::RegExp::matchCompareWithInterpreter):
+ * runtime/RegExp.h:
+ * runtime/UString.h:
+ (JSC::UString::is8Bit):
+ * yarr/Yarr.h:
+ * yarr/YarrInterpreter.cpp:
+ (JSC::Yarr::Interpreter::CharAccess::CharAccess):
+ (JSC::Yarr::Interpreter::CharAccess::~CharAccess):
+ (JSC::Yarr::Interpreter::CharAccess::operator[]):
+ (JSC::Yarr::Interpreter::InputStream::InputStream):
+ (JSC::Yarr::Interpreter::Interpreter):
+ (JSC::Yarr::interpret):
+ * yarr/YarrJIT.cpp:
+ (JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
+ (JSC::Yarr::YarrGenerator::readCharacter):
+ (JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
+ (JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
+ (JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
+ (JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
+ (JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
+ (JSC::Yarr::YarrGenerator::generateDotStarEnclosure):
+ (JSC::Yarr::YarrGenerator::YarrGenerator):
+ (JSC::Yarr::YarrGenerator::compile):
+ (JSC::Yarr::jitCompile):
+ (JSC::Yarr::execute):
+ * yarr/YarrJIT.h:
+ (JSC::Yarr::YarrCodeBlock::has8BitCode):
+ (JSC::Yarr::YarrCodeBlock::has16BitCode):
+ (JSC::Yarr::YarrCodeBlock::set8BitCode):
+ (JSC::Yarr::YarrCodeBlock::set16BitCode):
+ (JSC::Yarr::YarrCodeBlock::execute):
+ * yarr/YarrParser.h:
+ (JSC::Yarr::Parser::Parser):
+
2011-09-12 Andras Becsi <andras.becsi@nokia.com>
[Qt] Build fails after r94920 with strict compiler
__ZN3JSC4Heap7protectENS_7JSValueE
__ZN3JSC4Heap8capacityEv
__ZN3JSC4Heap9unprotectENS_7JSValueE
+__ZN3JSC4Yarr9interpretEPNS0_15BytecodePatternERKNS_7UStringEjjPi
__ZN3JSC4Yarr11YarrPatternC1ERKNS_7UStringEbbPPKc
__ZN3JSC4Yarr11byteCompileERNS0_11YarrPatternEPN3WTF20BumpPointerAllocatorE
-__ZN3JSC4Yarr9interpretEPNS0_15BytecodePatternEPKtjjPi
__ZN3JSC4callEPNS_9ExecStateENS_7JSValueENS_8CallTypeERKNS_8CallDataES2_RKNS_7ArgListE
__ZN3JSC6JSCell11getCallDataERNS_8CallDataE
__ZN3JSC6JSCell11getJSNumberEv
??0ThreadCondition@WTF@@QAE@XZ
??0UString@JSC@@QAE@PBD@Z
??0UString@JSC@@QAE@PBDI@Z
+ ??0UString@JSC@@QAE@PB_W@Z
??0UString@JSC@@QAE@PB_WI@Z
??0WTFThreadData@WTF@@QAE@XZ
??0YarrPattern@Yarr@JSC@@QAE@ABVUString@2@_N1PAPBD@Z
?initializeMainThread@WTF@@YAXXZ
?initializeThreading@JSC@@YAXXZ
?initializeThreading@WTF@@YAXXZ
- ?interpret@Yarr@JSC@@YAHPAUBytecodePattern@12@PB_WIIPAH@Z
+ ?interpret@Yarr@JSC@@YAHPAUBytecodePattern@12@ABVUString@2@IIPAH@Z
?isAccessorDescriptor@PropertyDescriptor@JSC@@QBE_NXZ
?isBusy@Heap@JSC@@QAE_NXZ
?isDataDescriptor@PropertyDescriptor@JSC@@QBE_NXZ
}
}
-void ARMAssembler::baseIndexTransfer32(bool isLoad, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset)
+void ARMAssembler::baseIndexTransfer32(bool isLoad, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset, bool bytes)
{
ARMWord op2;
+ ARMWord transferFlag = bytes ? DT_BYTE : 0;
ASSERT(scale >= 0 && scale <= 3);
op2 = lsl(index, scale);
if (offset >= 0 && offset <= 0xfff) {
add_r(ARMRegisters::S0, base, op2);
- dtr_u(isLoad, srcDst, ARMRegisters::S0, offset);
+ dtr_u(isLoad, srcDst, ARMRegisters::S0, offset | transferFlag);
return;
}
if (offset <= 0 && offset >= -0xfff) {
add_r(ARMRegisters::S0, base, op2);
- dtr_d(isLoad, srcDst, ARMRegisters::S0, -offset);
+ dtr_d(isLoad, srcDst, ARMRegisters::S0, (-offset & 0xfff) | transferFlag);
return;
}
ldr_un_imm(ARMRegisters::S0, offset);
add_r(ARMRegisters::S0, ARMRegisters::S0, op2);
- dtr_ur(isLoad, srcDst, base, ARMRegisters::S0);
+ dtr_ur(isLoad, srcDst, base, ARMRegisters::S0 | transferFlag);
}
void ARMAssembler::doubleTransfer(bool isLoad, FPRegisterID srcDst, RegisterID base, int32_t offset)
// Memory load/store helpers
void dataTransfer32(bool isLoad, RegisterID srcDst, RegisterID base, int32_t offset, bool bytes = false);
- void baseIndexTransfer32(bool isLoad, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset);
+ void baseIndexTransfer32(bool isLoad, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset, bool bytes = false);
void doubleTransfer(bool isLoad, FPRegisterID srcDst, RegisterID base, int32_t offset);
// Constant pool hnadlers
OP_MOV_imm_T3 = 0xF240,
OP_SUB_imm_T4 = 0xF2A0,
OP_MOVT = 0xF2C0,
+ OP_UBFX_T1 = 0xF3C0,
OP_NOP_T2a = 0xF3AF,
OP_LDRB_imm_T3 = 0xF810,
OP_LDRB_reg_T2 = 0xF810,
m_formatter.oneWordOp10Reg3Reg3(OP_TST_reg_T1, rm, rn);
}
+ // Emit UBFX (Thumb-2 encoding T1): unsigned bit-field extract of
+ // 'width' bits starting at 'lsb' from rn into rd.
+ ALWAYS_INLINE void ubfx(RegisterID rd, RegisterID rn, unsigned lsb, unsigned width)
+ {
+ ASSERT(lsb < 32);
+ ASSERT((width >= 1) && (width <= 32));
+ ASSERT((lsb + width) <= 32);
+ // T1 encoding places Rn in the first halfword and imm3:Rd:imm2:widthm1
+ // in the second. Pass the raw field values - the formatter applies the
+ // field shifts (imm1 << 12, imm2 << 6), so pre-shifting here would
+ // double-shift the bits out of the halfword.
+ m_formatter.twoWordOp12Reg40Imm3Reg4Imm20Imm5(OP_UBFX_T1, rn, rd, (lsb & 0x1c) >> 2, lsb & 0x3, (width - 1) & 0x1f);
+ }
+
void vadd_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
{
m_formatter.vfpOp(OP_VADD_T2, OP_VADD_T2b, true, rn, rd, rm);
m_buffer.putShort((reg2 << 12) | imm);
}
+ ALWAYS_INLINE void twoWordOp12Reg40Imm3Reg4Imm20Imm5(OpcodeID1 op, RegisterID reg1, RegisterID reg2, uint16_t imm1, uint16_t imm2, uint16_t imm3)
+ {
+ m_buffer.putShort(op | reg1);
+ m_buffer.putShort((imm1 << 12) | (reg2 << 8) | (imm2 << 6) | imm3);
+ }
+
// Formats up instructions of the pattern:
// 111111111B11aaaa:bbbb222SA2C2cccc
// Where 1s in the pattern come from op1, 2s in the pattern come from op2, S is the provided size bit.
m_assembler.dataTransfer32(true, dest, address.base, address.offset, true);
}
+ void load8(BaseIndex address, RegisterID dest)
+ {
+ m_assembler.baseIndexTransfer32(true, dest, address.base, address.index, static_cast<int>(address.scale), address.offset, true);
+ }
+
void load32(ImplicitAddress address, RegisterID dest)
{
m_assembler.dataTransfer32(true, dest, address.base, address.offset);
return branch32(cond, ARMRegisters::S1, right);
}
+ Jump branch8(RelationalCondition cond, BaseIndex left, TrustedImm32 right)
+ {
+ ASSERT(!(right.m_value & 0xFFFFFF00));
+ load8(left, ARMRegisters::S1);
+ return branch32(cond, ARMRegisters::S1, right);
+ }
+
+ // Compare the low 16 bits of 'left' against a 16-bit immediate by
+ // shifting both operands into the top halfword before the 32-bit compare.
+ Jump branch16(RelationalCondition cond, RegisterID left, TrustedImm32 right)
+ {
+ ASSERT(!(right.m_value & 0xFFFF0000));
+ right.m_value <<= 16;
+ m_assembler.mov_r(ARMRegisters::S1, left);
+ // lshift32 takes an explicit TrustedImm32; the statement was also
+ // missing its terminating semicolon.
+ lshift32(TrustedImm32(16), ARMRegisters::S1);
+ return branch32(cond, ARMRegisters::S1, right);
+ }
+
Jump branch32(RelationalCondition cond, RegisterID left, RegisterID right, int useConstantPool = 0)
{
m_assembler.cmp_r(left, right);
load8(setupArmAddress(address), dest);
}
+ void load8(BaseIndex address, RegisterID dest)
+ {
+ load8(setupArmAddress(address), dest);
+ }
+
DataLabel32 load32WithAddressOffsetPatch(Address address, RegisterID dest)
{
DataLabel32 label = moveWithPatch(TrustedImm32(address.offset), dataTempRegister);
return branch32(cond, dataTempRegister, addressTempRegister);
}
+ Jump branch16(RelationalCondition cond, RegisterID left, TrustedImm32 right)
+ {
+ ASSERT(!(0xffff0000 & right.m_value));
+ // Extract the lower 16 bits into a temp for comparison
+ m_assembler.ubfx(dataTempRegister, left, 0, 16);
+ return branch32(cond, dataTempRegister, right);
+ }
+
Jump branch16(RelationalCondition cond, BaseIndex left, TrustedImm32 right)
{
// use addressTempRegister incase the branch32 we call uses dataTempRegister. :-/
Jump branch8(RelationalCondition cond, Address left, TrustedImm32 right)
{
+ ASSERT(!(0xffffff00 & right.m_value));
// use addressTempRegister incase the branch8 we call uses dataTempRegister. :-/
load8(left, addressTempRegister);
return branch8(cond, addressTempRegister, right);
}
+ Jump branch8(RelationalCondition cond, BaseIndex left, TrustedImm32 right)
+ {
+ ASSERT(!(0xffffff00 & right.m_value));
+ // use addressTempRegister incase the branch32 we call uses dataTempRegister. :-/
+ load8(left, addressTempRegister);
+ return branch32(cond, addressTempRegister, right);
+ }
+
Jump branchTest32(ResultCondition cond, RegisterID reg, RegisterID mask)
{
m_assembler.tst(reg, mask);
}
}
+ void load8(BaseIndex address, RegisterID dest)
+ {
+ if (address.offset >= -32768 && address.offset <= 32767
+ && !m_fixedWidth) {
+ /*
+ sll addrTemp, address.index, address.scale
+ addu addrTemp, addrTemp, address.base
+ lbu dest, address.offset(addrTemp)
+ */
+ m_assembler.sll(addrTempRegister, address.index, address.scale);
+ m_assembler.addu(addrTempRegister, addrTempRegister, address.base);
+ m_assembler.lbu(dest, addrTempRegister, address.offset);
+ } else {
+ /*
+ sll addrTemp, address.index, address.scale
+ addu addrTemp, addrTemp, address.base
+ lui immTemp, (address.offset + 0x8000) >> 16
+ addu addrTemp, addrTemp, immTemp
+ lbu dest, (address.offset & 0xffff)(at)
+ */
+ m_assembler.sll(addrTempRegister, address.index, address.scale);
+ m_assembler.addu(addrTempRegister, addrTempRegister, address.base);
+ m_assembler.lui(immTempRegister, (address.offset + 0x8000) >> 16);
+ m_assembler.addu(addrTempRegister, addrTempRegister,
+ immTempRegister);
+ m_assembler.lbu(dest, addrTempRegister, address.offset);
+ }
+ }
+
void load32(ImplicitAddress address, RegisterID dest)
{
if (address.offset >= -32768 && address.offset <= 32767
return branch32(cond, dataTempRegister, immTempRegister);
}
+ Jump branch8(RelationalCondition cond, BaseIndex left, TrustedImm32 right)
+ {
+ ASSERT(!(right.m_value & 0xFFFFFF00));
+ load8(left, dataTempRegister);
+ // Be careful that the previous load8() uses immTempRegister.
+ // So, we need to put move() after load8().
+ move(right, immTempRegister);
+ return branch32(cond, dataTempRegister, immTempRegister);
+ }
+
Jump branch32(RelationalCondition cond, RegisterID left, RegisterID right)
{
if (cond == Equal)
return branch32(cond, dataTempRegister, immTempRegister);
}
+ Jump branch16(RelationalCondition cond, RegisterID left, TrustedImm32 right)
+ {
+ // Make sure the immediate value is unsigned 16 bits.
+ ASSERT(!(right.m_value & 0xFFFF0000));
+ m_assembler.andi(immTempRegister, left, 0xffff);
+ return branch32(cond, immTempRegister, right);
+ }
+
Jump branch16(RelationalCondition cond, BaseIndex left, RegisterID right)
{
load16(left, dataTempRegister);
load8(address.base, address.offset, dest);
}
+ void load8(BaseIndex address, RegisterID dest)
+ {
+ RegisterID scr = claimScratch();
+ move(address.index, scr);
+ lshift32(TrustedImm32(address.scale), scr);
+ add32(address.base, scr);
+ load8(scr, address.offset, dest);
+ releaseScratch(scr);
+ }
+
void load32(BaseIndex address, RegisterID dest)
{
RegisterID scr = claimScratch();
return branchTrue();
}
+ Jump branch8(RelationalCondition cond, BaseIndex left, TrustedImm32 right)
+ {
+ ASSERT(!(right.m_value & 0xFFFFFF00));
+ RegisterID scr = claimScratch();
+
+ move(left.index, scr);
+ lshift32(TrustedImm32(left.scale), scr);
+
+ if (left.offset)
+ add32(TrustedImm32(left.offset), scr);
+ add32(left.base, scr);
+ load8(scr, scr);
+ extub(scr, scr);
+ RegisterID scr1 = claimScratch();
+ m_assembler.loadConstant(right.m_value, scr1);
+ releaseScratch(scr);
+ releaseScratch(scr1);
+
+ return branch32(cond, scr, scr1);
+ }
+
+ Jump branch16(RelationalCondition cond, RegisterID left, TrustedImm32 right)
+ {
+ ASSERT(!(right.m_value & 0xFFFF0000));
+ RegisterID scr = claimScratch();
+
+ extuw(left, scr);
+ if (((cond == Equal) || (cond == NotEqual)) && !right.m_value)
+ m_assembler.testlRegReg(scr, scr);
+ else
+ compare32(right.m_value, scr, cond);
+
+ releaseScratch(scr);
+
+ if (cond == NotEqual)
+ return branchFalse();
+ return branchTrue();
+ }
+
Jump branch16(RelationalCondition cond, BaseIndex left, RegisterID right)
{
RegisterID scr = claimScratch();
return DataLabelCompact(this);
}
+ void load8(BaseIndex address, RegisterID dest)
+ {
+ m_assembler.movzbl_mr(address.offset, address.base, address.index, address.scale, dest);
+ }
+
void load16(BaseIndex address, RegisterID dest)
{
m_assembler.movzwl_mr(address.offset, address.base, address.index, address.scale, dest);
return branch32(cond, left, right);
}
+ Jump branch16(RelationalCondition cond, RegisterID left, TrustedImm32 right)
+ {
+ if (((cond == Equal) || (cond == NotEqual)) && !right.m_value)
+ m_assembler.testw_rr(left, left);
+ else
+ m_assembler.cmpw_ir(right.m_value, left);
+ return Jump(m_assembler.jCC(x86Condition(cond)));
+ }
+
Jump branch16(RelationalCondition cond, BaseIndex left, RegisterID right)
{
m_assembler.cmpw_rm(right, left.offset, left.base, left.index, left.scale);
return Jump(m_assembler.jCC(x86Condition(cond)));
}
+ Jump branch8(RelationalCondition cond, BaseIndex left, TrustedImm32 right)
+ {
+ ASSERT(!(right.m_value & 0xFFFFFF00));
+
+ m_assembler.cmpb_im(right.m_value, left.offset, left.base, left.index, left.scale);
+ return Jump(m_assembler.jCC(x86Condition(cond)));
+ }
+
Jump jump()
{
return Jump(m_assembler.jmp());
TST_OPCODE = 0x2008,
TSTIMM_OPCODE = 0xc800,
TSTB_OPCODE = 0xcc00,
+ EXTUB_OPCODE = 0x600c,
EXTUW_OPCODE = 0x600d,
XOR_OPCODE = 0x200a,
XORIMM_OPCODE = 0xca00,
oneShortOp(opc);
}
+ void extub(RegisterID src, RegisterID dst)
+ {
+ uint16_t opc = getOpcodeGroup1(EXTUB_OPCODE, dst, src);
+ oneShortOp(opc);
+ }
+
void extuw(RegisterID src, RegisterID dst)
{
uint16_t opc = getOpcodeGroup1(EXTUW_OPCODE, dst, src);
case MOVW_READ_R0RM_OPCODE:
format = " MOV.W @(R0, R%d), R%d\n";
break;
+ case EXTUB_OPCODE:
+ format = " EXTU.B R%d, R%d\n";
+ break;
case EXTUW_OPCODE:
format = " EXTU.W R%d, R%d\n";
break;
}
#endif
+ void cmpw_ir(int imm, RegisterID dst)
+ {
+ if (CAN_SIGN_EXTEND_8_32(imm)) {
+ m_formatter.prefix(PRE_OPERAND_SIZE);
+ m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_CMP, dst);
+ m_formatter.immediate8(imm);
+ } else {
+ m_formatter.prefix(PRE_OPERAND_SIZE);
+ m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_CMP, dst);
+ m_formatter.immediate16(imm);
+ }
+ }
+
void cmpw_rm(RegisterID src, int offset, RegisterID base, RegisterID index, int scale)
{
m_formatter.prefix(PRE_OPERAND_SIZE);
m_formatter.twoByteOp(OP2_MOVZX_GvEw, dst, base, index, scale, offset);
}
+ void movzbl_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
+ {
+ m_formatter.twoByteOp(OP2_MOVZX_GvEb, dst, base, index, scale, offset);
+ }
+
void movzbl_rr(RegisterID src, RegisterID dst)
{
// In 64-bit, this may cause an unnecessary REX to be planted (if the dst register
return globalData.regExpCache()->lookupOrCreate(patternString, flags);
}
-void RegExp::compile(JSGlobalData* globalData)
+void RegExp::compile(JSGlobalData* globalData, Yarr::YarrCharSize charSize)
{
- ASSERT(m_state == NotCompiled);
- m_representation = adoptPtr(new RegExpRepresentation);
Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
if (m_constructionError) {
ASSERT_NOT_REACHED();
m_state = ParseError;
return;
}
-
- globalData->regExpCache()->addToStrongCache(this);
-
ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
- m_state = ByteCode;
+ if (!m_representation) {
+ ASSERT(m_state == NotCompiled);
+ m_representation = adoptPtr(new RegExpRepresentation);
+ globalData->regExpCache()->addToStrongCache(this);
+ m_state = ByteCode;
+ }
#if ENABLE(YARR_JIT)
if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
- Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
+ Yarr::jitCompile(pattern, charSize, globalData, m_representation->m_regExpJITCode);
#if ENABLE(YARR_JIT_DEBUG)
if (!m_representation->m_regExpJITCode.isFallBack())
m_state = JITCode;
}
#endif
}
+#else
+ UNUSED_PARAM(charSize);
#endif
m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
}
+void RegExp::compileIfNecessary(JSGlobalData& globalData, Yarr::YarrCharSize charSize)
+{
+ // If the state is NotCompiled or ParseError, then there is no representation.
+ // If there is a representation, and the state must be either JITCode or ByteCode.
+ ASSERT(!!m_representation == (m_state == JITCode || m_state == ByteCode));
+
+ if (m_representation) {
+#if ENABLE(YARR_JIT)
+ if (m_state != JITCode)
+ return;
+ if ((charSize == Yarr::Char8) && (m_representation->m_regExpJITCode.has8BitCode()))
+ return;
+ if ((charSize == Yarr::Char16) && (m_representation->m_regExpJITCode.has16BitCode()))
+ return;
+#else
+ return;
+#endif
+ }
+
+ compile(&globalData, charSize);
+}
+
+
int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector<int, 32>* ovector)
{
if (startOffset < 0)
return -1;
if (m_state != ParseError) {
- compileIfNecessary(globalData);
+ compileIfNecessary(globalData, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
int offsetVectorSize = (m_numSubpatterns + 1) * 2;
int* offsetVector;
int result;
#if ENABLE(YARR_JIT)
if (m_state == JITCode) {
- result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
+ if (s.is8Bit())
+ result = Yarr::execute(m_representation->m_regExpJITCode, s.latin1().data(), startOffset, s.length(), offsetVector);
+ else
+ result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
#if ENABLE(YARR_JIT_DEBUG)
matchCompareWithInterpreter(s, startOffset, offsetVector, result);
#endif
} else
#endif
- result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
+ result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s, startOffset, s.length(), offsetVector);
ASSERT(result >= -1);
#if REGEXP_FUNC_TEST_DATA_GEN
for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
interpreterOffsetVector[j] = -1;
- interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
+ interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s, startOffset, s.length(), interpreterOffsetVector);
if (jitResult != interpreterResult)
differences++;
#include "ExecutableAllocator.h"
#include "Structure.h"
#include "RegExpKey.h"
+#include "yarr/Yarr.h"
#include <wtf/Forward.h>
#include <wtf/RefCounted.h>
NotCompiled
} m_state;
- void compile(JSGlobalData*);
- void compileIfNecessary(JSGlobalData& globalData)
- {
- if (m_representation)
- return;
- compile(&globalData);
- }
+ void compile(JSGlobalData*, Yarr::YarrCharSize);
+ void compileIfNecessary(JSGlobalData&, Yarr::YarrCharSize);
#if ENABLE(YARR_JIT_DEBUG)
void matchCompareWithInterpreter(const UString&, int startOffset, int* offsetVector, int jitResult);
return m_impl->characters();
}
+ bool is8Bit() const { return false; }
+
CString ascii() const;
CString latin1() const;
CString utf8(bool strict = false) const;
return new (allocateCell<GlobalObject>(globalData.heap)) GlobalObject(globalData, structure, arguments);
}
virtual UString className() const { return "global"; }
+
+protected:
+ void finishCreation(JSGlobalData& globalData, const Vector<UString>& arguments)
+ {
+ Base::finishCreation(globalData, this);
+ UNUSED_PARAM(arguments);
+ }
};
COMPILE_ASSERT(!IsInteger<GlobalObject>::value, WTF_IsInteger_GlobalObject_false);
GlobalObject::GlobalObject(JSGlobalData& globalData, Structure* structure, const Vector<UString>& arguments)
: JSGlobalObject(globalData, structure)
{
- UNUSED_PARAM(arguments);
+ finishCreation(globalData, arguments);
}
// Use SEH for Release builds only to get rid of the crash report dialog
JSRegExpErrorInternal = -4
};
+enum YarrCharSize {
+ Char8,
+ Char16
+};
+
PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
-int interpret(BytecodePattern*, const UChar* input, unsigned start, unsigned length, int* output);
+int interpret(BytecodePattern*, const UString& input, unsigned start, unsigned length, int* output);
} } // namespace JSC::Yarr
#include "config.h"
#include "YarrInterpreter.h"
+#include "UString.h"
#include "Yarr.h"
#include <wtf/BumpPointerAllocator.h>
+#include <wtf/text/CString.h>
#ifndef NDEBUG
#include <stdio.h>
allocatorPool = allocatorPool->dealloc(context);
}
+ // This class is a placeholder for future character iterator, current
+ // proposed name StringConstCharacterIterator. It presents either an
+ // 8 bit (Latin-1) or 16 bit (UChar) buffer through a uniform UChar
+ // subscript interface. For the 8 bit UString case it owns a copy of
+ // the character data; in all other cases it borrows the caller's
+ // buffer, which must outlive this object.
+ class CharAccess {
+ public:
+ CharAccess(const UString& s)
+ : m_buffer(0)
+ {
+ if (s.is8Bit()) {
+ m_charSize = Char8;
+ unsigned length = s.length();
+ m_ptr.ptr8 = m_buffer = static_cast<char *>(fastMalloc(length));
+ memcpy(m_buffer, s.latin1().data(), length);
+ } else {
+ m_charSize = Char16;
+ m_ptr.ptr16 = s.characters();
+ }
+ }
+
+ CharAccess(const char* ptr)
+ : m_charSize(Char8)
+ , m_buffer(0)
+ {
+ m_ptr.ptr8 = ptr;
+ }
+
+ CharAccess(const UChar* ptr)
+ : m_charSize(Char16)
+ , m_buffer(0)
+ {
+ m_ptr.ptr16 = ptr;
+ }
+
+ ~CharAccess()
+ {
+ if (m_charSize == Char8)
+ fastFree(m_buffer);
+ }
+
+ inline UChar operator[](unsigned index)
+ {
+ if (m_charSize == Char8) {
+ // Widen through unsigned char: plain char may be signed, and
+ // sign extending Latin-1 characters >= 0x80 would produce
+ // bogus UChar values (e.g. 0xE9 -> 0xFFE9).
+ return static_cast<unsigned char>(m_ptr.ptr8[index]);
+ }
+ return m_ptr.ptr16[index];
+ }
+
+ private:
+ union {
+ const char* ptr8;
+ const UChar* ptr16;
+ } m_ptr;
+ YarrCharSize m_charSize;
+ char* m_buffer;
+ };
+
class InputStream {
public:
- InputStream(const UChar* input, unsigned start, unsigned length)
+ InputStream(const UString& input, unsigned start, unsigned length)
: input(input)
, pos(start)
, length(length)
}
private:
- const UChar* input;
+ CharAccess input;
unsigned pos;
unsigned length;
};
return output[0];
}
- Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
+ Interpreter(BytecodePattern* pattern, int* output, const UString input, unsigned start, unsigned length)
: pattern(pattern)
, output(output)
- , input(inputChar, start, length)
+ , input(input, start, length)
, allocatorPool(0)
, remainingMatchCount(matchLimit)
{
return ByteCompiler(pattern).compile(allocator);
}
-int interpret(BytecodePattern* bytecode, const UChar* input, unsigned start, unsigned length, int* output)
+int interpret(BytecodePattern* bytecode, const UString& input, unsigned start, unsigned length, int* output)
{
return Interpreter(bytecode, output, input, start, length).interpret();
}
return branch32(NotEqual, index, length);
}
- Jump jumpIfCharEquals(UChar ch, int inputPosition)
- {
- return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
- }
-
Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
{
+ if (m_charSize == Char8)
+ return branch8(NotEqual, BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), Imm32(ch));
return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
}
void readCharacter(int inputPosition, RegisterID reg)
{
- load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
+ else
+ load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
}
void storeToFrame(RegisterID reg, unsigned frameLocation)
PatternTerm* term = op.m_term;
UChar ch = term->patternCharacter;
+ if ((ch > 0xff) && (m_charSize == Char8)) {
+ // Have a 16 bit pattern character and an 8 bit string - short circuit
+ op.m_jumps.append(jump());
+ return;
+ }
+
const RegisterID character = regT0;
if (nextOp.m_op == OpTerm) {
UChar ch2 = nextTerm->patternCharacter;
+ int shiftAmount = m_charSize == Char8 ? 8 : 16;
int mask = 0;
- int chPair = ch | (ch2 << 16);
+ int chPair = ch | (ch2 << shiftAmount);
if (m_pattern.m_ignoreCase) {
if (isASCIIAlpha(ch))
mask |= 32;
if (isASCIIAlpha(ch2))
- mask |= 32 << 16;
+ mask |= 32 << shiftAmount;
}
- BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
- if (mask) {
- load32WithUnalignedHalfWords(address, character);
- or32(Imm32(mask), character);
- op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
- } else
- op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));
-
+ if (m_charSize == Char8) {
+ BaseIndex address(input, index, TimesOne, (term->inputPosition - m_checked) * sizeof(char));
+ if (mask) {
+ load16(address, character);
+ or32(Imm32(mask), character);
+ op.m_jumps.append(branch16(NotEqual, character, Imm32(chPair | mask)));
+ } else
+ op.m_jumps.append(branch16(NotEqual, address, Imm32(chPair)));
+ } else {
+ BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
+ if (mask) {
+ load32WithUnalignedHalfWords(address, character);
+ or32(Imm32(mask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
+ } else
+ op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));
+ }
nextOp.m_isDeadCode = true;
return;
}
sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
Label loop(this);
- BaseIndex address(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet());
+ BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- load16(address, character);
+ if (m_charSize == Char8)
+ load8(address, character);
+ else
+ load16(address, character);
or32(TrustedImm32(32), character);
op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
} else {
ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- op.m_jumps.append(branch16(NotEqual, address, Imm32(ch)));
+ if (m_charSize == Char8)
+ op.m_jumps.append(branch8(NotEqual, address, Imm32(ch)));
+ else
+ op.m_jumps.append(branch16(NotEqual, address, Imm32(ch)));
}
add32(TrustedImm32(1), countRegister);
branch32(NotEqual, countRegister, index).linkTo(loop, this);
move(TrustedImm32(0), countRegister);
- JumpList failures;
- Label loop(this);
- failures.append(atEndOfInput());
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(term->inputPosition - m_checked, character);
- or32(TrustedImm32(32), character);
- failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ if ((ch > 0xff) && (m_charSize == Char8)) {
+ // Have a 16 bit pattern character and an 8 bit string - short circuit
+ op.m_jumps.append(jump());
} else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
- }
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(term->inputPosition - m_checked, character);
+ or32(TrustedImm32(32), character);
+ failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
+ }
- add32(TrustedImm32(1), countRegister);
- add32(TrustedImm32(1), index);
- if (term->quantityCount == quantifyInfinite)
- jump(loop);
- else
- branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
+ add32(TrustedImm32(1), countRegister);
+ add32(TrustedImm32(1), index);
+ if (term->quantityCount == quantifyInfinite)
+ jump(loop);
+ else
+ branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
- failures.link(this);
+ failures.link(this);
+ }
op.m_reentry = label();
storeToFrame(countRegister, term->frameLocation);
loadFromFrame(term->frameLocation, countRegister);
- nonGreedyFailures.append(atEndOfInput());
- if (term->quantityCount != quantifyInfinite)
- nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(term->inputPosition - m_checked, character);
- or32(TrustedImm32(32), character);
- nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ if ((ch > 0xff) && (m_charSize == Char8)) {
+ // Have a 16 bit pattern character and an 8 bit string - short circuit
+ nonGreedyFailures.append(jump());
} else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
- }
-
- add32(TrustedImm32(1), countRegister);
- add32(TrustedImm32(1), index);
+ nonGreedyFailures.append(atEndOfInput());
+ if (term->quantityCount != quantifyInfinite)
+ nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(term->inputPosition - m_checked, character);
+ or32(TrustedImm32(32), character);
+ nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
+ }
- jump(op.m_reentry);
+ add32(TrustedImm32(1), countRegister);
+ add32(TrustedImm32(1), index);
+ jump(op.m_reentry);
+ }
nonGreedyFailures.link(this);
+
sub32(countRegister, index);
m_backtrackingState.fallthrough();
}
Label loop(this);
JumpList matchDest;
- load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
+ else
+ load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
matchCharacterClass(character, matchDest, term->characterClass);
if (term->invert())
saveStartIndex.append(branchTest32(Zero, matchPos));
Label findBOLLoop(this);
sub32(TrustedImm32(1), matchPos);
- load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, matchPos, TimesOne, 0), character);
+ else
+ load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
saveStartIndex.append(jump());
Label findEOLLoop(this);
foundEndingNewLine.append(branch32(Equal, matchPos, length));
- load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, matchPos, TimesOne, 0), character);
+ else
+ load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
add32(TrustedImm32(1), matchPos);
jump(findEOLLoop);
}
public:
- YarrGenerator(YarrPattern& pattern)
+ YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
: m_pattern(pattern)
+ , m_charSize(charSize)
+ , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
, m_shouldFallBack(false)
, m_checked(0)
{
// Link & finalize the code.
LinkBuffer linkBuffer(*globalData, this);
m_backtrackingState.linkDataLabels(linkBuffer);
- jitObject.set(linkBuffer.finalizeCode());
+ if (m_charSize == Char8)
+ jitObject.set8BitCode(linkBuffer.finalizeCode());
+ else
+ jitObject.set16BitCode(linkBuffer.finalizeCode());
jitObject.setFallBack(m_shouldFallBack);
}
private:
YarrPattern& m_pattern;
+ YarrCharSize m_charSize;
+
+ Scale m_charScale;
+
// Used to detect regular expression constructs that are not currently
// supported in the JIT; fall back to the interpreter when this is detected.
bool m_shouldFallBack;
BacktrackingState m_backtrackingState;
};
-void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject)
+void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject)
{
- YarrGenerator(pattern).compile(globalData, jitObject);
+ YarrGenerator(pattern, charSize).compile(globalData, jitObject);
+}
+
+int execute(YarrCodeBlock& jitObject, const char* input, unsigned start, unsigned length, int* output)
+{
+ return jitObject.execute(input, start, length, output);
}
int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
#include "JSGlobalData.h"
#include "MacroAssembler.h"
#include "UString.h"
+#include "Yarr.h"
#include "YarrPattern.h"
#if CPU(X86) && !COMPILER(MSVC)
namespace Yarr {
class YarrCodeBlock {
- typedef int (*YarrJITCode)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
+ typedef int (*YarrJITCode8)(const char* input, unsigned start, unsigned length, int* output) YARR_CALL;
+ typedef int (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
public:
YarrCodeBlock()
void setFallBack(bool fallback) { m_needFallBack = fallback; }
bool isFallBack() { return m_needFallBack; }
- void set(MacroAssembler::CodeRef ref) { m_ref = ref; }
+ bool has8BitCode() { return m_ref8.size(); }
+ bool has16BitCode() { return m_ref16.size(); }
+ void set8BitCode(MacroAssembler::CodeRef ref) { m_ref8 = ref; }
+ void set16BitCode(MacroAssembler::CodeRef ref) { m_ref16 = ref; }
- int execute(const UChar* input, unsigned start, unsigned length, int* output)
+ int execute(const char* input, unsigned start, unsigned length, int* output)
{
- return reinterpret_cast<YarrJITCode>(m_ref.code().executableAddress())(input, start, length, output);
+ ASSERT(has8BitCode());
+ return reinterpret_cast<YarrJITCode8>(m_ref8.code().executableAddress())(input, start, length, output);
}
+ int execute(const UChar* input, unsigned start, unsigned length, int* output)
+ {
+ ASSERT(has16BitCode());
+ return reinterpret_cast<YarrJITCode16>(m_ref16.code().executableAddress())(input, start, length, output);
+ }
#if ENABLE(REGEXP_TRACING)
void *getAddr() { return m_ref.code().executableAddress(); }
#endif
private:
- MacroAssembler::CodeRef m_ref;
+ MacroAssembler::CodeRef m_ref8;
+ MacroAssembler::CodeRef m_ref16;
bool m_needFallBack;
};
-void jitCompile(YarrPattern&, JSGlobalData*, YarrCodeBlock& jitObject);
+void jitCompile(YarrPattern&, YarrCharSize, JSGlobalData*, YarrCodeBlock& jitObject);
int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output);
+int execute(YarrCodeBlock& jitObject, const char* input, unsigned start, unsigned length, int* output);
} } // namespace JSC::Yarr
: m_delegate(delegate)
, m_backReferenceLimit(backReferenceLimit)
, m_err(NoError)
- , m_data(pattern.characters())
+ , m_data(pattern)
, m_size(pattern.length())
, m_index(0)
, m_parenthesesNestingDepth(0)
Delegate& m_delegate;
unsigned m_backReferenceLimit;
ErrorCode m_err;
- const UChar* m_data;
+ const UString& m_data;
unsigned m_size;
unsigned m_index;
unsigned m_parenthesesNestingDepth;
+2011-09-12 Michael Saboff <msaboff@apple.com>
+
+ Update RegExp and related classes to use 8 bit strings when available
+ https://bugs.webkit.org/show_bug.cgi?id=67337
+
+ Updated the call to match() to take a UString& instead of a UChar*.
+
+ Reviewed by Gavin Barraclough.
+
+ No new tests; covered by existing tests.
+
+ * platform/text/RegularExpression.cpp:
+ (WebCore::RegularExpression::match):
+
2011-09-12 Beth Dakin <bdakin@apple.com>
https://bugs.webkit.org/show_bug.cgi?id=67898
for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
offsetVector[j] = -1;
- int result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str.characters(), startFrom, str.length(), offsetVector);
+ int result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), JSC::UString(str.impl()), startFrom, str.length(), offsetVector);
ASSERT(result >= -1);
if (result < 0) {