/* * Copyright (C) 2009 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include "YarrJIT.h" #include "ASCIICType.h" #include "LinkBuffer.h" #include "Yarr.h" #if ENABLE(YARR_JIT) using namespace WTF; namespace JSC { namespace Yarr { class YarrGenerator : private MacroAssembler { friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); #if CPU(ARM) static const RegisterID input = ARMRegisters::r0; static const RegisterID index = ARMRegisters::r1; static const RegisterID length = ARMRegisters::r2; static const RegisterID output = ARMRegisters::r4; static const RegisterID regT0 = ARMRegisters::r5; static const RegisterID regT1 = ARMRegisters::r6; static const RegisterID returnRegister = ARMRegisters::r0; #elif CPU(MIPS) static const RegisterID input = MIPSRegisters::a0; static const RegisterID index = MIPSRegisters::a1; static const RegisterID length = MIPSRegisters::a2; static const RegisterID output = MIPSRegisters::a3; static const RegisterID regT0 = MIPSRegisters::t4; static const RegisterID regT1 = MIPSRegisters::t5; static const RegisterID returnRegister = MIPSRegisters::v0; #elif CPU(SH4) static const RegisterID input = SH4Registers::r4; static const RegisterID index = SH4Registers::r5; static const RegisterID length = SH4Registers::r6; static const RegisterID output = SH4Registers::r7; static const RegisterID regT0 = SH4Registers::r0; static const RegisterID regT1 = SH4Registers::r1; static const RegisterID returnRegister = SH4Registers::r0; #elif CPU(X86) static const RegisterID input = X86Registers::eax; static const RegisterID index = X86Registers::edx; static const RegisterID length = X86Registers::ecx; static const RegisterID output = X86Registers::edi; static const RegisterID regT0 = X86Registers::ebx; static const RegisterID regT1 = X86Registers::esi; static const RegisterID returnRegister = X86Registers::eax; #elif CPU(X86_64) static const RegisterID input = X86Registers::edi; static const RegisterID index = X86Registers::esi; static const RegisterID length = X86Registers::edx; static const RegisterID output = X86Registers::ecx; static const RegisterID regT0 = X86Registers::eax; static const RegisterID regT1 = X86Registers::ebx; static const RegisterID returnRegister = X86Registers::eax; #endif void optimizeAlternative(PatternAlternative* alternative) { if (!alternative->m_terms.size()) return; for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { PatternTerm& term = alternative->m_terms[i]; PatternTerm& nextTerm = alternative->m_terms[i + 1]; if ((term.type == PatternTerm::TypeCharacterClass) && (term.quantityType == QuantifierFixedCount) && (nextTerm.type == PatternTerm::TypePatternCharacter) && (nextTerm.quantityType == QuantifierFixedCount)) { PatternTerm termCopy = term; alternative->m_terms[i] = nextTerm; alternative->m_terms[i + 1] = termCopy; } } } void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) { do { // pick which range we're going to generate int which = count >> 1; char lo = ranges[which].begin; char hi = ranges[which].end; // check if there are any ranges or matches below lo. If not, just jl to failure - // if there is anything else to check, check that first, if it falls through jmp to failure. if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); // generate code for all ranges before this one if (which) matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); ++*matchIndex; } failures.append(jump()); loOrAbove.link(this); } else if (which) { Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); failures.append(jump()); loOrAbove.link(this); } else failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) ++*matchIndex; matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); // fall through to here, the value is above hi. // shuffle along & loop around if there are any more matches to handle. unsigned next = which + 1; ranges += next; count -= next; } while (count); } void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) { if (charClass->m_table) { ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table)); matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); return; } Jump unicodeFail; if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f)); if (charClass->m_matchesUnicode.size()) { for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { UChar ch = charClass->m_matchesUnicode[i]; matchDest.append(branch32(Equal, character, Imm32(ch))); } } if (charClass->m_rangesUnicode.size()) { for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { UChar lo = charClass->m_rangesUnicode[i].begin; UChar hi = charClass->m_rangesUnicode[i].end; Jump below = branch32(LessThan, character, Imm32(lo)); matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); below.link(this); } } unicodeFail = jump(); isAscii.link(this); } if (charClass->m_ranges.size()) { unsigned matchIndex = 0; JumpList failures; matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); while (matchIndex < charClass->m_matches.size()) matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); failures.link(this); } else if (charClass->m_matches.size()) { // optimization: gather 'a','A' etc back together, can mask & test once. Vector<char> matchesAZaz; for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { char ch = charClass->m_matches[i]; if (m_pattern.m_ignoreCase) { if (isASCIILower(ch)) { matchesAZaz.append(ch); continue; } if (isASCIIUpper(ch)) continue; } matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); } if (unsigned countAZaz = matchesAZaz.size()) { or32(TrustedImm32(32), character); for (unsigned i = 0; i < countAZaz; ++i) matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i]))); } } if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) unicodeFail.link(this); } // Jumps if input not available; will have (incorrectly) incremented already! Jump jumpIfNoAvailableInput(unsigned countToCheck) { add32(Imm32(countToCheck), index); return branch32(Above, index, length); } Jump jumpIfAvailableInput(unsigned countToCheck) { add32(Imm32(countToCheck), index); return branch32(BelowOrEqual, index, length); } Jump checkInput() { return branch32(BelowOrEqual, index, length); } Jump atEndOfInput() { return branch32(Equal, index, length); } Jump notAtEndOfInput() { return branch32(NotEqual, index, length); } Jump jumpIfCharEquals(UChar ch, int inputPosition) { return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); } Jump jumpIfCharNotEquals(UChar ch, int inputPosition) { return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); } void readCharacter(int inputPosition, RegisterID reg) { load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); } void storeToFrame(RegisterID reg, unsigned frameLocation) { poke(reg, frameLocation); } void storeToFrame(TrustedImm32 imm, unsigned frameLocation) { poke(imm, frameLocation); } DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) { return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); } void loadFromFrame(unsigned frameLocation, RegisterID reg) { peek(reg, frameLocation); } void loadFromFrameAndJump(unsigned frameLocation) { jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); } struct IndirectJumpEntry { IndirectJumpEntry(int32_t stackOffset) : m_stackOffset(stackOffset) { } IndirectJumpEntry(int32_t stackOffset, Jump jump) : m_stackOffset(stackOffset) { addJump(jump); } IndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel) : m_stackOffset(stackOffset) { addDataLabel(dataLabel); } void addJump(Jump jump) { m_relJumps.append(jump); } void addDataLabel(DataLabelPtr dataLabel) { m_dataLabelPtrVector.append(dataLabel); } int32_t m_stackOffset; JumpList m_relJumps; Vector<DataLabelPtr, 16> m_dataLabelPtrVector; }; struct AlternativeBacktrackRecord { DataLabelPtr dataLabel; Label backtrackLocation; AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation) : dataLabel(dataLabel) , backtrackLocation(backtrackLocation) { } }; struct ParenthesesTail; struct TermGenerationState; struct GenerationState { typedef HashMap<int, IndirectJumpEntry*, WTF::IntHash<uint32_t>, UnsignedWithZeroKeyHashTraits<uint32_t> > IndirectJumpHashMap; GenerationState() : m_parenNestingLevel(0) { } void addIndirectJumpEntry(int32_t stackOffset, Jump jump) { IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset); ASSERT(stackOffset >= 0); uint32_t offset = static_cast<uint32_t>(stackOffset); if (result == m_indirectJumpMap.end()) m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, jump)); else result->second->addJump(jump); } void addIndirectJumpEntry(int32_t stackOffset, JumpList jumps) { JumpList::JumpVector jumpVector = jumps.jumps(); size_t size = jumpVector.size(); for (size_t i = 0; i < size; ++i) addIndirectJumpEntry(stackOffset, jumpVector[i]); jumps.empty(); } void addIndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel) { IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset); ASSERT(stackOffset >= 0); uint32_t offset = static_cast<uint32_t>(stackOffset); if (result == m_indirectJumpMap.end()) m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, dataLabel)); else result->second->addDataLabel(dataLabel); } void emitIndirectJumpTable(MacroAssembler* masm) { for (IndirectJumpHashMap::iterator iter = m_indirectJumpMap.begin(); iter != m_indirectJumpMap.end(); ++iter) { IndirectJumpEntry* indJumpEntry = iter->second; size_t size = indJumpEntry->m_dataLabelPtrVector.size(); if (size) { // Link any associated DataLabelPtr's with indirect jump via label Label hereLabel = masm->label(); for (size_t i = 0; i < size; ++i) m_backtrackRecords.append(AlternativeBacktrackRecord(indJumpEntry->m_dataLabelPtrVector[i], hereLabel)); } indJumpEntry->m_relJumps.link(masm); masm->jump(Address(stackPointerRegister, indJumpEntry->m_stackOffset)); delete indJumpEntry; } } void incrementParenNestingLevel() { ++m_parenNestingLevel; } void decrementParenNestingLevel() { --m_parenNestingLevel; } ParenthesesTail* addParenthesesTail(PatternTerm& term, JumpList* jumpListToPriorParen) { ParenthesesTail* parenthesesTail = new ParenthesesTail(term, m_parenNestingLevel, jumpListToPriorParen); m_parenTails.append(parenthesesTail); m_parenTailsForIteration.append(parenthesesTail); return parenthesesTail; } void emitParenthesesTail(YarrGenerator* generator) { unsigned vectorSize = m_parenTails.size(); bool priorBacktrackFallThrough = false; // Emit in reverse order so parentTail N can fall through to N-1 for (unsigned index = vectorSize; index > 0; --index) { JumpList jumpsToNext; priorBacktrackFallThrough = m_parenTails[index-1].get()->generateCode(generator, jumpsToNext, priorBacktrackFallThrough, index > 1); if (index > 1) jumpsToNext.linkTo(generator->label(), generator); else addJumpsToNextInteration(jumpsToNext); } m_parenTails.clear(); } void addJumpToNextInteration(Jump jump) { m_jumpsToNextInteration.append(jump); } void addJumpsToNextInteration(JumpList jumps) { m_jumpsToNextInteration.append(jumps); } void addDataLabelToNextIteration(DataLabelPtr dataLabel) { m_dataPtrsToNextIteration.append(dataLabel); } void linkToNextIteration(Label label) { m_nextIteration = label; for (unsigned i = 0; i < m_dataPtrsToNextIteration.size(); ++i) m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataPtrsToNextIteration[i], m_nextIteration)); m_dataPtrsToNextIteration.clear(); for (unsigned i = 0; i < m_parenTailsForIteration.size(); ++i) m_parenTailsForIteration[i]->setNextIteration(m_nextIteration); m_parenTailsForIteration.clear(); } void linkToNextIteration(YarrGenerator* generator) { m_jumpsToNextInteration.linkTo(m_nextIteration, generator); } int m_parenNestingLevel; Vector<AlternativeBacktrackRecord> m_backtrackRecords; IndirectJumpHashMap m_indirectJumpMap; Label m_nextIteration; Vector<OwnPtr<ParenthesesTail> > m_parenTails; JumpList m_jumpsToNextInteration; Vector<DataLabelPtr> m_dataPtrsToNextIteration; Vector<ParenthesesTail*> m_parenTailsForIteration; }; struct BacktrackDestination { typedef enum { NoBacktrack, BacktrackLabel, BacktrackStackOffset, BacktrackJumpList, BacktrackLinked } BacktrackType; BacktrackDestination() : m_backtrackType(NoBacktrack) , m_backtrackToLabel(0) , m_subDataLabelPtr(0) , m_nextBacktrack(0) , m_backtrackSourceLabel(0) , m_backtrackSourceJumps(0) { } BacktrackDestination(int32_t stackOffset) : m_backtrackType(BacktrackStackOffset) , m_backtrackStackOffset(stackOffset) , m_backtrackToLabel(0) , m_subDataLabelPtr(0) , m_nextBacktrack(0) , m_backtrackSourceLabel(0) , m_backtrackSourceJumps(0) { } BacktrackDestination(Label label) : m_backtrackType(BacktrackLabel) , m_backtrackLabel(label) , m_backtrackToLabel(0) , m_subDataLabelPtr(0) , m_nextBacktrack(0) , m_backtrackSourceLabel(0) , m_backtrackSourceJumps(0) { } void clear(bool doDataLabelClear = true) { m_backtrackType = NoBacktrack; if (doDataLabelClear) clearDataLabel(); m_nextBacktrack = 0; } void clearDataLabel() { m_dataLabelPtr = DataLabelPtr(); } bool hasDestination() { return (m_backtrackType != NoBacktrack); } bool isStackOffset() { return (m_backtrackType == BacktrackStackOffset); } bool isLabel() { return (m_backtrackType == BacktrackLabel); } bool isJumpList() { return (m_backtrackType == BacktrackJumpList); } bool hasDataLabel() { return m_dataLabelPtr.isSet(); } void copyTarget(BacktrackDestination& rhs, bool copyDataLabel = true) { m_backtrackType = rhs.m_backtrackType; if (m_backtrackType == BacktrackStackOffset) m_backtrackStackOffset = rhs.m_backtrackStackOffset; else if (m_backtrackType == BacktrackLabel) m_backtrackLabel = rhs.m_backtrackLabel; if (copyDataLabel) m_dataLabelPtr = rhs.m_dataLabelPtr; m_backtrackSourceJumps = rhs.m_backtrackSourceJumps; m_backtrackSourceLabel = rhs.m_backtrackSourceLabel; } void copyTo(BacktrackDestination& lhs) { lhs.m_backtrackType = m_backtrackType; if (m_backtrackType == BacktrackStackOffset) lhs.m_backtrackStackOffset = m_backtrackStackOffset; else if (m_backtrackType == BacktrackLabel) lhs.m_backtrackLabel = m_backtrackLabel; lhs.m_backtrackSourceJumps = m_backtrackSourceJumps; lhs.m_backtrackSourceLabel = m_backtrackSourceLabel; lhs.m_dataLabelPtr = m_dataLabelPtr; lhs.m_backTrackJumps = m_backTrackJumps; } void addBacktrackJump(Jump jump) { m_backTrackJumps.append(jump); } void setStackOffset(int32_t stackOffset) { m_backtrackType = BacktrackStackOffset; m_backtrackStackOffset = stackOffset; } void setLabel(Label label) { m_backtrackType = BacktrackLabel; m_backtrackLabel = label; } void setNextBacktrackLabel(Label label) { if (m_nextBacktrack) m_nextBacktrack->setLabel(label); } void propagateBacktrackToLabel(const BacktrackDestination& rhs) { if (!m_backtrackToLabel && rhs.m_backtrackToLabel) m_backtrackToLabel = rhs.m_backtrackToLabel; } void setBacktrackToLabel(Label* backtrackToLabel) { if (!m_backtrackToLabel) m_backtrackToLabel = backtrackToLabel; } bool hasBacktrackToLabel() { return m_backtrackToLabel; } void setBacktrackJumpList(JumpList* jumpList) { m_backtrackType = BacktrackJumpList; m_backtrackSourceJumps = jumpList; } void setBacktrackSourceLabel(Label* backtrackSourceLabel) { m_backtrackSourceLabel = backtrackSourceLabel; } void setDataLabel(DataLabelPtr dp) { if (m_subDataLabelPtr) { *m_subDataLabelPtr = dp; m_subDataLabelPtr = 0; } else { ASSERT(!hasDataLabel()); m_dataLabelPtr = dp; } } void clearSubDataLabelPtr() { m_subDataLabelPtr = 0; } void setSubDataLabelPtr(DataLabelPtr* subDataLabelPtr) { m_subDataLabelPtr = subDataLabelPtr; } void linkToNextBacktrack(BacktrackDestination* nextBacktrack) { m_nextBacktrack = nextBacktrack; } int32_t getStackOffset() { ASSERT(m_backtrackType == BacktrackStackOffset); return m_backtrackStackOffset; } Label getLabel() { ASSERT(m_backtrackType == BacktrackLabel); return m_backtrackLabel; } JumpList& getBacktrackJumps() { return m_backTrackJumps; } DataLabelPtr& getDataLabel() { return m_dataLabelPtr; } void jumpToBacktrack(MacroAssembler* masm) { if (isJumpList()) { if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) masm->jump().linkTo(*m_backtrackSourceLabel, masm); else m_backtrackSourceJumps->append(masm->jump()); } else if (isStackOffset()) masm->jump(Address(stackPointerRegister, m_backtrackStackOffset)); else if (isLabel()) masm->jump().linkTo(m_backtrackLabel, masm); else m_backTrackJumps.append(masm->jump()); } void jumpToBacktrack(YarrGenerator* generator, Jump jump) { if (isJumpList()) { if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) jump.linkTo(*m_backtrackSourceLabel, generator); else m_backtrackSourceJumps->append(jump); } else if (isStackOffset()) generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jump); else if (isLabel()) jump.linkTo(getLabel(), generator); else m_backTrackJumps.append(jump); } void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps) { if (isJumpList()) { if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) jumps.linkTo(*m_backtrackSourceLabel, generator); else m_backtrackSourceJumps->append(jumps); } else if (isStackOffset()) generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jumps); else if (isLabel()) jumps.linkTo(getLabel(), generator); else m_backTrackJumps.append(jumps); } bool plantJumpToBacktrackIfExists(YarrGenerator* generator) { if (isJumpList()) { if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) generator->jump(*m_backtrackSourceLabel); else m_backtrackSourceJumps->append(generator->jump()); return true; } if (isStackOffset()) { generator->jump(Address(stackPointerRegister, getStackOffset())); return true; } if (isLabel()) { generator->jump(getLabel()); if (hasDataLabel()) { generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), getLabel())); clearDataLabel(); } return true; } return false; } void linkBacktrackToLabel(Label backtrackLabel) { if (m_backtrackToLabel) *m_backtrackToLabel = backtrackLabel; } void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false) { Label hereLabel = generator->label(); if (m_backtrackToLabel) { *m_backtrackToLabel = hereLabel; m_backtrackToLabel = 0; } m_backTrackJumps.link(generator); if (nextIteration) generator->m_expressionState.linkToNextIteration(hereLabel); if (hasDataLabel()) { generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), hereLabel)); // data label cleared as a result of the clear() below } clear(); } void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false) { m_backTrackJumps.linkTo(label, generator); if (nextIteration) generator->m_expressionState.linkToNextIteration(label); if (hasDataLabel()) { generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), label)); clearDataLabel(); } } private: BacktrackType m_backtrackType; int32_t m_backtrackStackOffset; Label m_backtrackLabel; DataLabelPtr m_dataLabelPtr; Label* m_backtrackToLabel; DataLabelPtr* m_subDataLabelPtr; BacktrackDestination* m_nextBacktrack; Label* m_backtrackSourceLabel; JumpList* m_backtrackSourceJumps; JumpList m_backTrackJumps; }; struct TermGenerationState { TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal) : disjunction(disjunction) , checkedTotal(checkedTotal) , m_subParenNum(0) , m_linkedBacktrack(0) , m_jumpList(0) { } void resetAlternative() { m_backtrack.clear(); alt = 0; } bool alternativeValid() { return alt < disjunction->m_alternatives.size(); } void nextAlternative() { ++alt; } PatternAlternative* alternative() { return disjunction->m_alternatives[alt]; } bool isLastAlternative() { return (alt + 1) == disjunction->m_alternatives.size(); } void resetTerm() { ASSERT(alternativeValid()); t = 0; m_subParenNum = 0; } bool termValid() { ASSERT(alternativeValid()); return t < alternative()->m_terms.size(); } void nextTerm() { ASSERT(alternativeValid()); ++t; } PatternTerm& term() { ASSERT(alternativeValid()); return alternative()->m_terms[t]; } bool isLastTerm() { ASSERT(alternativeValid()); return (t + 1) == alternative()->m_terms.size(); } unsigned getSubParenNum() { return m_subParenNum++; } bool isMainDisjunction() { return !disjunction->m_parent; } void setJumpListToPriorParen(JumpList* jumpList) { m_jumpList = jumpList; } JumpList* getJumpListToPriorParen() { return m_jumpList; } PatternTerm& lookaheadTerm() { ASSERT(alternativeValid()); ASSERT((t + 1) < alternative()->m_terms.size()); return alternative()->m_terms[t + 1]; } bool isSinglePatternCharacterLookaheadTerm() { ASSERT(alternativeValid()); return ((t + 1) < alternative()->m_terms.size()) && (lookaheadTerm().type == PatternTerm::TypePatternCharacter) && (lookaheadTerm().quantityType == QuantifierFixedCount) && (lookaheadTerm().quantityCount == 1); } int inputOffset() { return term().inputPosition - checkedTotal; } void clearBacktrack() { m_backtrack.clear(false); m_linkedBacktrack = 0; } void jumpToBacktrack(MacroAssembler* masm) { m_backtrack.jumpToBacktrack(masm); } void jumpToBacktrack(YarrGenerator* generator, Jump jump) { m_backtrack.jumpToBacktrack(generator, jump); } void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps) { m_backtrack.jumpToBacktrack(generator, jumps); } bool plantJumpToBacktrackIfExists(YarrGenerator* generator) { return m_backtrack.plantJumpToBacktrackIfExists(generator); } void linkDataLabelToBacktrackIfExists(YarrGenerator* generator, DataLabelPtr dataLabel) { // If we have a stack offset backtrack destination, use it directly if (m_backtrack.isStackOffset()) { generator->m_expressionState.addIndirectJumpEntry(m_backtrack.getStackOffset(), dataLabel); m_backtrack.clearSubDataLabelPtr(); } else { // If we have a backtrack label, connect the datalabel to it directly. if (m_backtrack.isLabel()) generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, m_backtrack.getLabel())); else setBacktrackDataLabel(dataLabel); } } void addBacktrackJump(Jump jump) { m_backtrack.addBacktrackJump(jump); } void setBacktrackDataLabel(DataLabelPtr dp) { m_backtrack.setDataLabel(dp); } void setBackTrackStackOffset(int32_t stackOffset) { m_backtrack.setStackOffset(stackOffset); } void setBacktrackLabel(Label label) { m_backtrack.setLabel(label); } void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false) { m_backtrack.linkAlternativeBacktracks(generator, nextIteration); m_linkedBacktrack = 0; } void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false) { m_backtrack.linkAlternativeBacktracksTo(generator, label, nextIteration); } void setBacktrackLink(BacktrackDestination* linkedBacktrack) { m_linkedBacktrack = linkedBacktrack; } void chainBacktracks(BacktrackDestination* followonBacktrack) { if (m_linkedBacktrack) m_linkedBacktrack->linkToNextBacktrack(followonBacktrack); } BacktrackDestination& getBacktrackDestination() { return m_backtrack; } void propagateBacktrackingFrom(YarrGenerator* generator, BacktrackDestination& backtrack, bool doJump = true) { if (doJump) m_backtrack.jumpToBacktrack(generator, backtrack.getBacktrackJumps()); if (m_backtrack.isLabel() && backtrack.hasBacktrackToLabel()) backtrack.linkBacktrackToLabel(m_backtrack.getLabel()); if (backtrack.hasDestination()) { if (m_backtrack.hasDataLabel()) generator->m_expressionState.addDataLabelToNextIteration(m_backtrack.getDataLabel()); m_backtrack.copyTarget(backtrack, doJump); } } PatternDisjunction* disjunction; int checkedTotal; private: unsigned alt; unsigned t; unsigned m_subParenNum; BacktrackDestination m_backtrack; BacktrackDestination* m_linkedBacktrack; JumpList* m_jumpList; }; struct ParenthesesTail { ParenthesesTail(PatternTerm& term, int nestingLevel, JumpList* jumpListToPriorParen) : m_term(term) , m_nestingLevel(nestingLevel) , m_subParenIndex(0) , m_jumpListToPriorParen(jumpListToPriorParen) { } void processBacktracks(YarrGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough) { m_nonGreedyTryParentheses = nonGreedyTryParentheses; m_fallThrough = fallThrough; m_subParenIndex = state.getSubParenNum(); parenthesesState.getBacktrackDestination().copyTo(m_parenBacktrack); state.chainBacktracks(&m_backtrack); BacktrackDestination& stateBacktrack = state.getBacktrackDestination(); stateBacktrack.copyTo(m_backtrack); stateBacktrack.setBacktrackToLabel(&m_backtrackToLabel); state.setBacktrackLink(&m_backtrack); stateBacktrack.setSubDataLabelPtr(&m_dataAfterLabelPtr); m_doDirectBacktrack = m_parenBacktrack.hasDestination(); if ((m_term.quantityType == QuantifierGreedy) || (m_term.quantityType == QuantifierNonGreedy)) m_doDirectBacktrack = false; if (m_doDirectBacktrack) state.propagateBacktrackingFrom(generator, m_parenBacktrack, false); else { stateBacktrack.setBacktrackJumpList(&m_afterBacktrackJumps); stateBacktrack.setBacktrackSourceLabel(&m_backtrackFromAfterParens); } } void setNextIteration(Label nextIteration) { if (!m_nestingLevel && !m_backtrackToLabel.isSet()) m_backtrackToLabel = nextIteration; } void addAfterParenJump(Jump jump) { m_afterBacktrackJumps.append(jump); } bool generateCode(YarrGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough) { const RegisterID indexTemporary = regT0; unsigned parenthesesFrameLocation = m_term.frameLocation; Jump fromPriorBacktrack; bool needJumpForPriorParenTail = false; if (priorBackTrackFallThrough && ((m_term.quantityType == QuantifierGreedy) || (m_term.quantityType == QuantifierNonGreedy) || (!m_doDirectBacktrack && m_parenBacktrack.hasDestination()))) { // If the prior paren tail code assumed that it could fall through, // but we need to generate after paren backtrack code, then provide // a jump around that code for the prior paren tail code. // A regular expressing like ((xxx)...)? needs this. fromPriorBacktrack = generator->jump(); needJumpForPriorParenTail = true; } if (!m_backtrack.hasDestination()) { if (m_backtrackToLabel.isSet()) { m_backtrack.setLabel(m_backtrackToLabel); nextBacktrackFallThrough = false; } else if (m_jumpListToPriorParen) { // If we don't have a destination, go back to either the prior paren or the next outer paren. m_backtrack.setBacktrackJumpList(m_jumpListToPriorParen); nextBacktrackFallThrough = false; } else m_backtrack.setBacktrackJumpList(&jumpsToNext); } else nextBacktrackFallThrough = false; // A failure AFTER the parens jumps here - Backtrack to this paren m_backtrackFromAfterParens = generator->label(); if (m_dataAfterLabelPtr.isSet()) generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataAfterLabelPtr, m_backtrackFromAfterParens)); m_afterBacktrackJumps.link(generator); if (m_term.quantityType == QuantifierGreedy) { // If this is -1 we have now tested with both with and without the parens. generator->loadFromFrame(parenthesesFrameLocation, indexTemporary); m_backtrack.jumpToBacktrack(generator, generator->branch32(Equal, indexTemporary, TrustedImm32(-1))); } else if (m_term.quantityType == QuantifierNonGreedy) { // If this is -1 we have now tested with both with and without the parens. generator->loadFromFrame(parenthesesFrameLocation, indexTemporary); generator->branch32(Equal, indexTemporary, TrustedImm32(-1)).linkTo(m_nonGreedyTryParentheses, generator); } if (!m_doDirectBacktrack) m_parenBacktrack.plantJumpToBacktrackIfExists(generator); // A failure WITHIN the parens jumps here if (needJumpForPriorParenTail) fromPriorBacktrack.link(generator); m_parenBacktrack.linkAlternativeBacktracks(generator); m_withinBacktrackJumps.link(generator); if (m_term.capture()) generator->store32(TrustedImm32(-1), Address(output, (m_term.parentheses.subpatternId << 1) * sizeof(int))); if (m_term.quantityType == QuantifierGreedy) { generator->storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); generator->jump().linkTo(m_fallThrough, generator); nextBacktrackFallThrough = false; } else if (!nextBacktrackFallThrough) m_backtrack.jumpToBacktrack(generator); if (!m_doDirectBacktrack) m_backtrack.setNextBacktrackLabel(m_backtrackFromAfterParens); return nextBacktrackFallThrough; } PatternTerm& m_term; int m_nestingLevel; unsigned m_subParenIndex; JumpList* m_jumpListToPriorParen; Label m_nonGreedyTryParentheses; Label m_fallThrough; Label m_backtrackToLabel; Label m_backtrackFromAfterParens; DataLabelPtr m_dataAfterLabelPtr; JumpList m_withinBacktrackJumps; JumpList m_afterBacktrackJumps; BacktrackDestination m_parenBacktrack; BacktrackDestination m_backtrack; bool m_doDirectBacktrack; }; void generateAssertionBOL(TermGenerationState& state) { PatternTerm& term = state.term(); if (m_pattern.m_multiline) { const RegisterID character = regT0; JumpList matchDest; if (!term.inputPosition) matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal))); readCharacter(state.inputOffset() - 1, character); matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); state.jumpToBacktrack(this); matchDest.link(this); } else { // Erk, really should poison out these alternatives early. :-/ if (term.inputPosition) state.jumpToBacktrack(this); else state.jumpToBacktrack(this, branch32(NotEqual, index, Imm32(state.checkedTotal))); } } void generateAssertionEOL(TermGenerationState& state) { PatternTerm& term = state.term(); if (m_pattern.m_multiline) { const RegisterID character = regT0; JumpList matchDest; if (term.inputPosition == state.checkedTotal) matchDest.append(atEndOfInput()); readCharacter(state.inputOffset(), character); matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); state.jumpToBacktrack(this); matchDest.link(this); } else { if (term.inputPosition == state.checkedTotal) state.jumpToBacktrack(this, notAtEndOfInput()); // Erk, really should poison out these alternatives early. :-/ else state.jumpToBacktrack(this); } } // Also falls though on nextIsNotWordChar. void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) { const RegisterID character = regT0; PatternTerm& term = state.term(); if (term.inputPosition == state.checkedTotal) nextIsNotWordChar.append(atEndOfInput()); readCharacter(state.inputOffset(), character); matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); } void generateAssertionWordBoundary(TermGenerationState& state) { const RegisterID character = regT0; PatternTerm& term = state.term(); Jump atBegin; JumpList matchDest; if (!term.inputPosition) atBegin = branch32(Equal, index, Imm32(state.checkedTotal)); readCharacter(state.inputOffset() - 1, character); matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); if (!term.inputPosition) atBegin.link(this); // We fall through to here if the last character was not a wordchar. JumpList nonWordCharThenWordChar; JumpList nonWordCharThenNonWordChar; if (term.invert()) { matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar); nonWordCharThenWordChar.append(jump()); } else { matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar); nonWordCharThenNonWordChar.append(jump()); } state.jumpToBacktrack(this, nonWordCharThenNonWordChar); // We jump here if the last character was a wordchar. matchDest.link(this); JumpList wordCharThenWordChar; JumpList wordCharThenNonWordChar; if (term.invert()) { matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar); wordCharThenWordChar.append(jump()); } else { matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar); // This can fall-though! } state.jumpToBacktrack(this, wordCharThenWordChar); nonWordCharThenWordChar.link(this); wordCharThenNonWordChar.link(this); } void generatePatternCharacterSingle(TermGenerationState& state) { const RegisterID character = regT0; UChar ch = state.term().patternCharacter; if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { readCharacter(state.inputOffset(), character); or32(TrustedImm32(32), character); state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); } else { ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); state.jumpToBacktrack(this, jumpIfCharNotEquals(ch, state.inputOffset())); } } void generatePatternCharacterPair(TermGenerationState& state) { const RegisterID character = regT0; UChar ch1 = state.term().patternCharacter; UChar ch2 = state.lookaheadTerm().patternCharacter; int mask = 0; int chPair = ch1 | (ch2 << 16); if (m_pattern.m_ignoreCase) { if (isASCIIAlpha(ch1)) mask |= 32; if (isASCIIAlpha(ch2)) mask |= 32 << 16; } if (mask) { load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character); or32(Imm32(mask), character); state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(chPair | mask))); } else state.jumpToBacktrack(this, branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair))); } void generatePatternCharacterFixed(TermGenerationState& state) { const RegisterID character = regT0; const RegisterID countRegister = regT1; PatternTerm& term = state.term(); UChar ch = term.patternCharacter; move(index, countRegister); sub32(Imm32(term.quantityCount), countRegister); Label loop(this); if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); or32(TrustedImm32(32), character); state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); } else { ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); state.jumpToBacktrack(this, branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch))); } add32(TrustedImm32(1), countRegister); branch32(NotEqual, countRegister, index).linkTo(loop, this); } void generatePatternCharacterGreedy(TermGenerationState& state) { const RegisterID character = regT0; const RegisterID countRegister = regT1; PatternTerm& term = state.term(); UChar ch = term.patternCharacter; move(TrustedImm32(0), countRegister); JumpList failures; Label loop(this); failures.append(atEndOfInput()); if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { readCharacter(state.inputOffset(), character); or32(TrustedImm32(32), character); failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); } else { ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); failures.append(jumpIfCharNotEquals(ch, state.inputOffset())); } add32(TrustedImm32(1), countRegister); add32(TrustedImm32(1), index); if (term.quantityCount != quantifyInfinite) { branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); failures.append(jump()); } else jump(loop); Label backtrackBegin(this); loadFromFrame(term.frameLocation, countRegister); state.jumpToBacktrack(this, branchTest32(Zero, countRegister)); sub32(TrustedImm32(1), countRegister); sub32(TrustedImm32(1), index); failures.link(this); storeToFrame(countRegister, term.frameLocation); state.setBacktrackLabel(backtrackBegin); } void generatePatternCharacterNonGreedy(TermGenerationState& state) { const RegisterID character = regT0; const RegisterID countRegister = regT1; PatternTerm& term = state.term(); UChar ch = term.patternCharacter; move(TrustedImm32(0), countRegister); Jump firstTimeDoNothing = jump(); Label hardFail(this); sub32(countRegister, index); state.jumpToBacktrack(this); Label backtrackBegin(this); loadFromFrame(term.frameLocation, countRegister); atEndOfInput().linkTo(hardFail, this); if (term.quantityCount != quantifyInfinite) branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { readCharacter(state.inputOffset(), character); or32(TrustedImm32(32), character); branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this); } else { ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this); } add32(TrustedImm32(1), countRegister); add32(TrustedImm32(1), index); firstTimeDoNothing.link(this); storeToFrame(countRegister, term.frameLocation); state.setBacktrackLabel(backtrackBegin); } void generateCharacterClassSingle(TermGenerationState& state) { const RegisterID character = regT0; PatternTerm& term = state.term(); JumpList matchDest; readCharacter(state.inputOffset(), character); matchCharacterClass(character, matchDest, term.characterClass); if (term.invert()) state.jumpToBacktrack(this, matchDest); else { state.jumpToBacktrack(this); matchDest.link(this); } } void generateCharacterClassFixed(TermGenerationState& state) { const RegisterID character = regT0; const RegisterID countRegister = regT1; PatternTerm& term = state.term(); move(index, countRegister); sub32(Imm32(term.quantityCount), countRegister); Label loop(this); JumpList matchDest; load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); matchCharacterClass(character, matchDest, term.characterClass); if (term.invert()) state.jumpToBacktrack(this, matchDest); else { state.jumpToBacktrack(this); matchDest.link(this); } add32(TrustedImm32(1), countRegister); branch32(NotEqual, countRegister, index).linkTo(loop, this); } void generateCharacterClassGreedy(TermGenerationState& state) { const RegisterID character = regT0; const RegisterID countRegister = regT1; PatternTerm& term = state.term(); move(TrustedImm32(0), countRegister); JumpList failures; Label loop(this); failures.append(atEndOfInput()); if (term.invert()) { readCharacter(state.inputOffset(), character); matchCharacterClass(character, failures, term.characterClass); } else { JumpList matchDest; readCharacter(state.inputOffset(), character); matchCharacterClass(character, matchDest, term.characterClass); failures.append(jump()); matchDest.link(this); } add32(TrustedImm32(1), countRegister); add32(TrustedImm32(1), index); if (term.quantityCount != quantifyInfinite) { branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); failures.append(jump()); } else jump(loop); Label backtrackBegin(this); loadFromFrame(term.frameLocation, countRegister); state.jumpToBacktrack(this, branchTest32(Zero, countRegister)); sub32(TrustedImm32(1), countRegister); sub32(TrustedImm32(1), index); failures.link(this); storeToFrame(countRegister, term.frameLocation); state.setBacktrackLabel(backtrackBegin); } void generateCharacterClassNonGreedy(TermGenerationState& state) { const RegisterID character = regT0; const RegisterID countRegister = regT1; PatternTerm& term = state.term(); move(TrustedImm32(0), countRegister); Jump firstTimeDoNothing = jump(); Label hardFail(this); sub32(countRegister, index); state.jumpToBacktrack(this); Label backtrackBegin(this); loadFromFrame(term.frameLocation, countRegister); atEndOfInput().linkTo(hardFail, this); branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); JumpList matchDest; readCharacter(state.inputOffset(), character); matchCharacterClass(character, matchDest, term.characterClass); if (term.invert()) matchDest.linkTo(hardFail, this); else { jump(hardFail); matchDest.link(this); } add32(TrustedImm32(1), countRegister); add32(TrustedImm32(1), index); firstTimeDoNothing.link(this); storeToFrame(countRegister, term.frameLocation); state.setBacktrackLabel(backtrackBegin); } void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation) { ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion)); ASSERT(parenthesesTerm.quantityCount == 1); PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0; if (disjunction->m_alternatives.size() == 1) { state.resetAlternative(); ASSERT(state.alternativeValid()); PatternAlternative* alternative = state.alternative(); optimizeAlternative(alternative); int countToCheck = alternative->m_minimumSize - preCheckedCount; if (countToCheck) { ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount)); // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists' // will be forced to always trampoline into here, just to decrement the index. // Ick. Jump skip = jump(); Label backtrackBegin(this); sub32(Imm32(countToCheck), index); state.addBacktrackJump(jump()); skip.link(this); state.setBacktrackLabel(backtrackBegin); state.jumpToBacktrack(this, jumpIfNoAvailableInput(countToCheck)); state.checkedTotal += countToCheck; } for (state.resetTerm(); state.termValid(); state.nextTerm()) generateTerm(state); state.checkedTotal -= countToCheck; } else { JumpList successes; bool propogateBacktrack = false; // Save current state's paren jump list for use with each alternative JumpList* outerJumpList = state.getJumpListToPriorParen(); for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative(), state.setJumpListToPriorParen(outerJumpList)) { PatternAlternative* alternative = state.alternative(); optimizeAlternative(alternative); ASSERT(alternative->m_minimumSize >= preCheckedCount); int countToCheck = alternative->m_minimumSize - preCheckedCount; if (countToCheck) { state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); state.checkedTotal += countToCheck; } for (state.resetTerm(); state.termValid(); state.nextTerm()) generateTerm(state); // Matched an alternative. DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation); if (!state.isLastAlternative() || countToCheck) successes.append(jump()); // Alternative did not match. // Do we have a backtrack destination? // if so, link the data label to it. state.linkDataLabelToBacktrackIfExists(this, dataLabel); if (!state.isLastAlternative() || countToCheck) state.linkAlternativeBacktracks(this); if (countToCheck) { sub32(Imm32(countToCheck), index); state.checkedTotal -= countToCheck; } else if (state.isLastAlternative()) propogateBacktrack = true; } // We fall through to here when the last alternative fails. // Add a backtrack out of here for the parenthese handling code to link up. if (!propogateBacktrack) state.addBacktrackJump(jump()); // Save address on stack for the parens code to backtrack to, to retry the // next alternative. state.setBackTrackStackOffset(alternativeFrameLocation * sizeof(void*)); successes.link(this); } } void generateParenthesesSingle(TermGenerationState& state) { const RegisterID indexTemporary = regT0; PatternTerm& term = state.term(); PatternDisjunction* disjunction = term.parentheses.disjunction; ASSERT(term.quantityCount == 1); unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0; unsigned parenthesesFrameLocation = term.frameLocation; unsigned alternativeFrameLocation = parenthesesFrameLocation; if (term.quantityType != QuantifierFixedCount) alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; // optimized case - no capture & no quantifier can be handled in a light-weight manner. if (!term.capture() && (term.quantityType == QuantifierFixedCount)) { m_expressionState.incrementParenNestingLevel(); TermGenerationState parenthesesState(disjunction, state.checkedTotal); // Use the current state's jump list for the nested parentheses. parenthesesState.setJumpListToPriorParen(state.getJumpListToPriorParen()); generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); // this expects that any backtracks back out of the parentheses will be in the // parenthesesState's m_backTrackJumps vector, and that if they need backtracking // they will have set an entry point on the parenthesesState's m_backtrackLabel. BacktrackDestination& parenthesesBacktrack = parenthesesState.getBacktrackDestination(); BacktrackDestination& stateBacktrack = state.getBacktrackDestination(); state.propagateBacktrackingFrom(this, parenthesesBacktrack); stateBacktrack.propagateBacktrackToLabel(parenthesesBacktrack); state.setJumpListToPriorParen(parenthesesState.getJumpListToPriorParen()); m_expressionState.decrementParenNestingLevel(); } else { Jump nonGreedySkipParentheses; Label nonGreedyTryParentheses; if (term.quantityType == QuantifierGreedy) storeToFrame(index, parenthesesFrameLocation); else if (term.quantityType == QuantifierNonGreedy) { storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); nonGreedySkipParentheses = jump(); nonGreedyTryParentheses = label(); storeToFrame(index, parenthesesFrameLocation); } // store the match start index if (term.capture()) { int inputOffset = state.inputOffset() - preCheckedCount; if (inputOffset) { move(index, indexTemporary); add32(Imm32(inputOffset), indexTemporary); store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); } else store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); } ParenthesesTail* parenthesesTail = m_expressionState.addParenthesesTail(term, state.getJumpListToPriorParen()); m_expressionState.incrementParenNestingLevel(); TermGenerationState parenthesesState(disjunction, state.checkedTotal); // Save the parenthesesTail for backtracking from nested parens to this one. parenthesesState.setJumpListToPriorParen(&parenthesesTail->m_withinBacktrackJumps); // generate the body of the parentheses generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); // For non-fixed counts, backtrack if we didn't match anything. if (term.quantityType != QuantifierFixedCount) parenthesesTail->addAfterParenJump(branch32(Equal, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*))))); // store the match end index if (term.capture()) { int inputOffset = state.inputOffset(); if (inputOffset) { move(index, indexTemporary); add32(Imm32(state.inputOffset()), indexTemporary); store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); } else store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); } m_expressionState.decrementParenNestingLevel(); parenthesesTail->processBacktracks(this, state, parenthesesState, nonGreedyTryParentheses, label()); state.setJumpListToPriorParen(&parenthesesTail->m_afterBacktrackJumps); parenthesesState.getBacktrackDestination().clear(); if (term.quantityType == QuantifierNonGreedy) nonGreedySkipParentheses.link(this); } } void generateParenthesesGreedyNoBacktrack(TermGenerationState& state) { PatternTerm& parenthesesTerm = state.term(); PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern); ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle. TermGenerationState parenthesesState(disjunction, state.checkedTotal); Label matchAgain(this); storeToFrame(index, parenthesesTerm.frameLocation); // Save the current index to check for zero len matches later. for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) { PatternAlternative* alternative = parenthesesState.alternative(); optimizeAlternative(alternative); int countToCheck = alternative->m_minimumSize; if (countToCheck) { parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); parenthesesState.checkedTotal += countToCheck; } for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm()) generateTerm(parenthesesState); // If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet. branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain); // If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack, // or fall through to try the next alternative if no backtrack is available. parenthesesState.plantJumpToBacktrackIfExists(this); parenthesesState.linkAlternativeBacktracks(this); // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop. if (countToCheck) { sub32(Imm32(countToCheck), index); parenthesesState.checkedTotal -= countToCheck; } } // If the last alternative falls through to here, we have a failed match... // Which means that we match whatever we have matched up to this point (even if nothing). } void generateParentheticalAssertion(TermGenerationState& state) { PatternTerm& term = state.term(); PatternDisjunction* disjunction = term.parentheses.disjunction; ASSERT(term.quantityCount == 1); ASSERT(term.quantityType == QuantifierFixedCount); unsigned parenthesesFrameLocation = term.frameLocation; unsigned alternativeFrameLocation = parenthesesFrameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition; if (term.invert()) { // Inverted case storeToFrame(index, parenthesesFrameLocation); state.checkedTotal -= countCheckedAfterAssertion; if (countCheckedAfterAssertion) sub32(Imm32(countCheckedAfterAssertion), index); TermGenerationState parenthesesState(disjunction, state.checkedTotal); generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); // Success! - which means - Fail! loadFromFrame(parenthesesFrameLocation, index); state.jumpToBacktrack(this); // And fail means success. parenthesesState.linkAlternativeBacktracks(this); loadFromFrame(parenthesesFrameLocation, index); state.checkedTotal += countCheckedAfterAssertion; } else { // Normal case storeToFrame(index, parenthesesFrameLocation); state.checkedTotal -= countCheckedAfterAssertion; if (countCheckedAfterAssertion) sub32(Imm32(countCheckedAfterAssertion), index); TermGenerationState parenthesesState(disjunction, state.checkedTotal); generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); // Success! - which means - Success! loadFromFrame(parenthesesFrameLocation, index); Jump success = jump(); parenthesesState.linkAlternativeBacktracks(this); loadFromFrame(parenthesesFrameLocation, index); state.jumpToBacktrack(this); success.link(this); state.checkedTotal += countCheckedAfterAssertion; } } void generateTerm(TermGenerationState& state) { PatternTerm& term = state.term(); switch (term.type) { case PatternTerm::TypeAssertionBOL: generateAssertionBOL(state); break; case PatternTerm::TypeAssertionEOL: generateAssertionEOL(state); break; case PatternTerm::TypeAssertionWordBoundary: generateAssertionWordBoundary(state); break; case PatternTerm::TypePatternCharacter: switch (term.quantityType) { case QuantifierFixedCount: if (term.quantityCount == 1) { if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) { generatePatternCharacterPair(state); state.nextTerm(); } else generatePatternCharacterSingle(state); } else generatePatternCharacterFixed(state); break; case QuantifierGreedy: generatePatternCharacterGreedy(state); break; case QuantifierNonGreedy: generatePatternCharacterNonGreedy(state); break; } break; case PatternTerm::TypeCharacterClass: switch (term.quantityType) { case QuantifierFixedCount: if (term.quantityCount == 1) generateCharacterClassSingle(state); else generateCharacterClassFixed(state); break; case QuantifierGreedy: generateCharacterClassGreedy(state); break; case QuantifierNonGreedy: generateCharacterClassNonGreedy(state); break; } break; case PatternTerm::TypeBackReference: m_shouldFallBack = true; break; case PatternTerm::TypeForwardReference: break; case PatternTerm::TypeParenthesesSubpattern: if (term.quantityCount == 1 && !term.parentheses.isCopy) generateParenthesesSingle(state); else if (term.parentheses.isTerminal) generateParenthesesGreedyNoBacktrack(state); else m_shouldFallBack = true; break; case PatternTerm::TypeParentheticalAssertion: generateParentheticalAssertion(state); break; } } void generateDisjunction(PatternDisjunction* disjunction) { TermGenerationState state(disjunction, 0); state.resetAlternative(); // check availability for the next alternative int countCheckedForCurrentAlternative = 0; int countToCheckForFirstAlternative = 0; bool hasShorterAlternatives = false; bool setRepeatAlternativeLabels = false; JumpList notEnoughInputForPreviousAlternative; Label firstAlternative; Label firstAlternativeInputChecked; // The label 'firstAlternative' is used to plant a check to see if there is // sufficient input available to run the first repeating alternative. // The label 'firstAlternativeInputChecked' will jump directly to matching // the first repeating alternative having skipped this check. if (state.alternativeValid()) { PatternAlternative* alternative = state.alternative(); if (!alternative->onceThrough()) { firstAlternative = Label(this); setRepeatAlternativeLabels = true; } countToCheckForFirstAlternative = alternative->m_minimumSize; state.checkedTotal += countToCheckForFirstAlternative; if (countToCheckForFirstAlternative) notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); countCheckedForCurrentAlternative = countToCheckForFirstAlternative; } if (setRepeatAlternativeLabels) firstAlternativeInputChecked = Label(this); while (state.alternativeValid()) { PatternAlternative* alternative = state.alternative(); optimizeAlternative(alternative); // Track whether any alternatives are shorter than the first one. if (!alternative->onceThrough()) hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative); for (state.resetTerm(); state.termValid(); state.nextTerm()) generateTerm(state); // If we get here, the alternative matched. if (m_pattern.m_body->m_callFrameSize) addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); ASSERT(index != returnRegister); if (m_pattern.m_body->m_hasFixedSize) { move(index, returnRegister); if (alternative->m_minimumSize) sub32(Imm32(alternative->m_minimumSize), returnRegister); store32(returnRegister, output); } else load32(Address(output), returnRegister); store32(index, Address(output, 4)); generateReturn(); state.nextAlternative(); if (alternative->onceThrough() && state.alternativeValid()) state.clearBacktrack(); // if there are any more alternatives, plant the check for input before looping. if (state.alternativeValid()) { state.setJumpListToPriorParen(0); PatternAlternative* nextAlternative = state.alternative(); if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) { // We have handled non-repeating alternatives, jump to next iteration // and loop over repeating alternatives. state.jumpToBacktrack(this); countToCheckForFirstAlternative = nextAlternative->m_minimumSize; // If we get here, there the last input checked failed. notEnoughInputForPreviousAlternative.link(this); state.linkAlternativeBacktracks(this); // Back up to start the looping alternatives. if (countCheckedForCurrentAlternative) sub32(Imm32(countCheckedForCurrentAlternative), index); firstAlternative = Label(this); state.checkedTotal = countToCheckForFirstAlternative; if (countToCheckForFirstAlternative) notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); countCheckedForCurrentAlternative = countToCheckForFirstAlternative; firstAlternativeInputChecked = Label(this); setRepeatAlternativeLabels = true; } else { int countToCheckForNextAlternative = nextAlternative->m_minimumSize; if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one. // If we get here, then the last input checked failed. notEnoughInputForPreviousAlternative.link(this); // Check if sufficent input available to run the next alternative notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); // We are now in the correct state to enter the next alternative; this add is only required // to mirror and revert operation of the sub32, just below. add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); // If we get here, then the last input checked passed. state.linkAlternativeBacktracks(this); // No need to check if we can run the next alternative, since it is shorter - // just update index. sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one. // If we get here, then the last input checked failed. // If there is insufficient input to run the current alternative, and the next alternative is longer, // then there is definitely not enough input to run it - don't even check. Just adjust index, as if // we had checked. notEnoughInputForPreviousAlternative.link(this); add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index); notEnoughInputForPreviousAlternative.append(jump()); // The next alternative is longer than the current one; check the difference. state.linkAlternativeBacktracks(this); notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); } else { // CASE 3: Both alternatives are the same length. ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative); // If the next alterative is the same length as this one, then no need to check the input - // if there was sufficent input to run the current alternative then there is sufficient // input to run the next one; if not, there isn't. state.linkAlternativeBacktracks(this); } state.checkedTotal -= countCheckedForCurrentAlternative; countCheckedForCurrentAlternative = countToCheckForNextAlternative; state.checkedTotal += countCheckedForCurrentAlternative; } } } // If we get here, all Alternatives failed... state.checkedTotal -= countCheckedForCurrentAlternative; if (!setRepeatAlternativeLabels) { // If there are no alternatives that need repeating (all are marked 'onceThrough') then just link // the match failures to this point, and fall through to the return below. state.linkAlternativeBacktracks(this, true); notEnoughInputForPreviousAlternative.link(this); } else { // How much more input need there be to be able to retry from the first alternative? // examples: // /yarr_jit/ or /wrec|pcre/ // In these examples we need check for one more input before looping. // /yarr_jit|pcre/ // In this case we need check for 5 more input to loop (+4 to allow for the first alterative // being four longer than the last alternative checked, and another +1 to effectively move // the start position along by one). // /yarr|rules/ or /wrec|notsomuch/ // In these examples, provided that there was sufficient input to have just been matching for // the second alternative we can loop without checking for available input (since the second // alternative is longer than the first). In the latter example we need to decrement index // (by 4) so the start position is only progressed by 1 from the last iteration. int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1; // First, deal with the cases where there was sufficient input to try the last alternative. if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below. state.linkAlternativeBacktracks(this, true); else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop! state.linkAlternativeBacktracksTo(this, firstAlternativeInputChecked, true); else { // no need to check the input, but we do have some bookkeeping to do first. state.linkAlternativeBacktracks(this, true); // Where necessary update our preserved start position. if (!m_pattern.m_body->m_hasFixedSize) { move(index, regT0); sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); store32(regT0, Address(output)); } // Update index if necessary, and loop (without checking). if (incrementForNextIter) add32(Imm32(incrementForNextIter), index); jump().linkTo(firstAlternativeInputChecked, this); } notEnoughInputForPreviousAlternative.link(this); // Update our idea of the start position, if we're tracking this. if (!m_pattern.m_body->m_hasFixedSize) { if (countCheckedForCurrentAlternative - 1) { move(index, regT0); sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); store32(regT0, Address(output)); } else store32(index, Address(output)); } // Check if there is sufficent input to run the first alternative again. jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this); // No - insufficent input to run the first alteranative, are there any other alternatives we // might need to check? If so, the last check will have left the index incremented by // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative // LESS input is available, to have the effect of just progressing the start position by 1 // from the last iteration. If this check passes we can just jump up to the check associated // with the first alternative in the loop. This is a bit sad, since we'll end up trying the // first alternative again, and this check will fail (otherwise the check planted just above // here would have passed). This is a bit sad, however it saves trying to do something more // complex here in compilation, and in the common case we should end up coallescing the checks. // // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150, // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there // is sufficient input to run either alternative (constantly failing). If there had been only // one alternative, or if the shorter alternative had come first, we would have terminated // immediately. :-/ if (hasShorterAlternatives) jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this); // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true, // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... // but since we're about to return a failure this doesn't really matter!) } if (m_pattern.m_body->m_callFrameSize) addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); move(TrustedImm32(-1), returnRegister); generateReturn(); m_expressionState.emitParenthesesTail(this); m_expressionState.emitIndirectJumpTable(this); m_expressionState.linkToNextIteration(this); } void generateEnter() { #if CPU(X86_64) push(X86Registers::ebp); move(stackPointerRegister, X86Registers::ebp); push(X86Registers::ebx); #elif CPU(X86) push(X86Registers::ebp); move(stackPointerRegister, X86Registers::ebp); // TODO: do we need spill registers to fill the output pointer if there are no sub captures? push(X86Registers::ebx); push(X86Registers::edi); push(X86Registers::esi); // load output into edi (2 = saved ebp + return address). #if COMPILER(MSVC) loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index); loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length); loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output); #else loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); #endif #elif CPU(ARM) push(ARMRegisters::r4); push(ARMRegisters::r5); push(ARMRegisters::r6); #if CPU(ARM_TRADITIONAL) push(ARMRegisters::r8); // scratch register #endif move(ARMRegisters::r3, output); #elif CPU(SH4) push(SH4Registers::r11); push(SH4Registers::r13); #elif CPU(MIPS) // Do nothing. #endif } void generateReturn() { #if CPU(X86_64) pop(X86Registers::ebx); pop(X86Registers::ebp); #elif CPU(X86) pop(X86Registers::esi); pop(X86Registers::edi); pop(X86Registers::ebx); pop(X86Registers::ebp); #elif CPU(ARM) #if CPU(ARM_TRADITIONAL) pop(ARMRegisters::r8); // scratch register #endif pop(ARMRegisters::r6); pop(ARMRegisters::r5); pop(ARMRegisters::r4); #elif CPU(SH4) pop(SH4Registers::r13); pop(SH4Registers::r11); #elif CPU(MIPS) // Do nothing #endif ret(); } public: YarrGenerator(YarrPattern& pattern) : m_pattern(pattern) , m_shouldFallBack(false) { } void generate() { generateEnter(); if (!m_pattern.m_body->m_hasFixedSize) store32(index, Address(output)); if (m_pattern.m_body->m_callFrameSize) subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); generateDisjunction(m_pattern.m_body); } void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) { generate(); LinkBuffer patchBuffer(this, globalData->regexAllocator.poolForSize(size()), 0); for (unsigned i = 0; i < m_expressionState.m_backtrackRecords.size(); ++i) patchBuffer.patch(m_expressionState.m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_expressionState.m_backtrackRecords[i].backtrackLocation)); jitObject.set(patchBuffer.finalizeCode()); jitObject.setFallBack(m_shouldFallBack); } private: YarrPattern& m_pattern; bool m_shouldFallBack; GenerationState m_expressionState; }; void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject) { YarrGenerator(pattern).compile(globalData, jitObject); } int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output) { return jitObject.execute(input, start, length, output); } }} #endif