/*
 *  Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
 *  Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
 *  Copyright (C) 2009 Torch Mobile, Inc.
 *  Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

#include "config.h"
#include "RegExp.h"

#include "Lexer.h"
#include "yarr/Yarr.h"
#include "yarr/YarrJIT.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wtf/Assertions.h>
#include <wtf/OwnArrayPtr.h>

namespace JSC {

// Converts a flag string (any combination of 'g', 'i' and 'm') into a
// RegExpFlags bit set. Returns InvalidFlags if a flag character is repeated
// or if the string contains any other character. An empty string yields
// NoFlags.
RegExpFlags regExpFlags(const UString& string)
{
    RegExpFlags flags = NoFlags;

    for (unsigned i = 0; i < string.length(); ++i) {
        switch (string.characters()[i]) {
        case 'g':
            if (flags & FlagGlobal)
                return InvalidFlags;
            flags = static_cast<RegExpFlags>(flags | FlagGlobal);
            break;

        case 'i':
            if (flags & FlagIgnoreCase)
                return InvalidFlags;
            flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
            break;

        case 'm':
            if (flags & FlagMultiline)
                return InvalidFlags;
            flags = static_cast<RegExpFlags>(flags | FlagMultiline);
            break;

        default:
            return InvalidFlags;
        }
    }

    return flags;
}

// Holds the compiled forms of a pattern: the JIT code block (only when the
// YARR JIT is enabled) and the interpreter bytecode.
struct RegExpRepresentation {
#if ENABLE(YARR_JIT)
    Yarr::YarrCodeBlock m_regExpJITCode;
#endif
    OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
};

// Stores the pattern and flags, allocates an empty representation and then
// compiles the pattern; the outcome of compile() is recorded in m_state.
inline RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
    : m_patternString(patternString)
    , m_flags(flags)
    , m_constructionError(0)
    , m_numSubpatterns(0)
#if ENABLE(REGEXP_TRACING)
    , m_rtMatchCallCount(0)
    , m_rtMatchFoundCount(0)
#endif
    , m_representation(adoptPtr(new RegExpRepresentation))
{
    m_state = compile(globalData);
}

RegExp::~RegExp()
{
}

// Creates a ref-counted RegExp. When REGEXP_TRACING is enabled the new object
// is also registered with globalData for tracing.
PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
{
    RefPtr<RegExp> res = adoptRef(new RegExp(globalData, patternString, flags));
#if ENABLE(REGEXP_TRACING)
    globalData->addRegExpToTrace(res);
#endif
    return res.release();
}

// Parses m_patternString and compiles it.
//
// Returns:
//   ParseError - the pattern failed to parse (m_constructionError is set).
//   JITCode    - JIT compilation succeeded without falling back.
//   ByteCode   - only interpreter bytecode is available.
//
// The JIT is attempted only when the pattern has no backreferences and
// globalData allows JIT use. In YARR_JIT_DEBUG builds bytecode is always
// generated as well, so that match() can cross-check the JIT against the
// interpreter.
RegExp::RegExpState RegExp::compile(JSGlobalData* globalData)
{
    Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
    if (m_constructionError)
        return ParseError;

    m_numSubpatterns = pattern.m_numSubpatterns;

    RegExpState res = ByteCode;

#if ENABLE(YARR_JIT)
    if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
        Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
#if ENABLE(YARR_JIT_DEBUG)
        if (!m_representation->m_regExpJITCode.isFallBack())
            res = JITCode;
        else
            res = ByteCode;
#else
        // Without the debug cross-check there is no need for bytecode once
        // the JIT has succeeded.
        if (!m_representation->m_regExpJITCode.isFallBack())
            return JITCode;
#endif
    }
#endif

    m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);

    return res;
}

// Runs the compiled pattern against s, starting at startOffset (negative
// offsets are clamped to 0).
//
// If ovector is non-null it is resized to (m_numSubpatterns + 1) * 2 entries
// and receives the offsets written by the matcher; otherwise a local scratch
// vector is used.
//
// Returns the value produced by the YARR JIT or interpreter, or -1 when no
// match is attempted: startOffset is past the end of s, s is null, or the
// pattern failed to compile.
int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
{
    if (startOffset < 0)
        startOffset = 0;

#if ENABLE(REGEXP_TRACING)
    m_rtMatchCallCount++;
#endif

    if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
        return -1;

    if (m_state == ParseError)
        return -1;

    int offsetVectorSize = (m_numSubpatterns + 1) * 2;
    int* offsetVector;
    Vector<int, 32> nonReturnedOvector;
    if (ovector) {
        ovector->resize(offsetVectorSize);
        offsetVector = ovector->data();
    } else {
        nonReturnedOvector.resize(offsetVectorSize);
        offsetVector = nonReturnedOvector.data();
    }

    ASSERT(offsetVector);
    // Initialize offsetVector with the return value (index 0) and the
    // first subpattern start indices (even index values) set to -1.
    // No need to init the subpattern end indices.
    for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
        offsetVector[j] = -1;

    int result;
#if ENABLE(YARR_JIT)
    if (m_state == JITCode) {
        result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
#if ENABLE(YARR_JIT_DEBUG)
        matchCompareWithInterpreter(s, startOffset, offsetVector, result);
#endif
    } else
#endif
        result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);

    ASSERT(result >= -1);

#if ENABLE(REGEXP_TRACING)
    if (result != -1)
        m_rtMatchFoundCount++;
#endif

    return result;
}

#if ENABLE(YARR_JIT_DEBUG)
// Debug aid: re-runs the match with the bytecode interpreter and reports to
// stderr any difference between the JIT's result/offsets and the
// interpreter's.
//
//   s, startOffset - the same input that was given to the JIT.
//   offsetVector   - offsets written by the JIT run.
//   jitResult      - value returned by the JIT run.
void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult)
{
    int offsetVectorSize = (m_numSubpatterns + 1) * 2;
    Vector<int, 32> interpreterOvector;
    interpreterOvector.resize(offsetVectorSize);
    int* interpreterOffsetVector = interpreterOvector.data();
    int interpreterResult = 0;
    int differences = 0;

    // Initialize interpreterOffsetVector with the return value (index 0) and the
    // first subpattern start indices (even index values) set to -1.
    // No need to init the subpattern end indices.
    for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
        interpreterOffsetVector[j] = -1;

    interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);

    if (jitResult != interpreterResult)
        differences++;

    // Compare each subpattern's start offset; the end offset is compared only
    // when the JIT reported a start (>= 0), since end offsets are otherwise
    // left uninitialized.
    for (unsigned j = 2, i = 0; i < m_numSubpatterns; j += 2, i++) {
        if ((offsetVector[j] != interpreterOffsetVector[j])
            || ((offsetVector[j] >= 0) && (offsetVector[j + 1] != interpreterOffsetVector[j + 1])))
            differences++;
    }

    if (!differences)
        return;

    fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
    unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);

    // Long inputs are truncated to 148 characters. The precision (".148")
    // is what limits the output; a bare width would only pad.
    fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%.148s...\"\n", s.utf8().data() + startOffset);

    if (jitResult != interpreterResult) {
        fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
        differences--;
    } else {
        fprintf(stderr, " Correct result = %d\n", jitResult);
    }

    // Anything still counted in differences is an offset mismatch.
    if (differences) {
        for (unsigned j = 2, i = 0; i < m_numSubpatterns; j += 2, i++) {
            if (offsetVector[j] != interpreterOffsetVector[j])
                fprintf(stderr, " JIT offset[%u] = %d, interpreted offset[%u] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
            if ((offsetVector[j] >= 0) && (offsetVector[j + 1] != interpreterOffsetVector[j + 1]))
                fprintf(stderr, " JIT offset[%u] = %d, interpreted offset[%u] = %d\n", j + 1, offsetVector[j + 1], j + 1, interpreterOffsetVector[j + 1]);
        }
    }
}
#endif

#if ENABLE(REGEXP_TRACING)
// Prints one line of trace statistics to stdout: the pattern (shortened to
// fit a 40 column field), the JIT code address or a placeholder, the number
// of match() calls and the number of those calls that found a match.
void RegExp::printTraceData()
{
    char formattedPattern[41];
    char rawPattern[41];

    // strncpy does not terminate when the source fills the buffer, so
    // terminate explicitly.
    strncpy(rawPattern, pattern().utf8().data(), 40);
    rawPattern[40] = '\0';

    int pattLen = strlen(rawPattern);

    // Patterns that fit are shown as /pattern/; longer ones are cut and
    // suffixed with "...".
    snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);

#if ENABLE(YARR_JIT)
    Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;

    const size_t jitAddrSize = 20;
    char jitAddr[jitAddrSize];
    // compile() only reports JITCode when the JIT did not fall back, so that
    // is the state in which the code block has an address worth printing.
    if (m_state == JITCode)
        snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
    else
        snprintf(jitAddr, jitAddrSize, "fallback");
#else
    const char* jitAddr = "JIT Off";
#endif

    printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
}
#endif

} // namespace JSC