// Copyright 2006 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Dump the regexp into a string showing structure.
// Tested by parse_unittest.cc
// This function traverses the regexp recursively,
// meaning that on inputs like Regexp::Simplify of
// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
// it takes time and space exponential in the size of the
// original regular expression. It can also use stack space
// linear in the size of the regular expression for inputs
// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
// As a result, Dump is provided only in the testing
// library (see BUILD).
#include <string>
#include <vector>
#include "util/test.h"
#include "re2/stringpiece.h"
#include "re2/regexp.h"
// Cause a link error if this file is used outside of testing.
DECLARE_string(test_tmpdir);
namespace re2 {
static const char* kOpcodeNames[] = {
"bad",
"no",
"emp",
"lit",
"str",
"cat",
"alt",
"star",
"plus",
"que",
"rep",
"cap",
"dot",
"byte",
"bol",
"eol",
"wb", // kRegexpWordBoundary
"nwb", // kRegexpNoWordBoundary
"bot",
"eot",
"cc",
"match",
};
// Create string representation of regexp with explicit structure.
// Nothing pretty, just for testing.
static void DumpRegexpAppending(Regexp* re, string* s) {
if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
StringAppendF(s, "op%d", re->op());
} else {
switch (re->op()) {
default:
break;
case kRegexpStar:
case kRegexpPlus:
case kRegexpQuest:
case kRegexpRepeat:
if (re->parse_flags() & Regexp::NonGreedy)
s->append("n");
break;
}
s->append(kOpcodeNames[re->op()]);
if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
Rune r = re->rune();
if ('a' <= r && r <= 'z')
s->append("fold");
}
if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
for (int i = 0; i < re->nrunes(); i++) {
Rune r = re->runes()[i];
if ('a' <= r && r <= 'z') {
s->append("fold");
break;
}
}
}
}
s->append("{");
switch (re->op()) {
default:
break;
case kRegexpEndText:
if (!(re->parse_flags() & Regexp::WasDollar)) {
s->append("\\z");
}
break;
case kRegexpLiteral: {
Rune r = re->rune();
char buf[UTFmax+1];
buf[runetochar(buf, &r)] = 0;
s->append(buf);
break;
}
case kRegexpLiteralString:
for (int i = 0; i < re->nrunes(); i++) {
Rune r = re->runes()[i];
char buf[UTFmax+1];
buf[runetochar(buf, &r)] = 0;
s->append(buf);
}
break;
case kRegexpConcat:
case kRegexpAlternate:
for (int i = 0; i < re->nsub(); i++)
DumpRegexpAppending(re->sub()[i], s);
break;
case kRegexpStar:
case kRegexpPlus:
case kRegexpQuest:
DumpRegexpAppending(re->sub()[0], s);
break;
case kRegexpCapture:
if (re->name()) {
s->append(*re->name());
s->append(":");
}
DumpRegexpAppending(re->sub()[0], s);
break;
case kRegexpRepeat:
s->append(StringPrintf("%d,%d ", re->min(), re->max()));
DumpRegexpAppending(re->sub()[0], s);
break;
case kRegexpCharClass: {
string sep;
for (CharClass::iterator it = re->cc()->begin();
it != re->cc()->end(); ++it) {
RuneRange rr = *it;
s->append(sep);
if (rr.lo == rr.hi)
s->append(StringPrintf("%#x", rr.lo));
else
s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
sep = " ";
}
break;
}
}
s->append("}");
}
string Regexp::Dump() {
string s;
// Make sure being called from a unit test.
if (FLAGS_test_tmpdir.empty()) {
LOG(ERROR) << "Cannot use except for testing.";
return s;
}
DumpRegexpAppending(this, &s);
return s;
}
} // namespace re2