// Copyright 2008 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Exhaustive testing of regular expression matching.
// Each test picks an alphabet (e.g., "abc"), a maximum string length,
// a maximum regular expression length, and a maximum number of letters
// that can appear in the regular expression. Given these parameters,
// it tries every possible regular expression and string, verifying that
// the NFA, DFA, and a trivial backtracking implementation agree about
// the location of the match.
#include <stdlib.h>
#include <stdio.h>
#ifndef LOGGING
#define LOGGING 0
#endif
#include "util/test.h"
#include "re2/testing/exhaustive_tester.h"
#include "re2/testing/tester.h"
DEFINE_bool(show_regexps, false, "show regexps during testing");
DEFINE_int32(max_bad_regexp_inputs, 1,
"Stop testing a regular expression after finding this many "
"strings that break it.");
// Compiled in debug mode, the usual tests run for over an hour.
// Have to cut it down to make the unit test machines happy.
DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");
namespace re2 {
static char* escape(const StringPiece& sp) {
static char buf[512];
char* p = buf;
*p++ = '\"';
for (int i = 0; i < sp.size(); i++) {
if(p+5 >= buf+sizeof buf)
LOG(FATAL) << "ExhaustiveTester escape: too long";
if(sp[i] == '\\' || sp[i] == '\"') {
*p++ = '\\';
*p++ = sp[i];
} else if(sp[i] == '\n') {
*p++ = '\\';
*p++ = 'n';
} else {
*p++ = sp[i];
}
}
*p++ = '\"';
*p = '\0';
return buf;
}
static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
if (!re.Match(input, 0, input.size(), anchor, m, n)) {
printf("-");
return;
}
for (int i = 0; i < n; i++) {
if (i > 0)
printf(" ");
if (m[i].begin() == NULL)
printf("-");
else
printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));
}
}
// Processes a single generated regexp.
// Compiles it using Regexp interface and PCRE, and then
// checks that NFA, DFA, and PCRE all return the same results.
void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
regexps_++;
string regexp = const_regexp;
if (!topwrapper_.empty())
regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
if (FLAGS_show_regexps) {
printf("\r%s", regexp.c_str());
fflush(stdout);
}
if (LOGGING) {
// Write out test cases and answers for use in testing
// other implementations, such as Go's regexp package.
if (randomstrings_)
LOG(ERROR) << "Cannot log with random strings.";
if (regexps_ == 1) { // first
printf("strings\n");
strgen_.Reset();
while (strgen_.HasNext())
printf("%s\n", escape(strgen_.Next()));
printf("regexps\n");
}
printf("%s\n", escape(regexp));
RE2 re(regexp);
RE2::Options longest;
longest.set_longest_match(true);
RE2 relongest(regexp, longest);
int ngroup = re.NumberOfCapturingGroups()+1;
StringPiece* group = new StringPiece[ngroup];
strgen_.Reset();
while (strgen_.HasNext()) {
StringPiece input = strgen_.Next();
PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
printf(";");
PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
printf(";");
PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
printf(";");
PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
printf("\n");
}
delete[] group;
return;
}
Tester tester(regexp);
if (tester.error())
return;
strgen_.Reset();
strgen_.GenerateNULL();
if (randomstrings_)
strgen_.Random(stringseed_, stringcount_);
int bad_inputs = 0;
while (strgen_.HasNext()) {
tests_++;
if (!tester.TestInput(strgen_.Next())) {
failures_++;
if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
break;
}
}
}
// Runs an exhaustive test on the given parameters.
void ExhaustiveTest(int maxatoms, int maxops,
const vector<string>& alphabet,
const vector<string>& ops,
int maxstrlen, const vector<string>& stralphabet,
const string& wrapper,
const string& topwrapper) {
if (DEBUG_MODE && FLAGS_quick_debug_mode) {
if (maxatoms > 1)
maxatoms--;
if (maxops > 1)
maxops--;
if (maxstrlen > 1)
maxstrlen--;
}
ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
maxstrlen, stralphabet, wrapper,
topwrapper);
t.Generate();
if (!LOGGING) {
printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
}
EXPECT_EQ(0, t.failures());
}
// Runs an exhaustive test using the given parameters and
// the basic egrep operators.
void EgrepTest(int maxatoms, int maxops, const string& alphabet,
int maxstrlen, const string& stralphabet,
const string& wrapper) {
const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
for (int i = 0; i < arraysize(tops); i++) {
ExhaustiveTest(maxatoms, maxops,
Split("", alphabet),
RegexpGenerator::EgrepOps(),
maxstrlen,
Split("", stralphabet),
wrapper,
tops[i]);
}
}
} // namespace re2