普通文本  |  165行  |  4.03 KB

// Copyright 2006 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Dump the regexp into a string showing structure.
// Tested by parse_unittest.cc

// This function traverses the regexp recursively,
// meaning that on inputs like Regexp::Simplify of
// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
// it takes time and space exponential in the size of the
// original regular expression.  It can also use stack space
// linear in the size of the regular expression for inputs
// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
// As a result, Dump is provided only in the testing
// library (see BUILD).

#include <string>
#include <vector>
#include "util/test.h"
#include "re2/stringpiece.h"
#include "re2/regexp.h"

// Cause a link error if this file is used outside of testing.
DECLARE_string(test_tmpdir);

namespace re2 {

static const char* kOpcodeNames[] = {
  "bad",
  "no",
  "emp",
  "lit",
  "str",
  "cat",
  "alt",
  "star",
  "plus",
  "que",
  "rep",
  "cap",
  "dot",
  "byte",
  "bol",
  "eol",
  "wb",   // kRegexpWordBoundary
  "nwb",  // kRegexpNoWordBoundary
  "bot",
  "eot",
  "cc",
  "match",
};

// Create string representation of regexp with explicit structure.
// Nothing pretty, just for testing.
static void DumpRegexpAppending(Regexp* re, string* s) {
  if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
    StringAppendF(s, "op%d", re->op());
  } else {
    switch (re->op()) {
      default:
        break;
      case kRegexpStar:
      case kRegexpPlus:
      case kRegexpQuest:
      case kRegexpRepeat:
        if (re->parse_flags() & Regexp::NonGreedy)
          s->append("n");
        break;
    }
    s->append(kOpcodeNames[re->op()]);
    if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
      Rune r = re->rune();
      if ('a' <= r && r <= 'z')
        s->append("fold");
    }
    if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
      for (int i = 0; i < re->nrunes(); i++) {
        Rune r = re->runes()[i];
        if ('a' <= r && r <= 'z') {
          s->append("fold");
          break;
        }
      }
    }
  }
  s->append("{");
  switch (re->op()) {
    default:
      break;
    case kRegexpEndText:
      if (!(re->parse_flags() & Regexp::WasDollar)) {
        s->append("\\z");
      }
      break;
    case kRegexpLiteral: {
      Rune r = re->rune();
      char buf[UTFmax+1];
      buf[runetochar(buf, &r)] = 0;
      s->append(buf);
      break;
    }
    case kRegexpLiteralString:
      for (int i = 0; i < re->nrunes(); i++) {
        Rune r = re->runes()[i];
        char buf[UTFmax+1];
        buf[runetochar(buf, &r)] = 0;
        s->append(buf);
      }
      break;
    case kRegexpConcat:
    case kRegexpAlternate:
      for (int i = 0; i < re->nsub(); i++)
        DumpRegexpAppending(re->sub()[i], s);
      break;
    case kRegexpStar:
    case kRegexpPlus:
    case kRegexpQuest:
      DumpRegexpAppending(re->sub()[0], s);
      break;
    case kRegexpCapture:
      if (re->name()) {
        s->append(*re->name());
        s->append(":");
      }
      DumpRegexpAppending(re->sub()[0], s);
      break;
    case kRegexpRepeat:
      s->append(StringPrintf("%d,%d ", re->min(), re->max()));
      DumpRegexpAppending(re->sub()[0], s);
      break;
    case kRegexpCharClass: {
      string sep;
      for (CharClass::iterator it = re->cc()->begin();
           it != re->cc()->end(); ++it) {
        RuneRange rr = *it;
        s->append(sep);
        if (rr.lo == rr.hi)
          s->append(StringPrintf("%#x", rr.lo));
        else
          s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
        sep = " ";
      }
      break;
    }
  }
  s->append("}");
}

string Regexp::Dump() {
  string s;

  // Make sure being called from a unit test.
  if (FLAGS_test_tmpdir.empty()) {
    LOG(ERROR) << "Cannot use except for testing.";
    return s;
  }

  DumpRegexpAppending(this, &s);
  return s;
}

}  // namespace re2