// Copyright 2015 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "base/strings/pattern.h" #include "base/third_party/icu/icu_utf.h" namespace base { namespace { static bool IsWildcard(base_icu::UChar32 character) { return character == '*' || character == '?'; } // Move the strings pointers to the point where they start to differ. template <typename CHAR, typename NEXT> static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, const CHAR** string, const CHAR* string_end, NEXT next) { const CHAR* escape = NULL; while (*pattern != pattern_end && *string != string_end) { if (!escape && IsWildcard(**pattern)) { // We don't want to match wildcard here, except if it's escaped. return; } // Check if the escapement char is found. If so, skip it and move to the // next character. if (!escape && **pattern == '\\') { escape = *pattern; next(pattern, pattern_end); continue; } // Check if the chars match, if so, increment the ptrs. const CHAR* pattern_next = *pattern; const CHAR* string_next = *string; base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); if (pattern_char == next(&string_next, string_end) && pattern_char != CBU_SENTINEL) { *pattern = pattern_next; *string = string_next; } else { // Uh oh, it did not match, we are done. If the last char was an // escapement, that means that it was an error to advance the ptr here, // let's put it back where it was. This also mean that the MatchPattern // function will return false because if we can't match an escape char // here, then no one will. if (escape) { *pattern = escape; } return; } escape = NULL; } } template <typename CHAR, typename NEXT> static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { while (*pattern != end) { if (!IsWildcard(**pattern)) return; next(pattern, end); } } template <typename CHAR, typename NEXT> static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, const CHAR* pattern, const CHAR* pattern_end, int depth, NEXT next) { const int kMaxDepth = 16; if (depth > kMaxDepth) return false; // Eat all the matching chars. EatSameChars(&pattern, pattern_end, &eval, eval_end, next); // If the string is empty, then the pattern must be empty too, or contains // only wildcards. if (eval == eval_end) { EatWildcard(&pattern, pattern_end, next); return pattern == pattern_end; } // Pattern is empty but not string, this is not a match. if (pattern == pattern_end) return false; // If this is a question mark, then we need to compare the rest with // the current string or the string with one character eaten. const CHAR* next_pattern = pattern; next(&next_pattern, pattern_end); if (pattern[0] == '?') { if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next)) return true; const CHAR* next_eval = eval; next(&next_eval, eval_end); if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, depth + 1, next)) return true; } // This is a *, try to match all the possible substrings with the remainder // of the pattern. if (pattern[0] == '*') { // Collapse duplicate wild cards (********** into *) so that the // method does not recurse unnecessarily. http://crbug.com/52839 EatWildcard(&next_pattern, pattern_end, next); while (eval != eval_end) { if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next)) return true; eval++; } // We reached the end of the string, let see if the pattern contains only // wildcards. if (eval == eval_end) { EatWildcard(&pattern, pattern_end, next); if (pattern != pattern_end) return false; return true; } } return false; } struct NextCharUTF8 { base_icu::UChar32 operator()(const char** p, const char* end) { base_icu::UChar32 c; int offset = 0; CBU8_NEXT(*p, offset, end - *p, c); *p += offset; return c; } }; struct NextCharUTF16 { base_icu::UChar32 operator()(const char16** p, const char16* end) { base_icu::UChar32 c; int offset = 0; CBU16_NEXT(*p, offset, end - *p, c); *p += offset; return c; } }; } // namespace bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) { return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), pattern.data() + pattern.size(), 0, NextCharUTF8()); } bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) { return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), pattern.data() + pattern.size(), 0, NextCharUTF16()); } } // namespace base