// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include <string> #include "base/i18n/rtl.h" #include "base/i18n/string_search.h" #include "base/strings/string16.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" #include "third_party/icu/source/i18n/unicode/usearch.h" namespace base { namespace i18n { // Note on setting default locale for testing: The current default locale on // the Mac trybot is en_US_POSIX, with which primary-level collation strength // string search is case-sensitive, when normally it should be // case-insensitive. In other locales (including en_US which English speakers // in the U.S. use), this search would be case-insensitive as expected. TEST(StringSearchTest, ASCII) { std::string default_locale(uloc_getDefault()); bool locale_is_posix = (default_locale == "en_US_POSIX"); if (locale_is_posix) SetICUDefaultLocale("en_US"); size_t index = 0; size_t length = 0; EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(5U, length); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"), &index, &length)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length)); EXPECT_EQ(4U, index); EXPECT_EQ(6U, length); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("searching within empty string"), string16(), &index, &length)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( string16(), ASCIIToUTF16("searching for empty string"), &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(0U, length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"), &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(18U, length); if (locale_is_posix) SetICUDefaultLocale(default_locale.data()); } TEST(StringSearchTest, UnicodeLocaleIndependent) { // Base characters const string16 e_base = WideToUTF16(L"e"); const string16 E_base = WideToUTF16(L"E"); const string16 a_base = WideToUTF16(L"a"); // Composed characters const string16 e_with_acute_accent = WideToUTF16(L"\u00e9"); const string16 E_with_acute_accent = WideToUTF16(L"\u00c9"); const string16 e_with_grave_accent = WideToUTF16(L"\u00e8"); const string16 E_with_grave_accent = WideToUTF16(L"\u00c8"); const string16 a_with_acute_accent = WideToUTF16(L"\u00e1"); // Decomposed characters const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301"); const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301"); const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300"); const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300"); const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301"); std::string default_locale(uloc_getDefault()); bool locale_is_posix = (default_locale == "en_US_POSIX"); if (locale_is_posix) SetICUDefaultLocale("en_US"); size_t index = 0; size_t length = 0; EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_base, e_with_acute_accent, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_acute_accent, e_base, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_base.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_base, e_with_acute_combining_mark, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_acute_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_acute_combining_mark, e_base, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_base.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_acute_combining_mark, e_with_acute_accent, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_acute_accent, e_with_acute_combining_mark, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_acute_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_acute_combining_mark, e_with_grave_combining_mark, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_grave_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_grave_combining_mark, e_with_acute_combining_mark, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_acute_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_acute_combining_mark, e_with_grave_accent, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_grave_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_grave_accent, e_with_acute_combining_mark, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_acute_combining_mark.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_with_acute_accent, e_with_acute_accent, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_with_grave_accent, e_with_acute_accent, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_with_acute_combining_mark, e_with_grave_accent, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_grave_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_with_grave_combining_mark, e_with_acute_accent, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_acute_accent.size(), length); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_base, e_with_grave_accent, &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(e_with_grave_accent.size(), length); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( a_with_acute_accent, e_with_acute_accent, &index, &length)); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( a_with_acute_combining_mark, e_with_acute_combining_mark, &index, &length)); if (locale_is_posix) SetICUDefaultLocale(default_locale.data()); } TEST(StringSearchTest, UnicodeLocaleDependent) { // Base characters const string16 a_base = WideToUTF16(L"a"); // Composed characters const string16 a_with_ring = WideToUTF16(L"\u00e5"); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( a_base, a_with_ring, NULL, NULL)); const char* default_locale = uloc_getDefault(); SetICUDefaultLocale("da"); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( a_base, a_with_ring, NULL, NULL)); SetICUDefaultLocale(default_locale); } TEST(StringSearchTest, FixedPatternMultipleSearch) { std::string default_locale(uloc_getDefault()); bool locale_is_posix = (default_locale == "en_US_POSIX"); if (locale_is_posix) SetICUDefaultLocale("en_US"); size_t index = 0; size_t length = 0; // Search "hello" over multiple texts. FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello")); EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length)); EXPECT_EQ(2U, index); EXPECT_EQ(5U, length); EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length)); EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length)); EXPECT_EQ(0U, index); EXPECT_EQ(5U, length); if (locale_is_posix) SetICUDefaultLocale(default_locale.data()); } } // namespace i18n } // namespace base