// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/autofill/address_field.h" #include <stddef.h> #include "base/logging.h" #include "base/memory/scoped_ptr.h" #include "base/string16.h" #include "base/string_util.h" #include "base/utf_string_conversions.h" #include "chrome/browser/autofill/autofill_field.h" #include "grit/autofill_resources.h" #include "ui/base/l10n/l10n_util.h" bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const { AutofillFieldType address_company; AutofillFieldType address_line1; AutofillFieldType address_line2; AutofillFieldType address_city; AutofillFieldType address_state; AutofillFieldType address_zip; AutofillFieldType address_country; switch (type_) { case kShippingAddress: // Fall through. Autofill does not support shipping addresses. case kGenericAddress: address_company = COMPANY_NAME; address_line1 = ADDRESS_HOME_LINE1; address_line2 = ADDRESS_HOME_LINE2; address_city = ADDRESS_HOME_CITY; address_state = ADDRESS_HOME_STATE; address_zip = ADDRESS_HOME_ZIP; address_country = ADDRESS_HOME_COUNTRY; break; case kBillingAddress: address_company = COMPANY_NAME; address_line1 = ADDRESS_BILLING_LINE1; address_line2 = ADDRESS_BILLING_LINE2; address_city = ADDRESS_BILLING_CITY; address_state = ADDRESS_BILLING_STATE; address_zip = ADDRESS_BILLING_ZIP; address_country = ADDRESS_BILLING_COUNTRY; break; default: NOTREACHED(); return false; } bool ok; ok = Add(field_type_map, company_, AutofillType(address_company)); DCHECK(ok); ok = ok && Add(field_type_map, address1_, AutofillType(address_line1)); DCHECK(ok); ok = ok && Add(field_type_map, address2_, AutofillType(address_line2)); DCHECK(ok); ok = ok && Add(field_type_map, city_, AutofillType(address_city)); DCHECK(ok); ok = ok && Add(field_type_map, state_, AutofillType(address_state)); DCHECK(ok); ok = ok && Add(field_type_map, zip_, AutofillType(address_zip)); DCHECK(ok); ok = ok && Add(field_type_map, country_, AutofillType(address_country)); DCHECK(ok); return ok; } FormFieldType AddressField::GetFormFieldType() const { return kAddressType; } AddressField* AddressField::Parse( std::vector<AutofillField*>::const_iterator* iter, bool is_ecml) { DCHECK(iter); if (!iter) return NULL; scoped_ptr<AddressField> address_field(new AddressField); std::vector<AutofillField*>::const_iterator q = *iter; string16 pattern; // The ECML standard uses 2 letter country codes. So we will // have to remember that this is an ECML form, for when we fill // it out. address_field->is_ecml_ = is_ecml; string16 attention_ignored = l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); string16 region_ignored = l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); // Allow address fields to appear in any order. while (true) { if (ParseCompany(&q, is_ecml, address_field.get()) || ParseAddressLines(&q, is_ecml, address_field.get()) || ParseCity(&q, is_ecml, address_field.get()) || ParseState(&q, is_ecml, address_field.get()) || ParseZipCode(&q, is_ecml, address_field.get()) || ParseCountry(&q, is_ecml, address_field.get())) { continue; } else if (ParseText(&q, attention_ignored) || ParseText(&q, region_ignored)) { // We ignore the following: // * Attention. // * Province/Region/Other. continue; } else if (*q != **iter && ParseEmpty(&q)) { // Ignore non-labeled fields within an address; the page // MapQuest Driving Directions North America.html contains such a field. // We only ignore such fields after we've parsed at least one other field; // otherwise we'd effectively parse address fields before other field // types after any non-labeled fields, and we want email address fields to // have precedence since some pages contain fields labeled // "Email address". continue; } else { // No field found. break; } } // If we have identified any address fields in this field then it should be // added to the list of fields. if (address_field->company_ != NULL || address_field->address1_ != NULL || address_field->address2_ != NULL || address_field->city_ != NULL || address_field->state_ != NULL || address_field->zip_ != NULL || address_field->zip4_ || address_field->country_ != NULL) { *iter = q; return address_field.release(); } return NULL; } AddressType AddressField::FindType() const { // This is not a full address, so don't even bother trying to figure // out its type. if (address1_ == NULL) return kGenericAddress; // First look at the field name, which itself will sometimes contain // "bill" or "ship". We could check for the ECML type prefixes // here, but there's no need to since ECML's prefixes Ecom_BillTo // and Ecom_ShipTo contain "bill" and "ship" anyway. string16 name = StringToLowerASCII(address1_->name); return AddressTypeFromText(name); } bool AddressField::IsFullAddress() { return address1_ != NULL; } AddressField::AddressField() : company_(NULL), address1_(NULL), address2_(NULL), city_(NULL), state_(NULL), zip_(NULL), zip4_(NULL), country_(NULL), type_(kGenericAddress), is_ecml_(false) { } // static bool AddressField::ParseCompany( std::vector<AutofillField*>::const_iterator* iter, bool is_ecml, AddressField* address_field) { if (address_field->company_ && !address_field->company_->IsEmpty()) return false; string16 pattern; if (is_ecml) pattern = GetEcmlPattern(kEcmlShipToCompanyName, kEcmlBillToCompanyName, '|'); else pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); if (!ParseText(iter, pattern, &address_field->company_)) return false; return true; } // static bool AddressField::ParseAddressLines( std::vector<AutofillField*>::const_iterator* iter, bool is_ecml, AddressField* address_field) { // We only match the string "address" in page text, not in element names, // because sometimes every element in a group of address fields will have // a name containing the string "address"; for example, on the page // Kohl's - Register Billing Address.html the text element labeled "city" // has the name "BILL_TO_ADDRESS<>city". We do match address labels // such as "address1", which appear as element names on various pages (eg // AmericanGirl-Registration.html, BloomingdalesBilling.html, // EBay Registration Enter Information.html). if (address_field->address1_) return false; string16 pattern; if (is_ecml) { pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); if (!ParseText(iter, pattern, &address_field->address1_)) return false; } else { pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); string16 label_pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); if (!ParseText(iter, pattern, &address_field->address1_)) if (!ParseLabelText(iter, label_pattern, &address_field->address1_)) return false; } // Optionally parse more address lines, which may have empty labels. // Some pages have 3 address lines (eg SharperImageModifyAccount.html) // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! if (is_ecml) { pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); if (!ParseEmptyText(iter, &address_field->address2_)) ParseText(iter, pattern, &address_field->address2_); } else { pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); string16 label_pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); if (!ParseEmptyText(iter, &address_field->address2_)) if (!ParseText(iter, pattern, &address_field->address2_)) ParseLabelText(iter, label_pattern, &address_field->address2_); } // Try for a third line, which we will promptly discard. if (address_field->address2_ != NULL) { if (is_ecml) { pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); ParseText(iter, pattern); } else { pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); if (!ParseEmptyText(iter, NULL)) ParseText(iter, pattern, NULL); } } return true; } // static bool AddressField::ParseCountry( std::vector<AutofillField*>::const_iterator* iter, bool is_ecml, AddressField* address_field) { // Parse a country. The occasional page (e.g. // Travelocity_New Member Information1.html) calls this a "location". // Note: ECML standard uses 2 letter country code (ISO 3166) if (address_field->country_ && !address_field->country_->IsEmpty()) return false; string16 pattern; if (is_ecml) pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); else pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); if (!ParseText(iter, pattern, &address_field->country_)) return false; return true; } // static bool AddressField::ParseZipCode( std::vector<AutofillField*>::const_iterator* iter, bool is_ecml, AddressField* address_field) { // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this // is called a "post code". // // HACK: Just for the MapQuest driving directions page we match the // exact name "1z", which MapQuest uses to label its zip code field. // Hopefully before long we'll be smart enough to find the zip code // on that page automatically. if (address_field->zip_) return false; // We may be out of fields. if (!**iter) return false; string16 pattern; if (is_ecml) { pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|'); } else { pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE); } AddressType tempType; string16 name = (**iter)->name; // Note: comparisons using the ecml compliant name as a prefix must be used in // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for // more detail. string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); if (StartsWith(name, bill_to_postal_code_field, false)) { tempType = kBillingAddress; } else if (StartsWith(name, bill_to_postal_code_field, false)) { tempType = kShippingAddress; } else { tempType = kGenericAddress; } if (!ParseText(iter, pattern, &address_field->zip_)) return false; address_field->type_ = tempType; if (!is_ecml) { // Look for a zip+4, whose field name will also often contain // the substring "zip". ParseText(iter, l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), &address_field->zip4_); } return true; } // static bool AddressField::ParseCity( std::vector<AutofillField*>::const_iterator* iter, bool is_ecml, AddressField* address_field) { // Parse a city name. Some UK pages (e.g. The China Shop2.html) use // the term "town". if (address_field->city_) return false; string16 pattern; if (is_ecml) pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); else pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); if (!ParseText(iter, pattern, &address_field->city_)) return false; return true; } // static bool AddressField::ParseState( std::vector<AutofillField*>::const_iterator* iter, bool is_ecml, AddressField* address_field) { if (address_field->state_) return false; string16 pattern; if (is_ecml) pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); else pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); if (!ParseText(iter, pattern, &address_field->state_)) return false; return true; } AddressType AddressField::AddressTypeFromText(const string16 &text) { if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) != string16::npos || text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) != string16::npos) // This text could be a checkbox label such as "same as my billing // address" or "use my shipping address". // ++ It would help if we generally skipped all text that appears // after a check box. return kGenericAddress; // Not all pages say "billing address" and "shipping address" explicitly; // for example, Craft Catalog1.html has "Bill-to Address" and // "Ship-to Address". size_t bill = text.rfind( l10n_util::GetStringUTF16(IDS_AUTOFILL_BILLING_DESIGNATOR_RE)); size_t ship = text.rfind( l10n_util::GetStringUTF16(IDS_AUTOFILL_SHIPPING_DESIGNATOR_RE)); if (bill == string16::npos && ship == string16::npos) return kGenericAddress; if (bill != string16::npos && ship == string16::npos) return kBillingAddress; if (bill == string16::npos && ship != string16::npos) return kShippingAddress; if (bill > ship) return kBillingAddress; return kShippingAddress; }