/*
*******************************************************************************
* Copyright (C) 1997-2014, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*/

#include "uassert.h"
#include "decimalformatpattern.h"

#if !UCONFIG_NO_FORMATTING

#include "unicode/dcfmtsym.h"
#include "unicode/format.h"
#include "unicode/utf16.h"

#ifdef FMT_DEBUG
#define debug(x) printf("%s:%d: %s\n", __FILE__,__LINE__, x);
#else
#define debug(x)
#endif

// Unlocalized pattern characters.
#define kPatternZeroDigit            ((UChar)0x0030) /*'0'*/
#define kPatternSignificantDigit     ((UChar)0x0040) /*'@'*/
#define kPatternGroupingSeparator    ((UChar)0x002C) /*','*/
#define kPatternDecimalSeparator     ((UChar)0x002E) /*'.'*/
#define kPatternPerMill              ((UChar)0x2030)
#define kPatternPercent              ((UChar)0x0025) /*'%'*/
#define kPatternDigit                ((UChar)0x0023) /*'#'*/
#define kPatternSeparator            ((UChar)0x003B) /*';'*/
#define kPatternExponent             ((UChar)0x0045) /*'E'*/
#define kPatternPlus                 ((UChar)0x002B) /*'+'*/
#define kPatternMinus                ((UChar)0x002D) /*'-'*/
#define kPatternPadEscape            ((UChar)0x002A) /*'*'*/
#define kQuote                       ((UChar)0x0027) /*'\''*/

#define kCurrencySign                ((UChar)0x00A4)
#define kDefaultPad                  ((UChar)0x0020) /* */

U_NAMESPACE_BEGIN

// TODO: Travis Keep: Copied from numfmt.cpp
static int32_t kDoubleIntegerDigits  = 309;
static int32_t kDoubleFractionDigits = 340;


// TODO: Travis Keep: Copied from numfmt.cpp
static int32_t gDefaultMaxIntegerDigits = 2000000000;

// TODO: Travis Keep: This function was copied from format.cpp
/**
 * Fills in parseError for a syntax error found at index pos of pattern.
 *
 * parseError.offset is set to pos and parseError.line to 0. preContext
 * receives up to U_PARSE_CONTEXT_LEN-1 code units that precede pos, and
 * postContext receives up to U_PARSE_CONTEXT_LEN-2 code units that follow
 * the unit at pos. Both buffers are NUL-terminated.
 *
 * @param pattern    the pattern being parsed
 * @param pos        index in pattern at which the error was detected; may be
 *                   equal to pattern.length() when the error is only
 *                   detected after the whole pattern has been consumed
 * @param parseError receives the offset and the surrounding context
 */
static void syntaxError(const UnicodeString& pattern,
                        int32_t pos,
                        UParseError& parseError) {
    parseError.offset = pos;
    parseError.line = 0;  // we are not using line number

    // The context windows are computed from a position pinned to
    // [0, length]. Without this, pos == length makes the post-context
    // start (pos+1) larger than its stop (length), and the terminator
    // below would be written to postContext[-1].
    const int32_t patLen = pattern.length();
    const int32_t ctxPos = (pos < 0) ? 0 : ((pos > patLen) ? patLen : pos);

    // for pre-context
    int32_t start = (ctxPos < U_PARSE_CONTEXT_LEN)? 0 : (ctxPos - (U_PARSE_CONTEXT_LEN-1
                                                                   /* subtract 1 so that we have room for null*/));
    int32_t stop  = ctxPos;
    pattern.extract(start,stop-start,parseError.preContext,0);
    //null terminate the buffer
    parseError.preContext[stop-start] = 0;

    //for post-context
    start = ctxPos+1;
    stop  = ((ctxPos+U_PARSE_CONTEXT_LEN)<=patLen) ? (ctxPos+(U_PARSE_CONTEXT_LEN-1)) :
        patLen;
    if (start > stop) {
        // Error at (or past) the end of the pattern: empty post-context.
        start = stop;
    }
    pattern.extract(start,stop-start,parseError.postContext,0);
    //null terminate the buffer
    parseError.postContext[stop-start]= 0;
}

/**
 * Constructs a pattern description holding the default values listed in the
 * initializer list below. Both the positive and the negative affix patterns
 * start out marked as bogus.
 */
DecimalFormatPattern::DecimalFormatPattern()
        : fMinimumIntegerDigits(1),
          fMaximumIntegerDigits(gDefaultMaxIntegerDigits),
          fMinimumFractionDigits(0),
          fMaximumFractionDigits(3),
          fUseSignificantDigits(FALSE),
          fMinimumSignificantDigits(1),
          fMaximumSignificantDigits(6),
          fUseExponentialNotation(FALSE),
          fMinExponentDigits(0),
          fExponentSignAlwaysShown(FALSE),
          fCurrencySignCount(fgCurrencySignCountZero),
          fGroupingUsed(TRUE),
          fGroupingSize(0),
          fGroupingSize2(0),
          fMultiplier(1),
          fDecimalSeparatorAlwaysShown(FALSE),
          fFormatWidth(0),
          fRoundingIncrementUsed(FALSE),
          fRoundingIncrement(),
          fPad(kPatternPadEscape),
          fNegPatternsBogus(TRUE),
          fPosPatternsBogus(TRUE),
          fNegPrefixPattern(),
          fNegSuffixPattern(),
          fPosPrefixPattern(),
          fPosSuffixPattern(),
          fPadPosition(DecimalFormatPattern::kPadBeforePrefix) {
}


/**
 * Constructs a parser that recognizes the unlocalized pattern characters
 * defined by the kPattern* macros at the top of this file.
 */
DecimalFormatPatternParser::DecimalFormatPatternParser() :
    fZeroDigit(kPatternZeroDigit),
    fSigDigit(kPatternSignificantDigit),
    fGroupingSeparator((UChar)kPatternGroupingSeparator),
    fDecimalSeparator((UChar)kPatternDecimalSeparator),
    fPercent((UChar)kPatternPercent),
    fPerMill((UChar)kPatternPerMill),
    fDigit((UChar)kPatternDigit),
    fSeparator((UChar)kPatternSeparator),
    fExponent((UChar)kPatternExponent),
    fPlus((UChar)kPatternPlus),
    fMinus((UChar)kPatternMinus),
    fPadEscape((UChar)kPatternPadEscape) {
}

/**
 * Replaces the pattern characters this parser recognizes with the
 * corresponding symbols taken from the given DecimalFormatSymbols.
 *
 * The zero digit and the significant digit are reduced to the first code
 * point of their symbol string; all other symbols are stored as strings.
 *
 * @param symbols source of the pattern characters to recognize
 */
void DecimalFormatPatternParser::useSymbols(
        const DecimalFormatSymbols& symbols) {
    fZeroDigit = symbols.getConstSymbol(
            DecimalFormatSymbols::kZeroDigitSymbol).char32At(0);
    fSigDigit = symbols.getConstSymbol(
            DecimalFormatSymbols::kSignificantDigitSymbol).char32At(0);
    fGroupingSeparator = symbols.getConstSymbol(
            DecimalFormatSymbols::kGroupingSeparatorSymbol);
    fDecimalSeparator = symbols.getConstSymbol(
            DecimalFormatSymbols::kDecimalSeparatorSymbol);
    fPercent = symbols.getConstSymbol(
            DecimalFormatSymbols::kPercentSymbol);
    fPerMill = symbols.getConstSymbol(
            DecimalFormatSymbols::kPerMillSymbol);
    fDigit = symbols.getConstSymbol(
            DecimalFormatSymbols::kDigitSymbol);
    fSeparator = symbols.getConstSymbol(
            DecimalFormatSymbols::kPatternSeparatorSymbol);
    fExponent = symbols.getConstSymbol(
            DecimalFormatSymbols::kExponentialSymbol);
    fPlus = symbols.getConstSymbol(
            DecimalFormatSymbols::kPlusSignSymbol);
    fMinus = symbols.getConstSymbol(
            DecimalFormatSymbols::kMinusSignSymbol);
    fPadEscape = symbols.getConstSymbol(
            DecimalFormatSymbols::kPadEscapeSymbol);
}

/**
 * Parses a decimal format pattern into a DecimalFormatPattern.
 *
 * The pattern consists of a positive part optionally followed by the
 * separator and a negative part. Only the prefix and suffix of the negative
 * part are recorded; all numeric properties come from the positive part.
 * Special characters found in a prefix or suffix (percent, per mill,
 * currency sign, minus, plus, quote) are stored in the affix pattern in
 * their unlocalized form, each preceded by a quote character.
 *
 * If no negative affixes are given, or they equal the positive ones, the
 * negative prefix is derived from the positive prefix by prepending an
 * encoded minus sign.
 *
 * @param pattern    the pattern to parse
 * @param out        reset to a default DecimalFormatPattern and then filled
 *                   in; on error it may be left partially updated
 * @param parseError cleared on entry; on a syntax error receives the offset
 *                   and context of the error
 * @param status     nothing is done if this is already a failure on entry;
 *                   on a syntax error it is set to the matching error code
 */
void
DecimalFormatPatternParser::applyPatternWithoutExpandAffix(
        const UnicodeString& pattern,
        DecimalFormatPattern& out,
        UParseError& parseError,
        UErrorCode& status) {
    if (U_FAILURE(status))
    {
        return;
    }
    out = DecimalFormatPattern();

    // Clear error struct
    parseError.offset = -1;
    parseError.preContext[0] = parseError.postContext[0] = (UChar)0;

    // TODO: Travis Keep: This won't always work.
    // Kept as a full code point: fZeroDigit is assigned from char32At(0) in
    // useSymbols(), so truncating the sum to a UChar would yield a wrong
    // upper bound for a zero digit outside the BMP. This still assumes that
    // the ten digits are contiguous code points.
    const UChar32 nineDigit = fZeroDigit + 9;
    int32_t digitLen = fDigit.length();
    int32_t groupSepLen = fGroupingSeparator.length();
    int32_t decimalSepLen = fDecimalSeparator.length();

    int32_t pos = 0;
    int32_t patLen = pattern.length();
    // Part 0 is the positive pattern.  Part 1, if present, is the negative
    // pattern.
    for (int32_t part=0; part<2 && pos<patLen; ++part) {
        // The subpart ranges from 0 to 4: 0=pattern proper, 1=prefix,
        // 2=suffix, 3=prefix in quote, 4=suffix in quote.  Subpart 0 is
        // between the prefix and suffix, and consists of pattern
        // characters.  In the prefix and suffix, percent, perMill, and
        // currency symbols are recognized and translated.
        int32_t subpart = 1, sub0Start = 0, sub0Limit = 0, sub2Limit = 0;

        // It's important that we don't change any fields of this object
        // prematurely.  We set the following variables for the multiplier,
        // grouping, etc., and then only change the actual object fields if
        // everything parses correctly.  This also lets us register
        // the data from part 0 and ignore the part 1, except for the
        // prefix and suffix.
        UnicodeString prefix;
        UnicodeString suffix;
        int32_t decimalPos = -1;
        int32_t multiplier = 1;
        int32_t digitLeftCount = 0, zeroDigitCount = 0, digitRightCount = 0, sigDigitCount = 0;
        int8_t groupingCount = -1;
        int8_t groupingCount2 = -1;
        int32_t padPos = -1;
        UChar32 padChar = 0;
        int32_t roundingPos = -1;
        DigitList roundingInc;
        int8_t expDigits = -1;
        UBool expSignAlways = FALSE;

        // The affix is either the prefix or the suffix.
        UnicodeString* affix = &prefix;

        int32_t start = pos;
        UBool isPartDone = FALSE;
        UChar32 ch;

        for (; !isPartDone && pos < patLen; ) {
            // Todo: account for surrogate pairs
            ch = pattern.char32At(pos);
            switch (subpart) {
            case 0: // Pattern proper subpart (between prefix & suffix)
                // Process the digits, decimal, and grouping characters.  We
                // record five pieces of information.  We expect the digits
                // to occur in the pattern ####00.00####, and we record the
                // number of left digits, zero (central) digits, and right
                // digits.  The position of the last grouping character is
                // recorded (should be somewhere within the first two blocks
                // of characters), as is the position of the decimal point,
                // if any (should be in the zero digits).  If there is no
                // decimal point, then there should be no right digits.
                if (pattern.compare(pos, digitLen, fDigit) == 0) {
                    if (zeroDigitCount > 0 || sigDigitCount > 0) {
                        ++digitRightCount;
                    } else {
                        ++digitLeftCount;
                    }
                    if (groupingCount >= 0 && decimalPos < 0) {
                        ++groupingCount;
                    }
                    pos += digitLen;
                } else if ((ch >= fZeroDigit && ch <= nineDigit) ||
                           ch == fSigDigit) {
                    if (digitRightCount > 0) {
                        // Unexpected '0'
                        debug("Unexpected '0'")
                        status = U_UNEXPECTED_TOKEN;
                        syntaxError(pattern,pos,parseError);
                        return;
                    }
                    if (ch == fSigDigit) {
                        ++sigDigitCount;
                    } else {
                        // The first non-zero digit starts the rounding
                        // increment; every digit from there on is part of it.
                        if (ch != fZeroDigit && roundingPos < 0) {
                            roundingPos = digitLeftCount + zeroDigitCount;
                        }
                        if (roundingPos >= 0) {
                            roundingInc.append((char)(ch - fZeroDigit + '0'));
                        }
                        ++zeroDigitCount;
                    }
                    if (groupingCount >= 0 && decimalPos < 0) {
                        ++groupingCount;
                    }
                    pos += U16_LENGTH(ch);
                } else if (pattern.compare(pos, groupSepLen, fGroupingSeparator) == 0) {
                    if (decimalPos >= 0) {
                        // Grouping separator after decimal
                        debug("Grouping separator after decimal")
                        status = U_UNEXPECTED_TOKEN;
                        syntaxError(pattern,pos,parseError);
                        return;
                    }
                    groupingCount2 = groupingCount;
                    groupingCount = 0;
                    pos += groupSepLen;
                } else if (pattern.compare(pos, decimalSepLen, fDecimalSeparator) == 0) {
                    if (decimalPos >= 0) {
                        // Multiple decimal separators
                        debug("Multiple decimal separators")
                        status = U_MULTIPLE_DECIMAL_SEPARATORS;
                        syntaxError(pattern,pos,parseError);
                        return;
                    }
                    // Intentionally incorporate the digitRightCount,
                    // even though it is illegal for this to be > 0
                    // at this point.  We check pattern syntax below.
                    decimalPos = digitLeftCount + zeroDigitCount + digitRightCount;
                    pos += decimalSepLen;
                } else {
                    if (pattern.compare(pos, fExponent.length(), fExponent) == 0) {
                        if (expDigits >= 0) {
                            // Multiple exponential symbols
                            debug("Multiple exponential symbols")
                            status = U_MULTIPLE_EXPONENTIAL_SYMBOLS;
                            syntaxError(pattern,pos,parseError);
                            return;
                        }
                        if (groupingCount >= 0) {
                            // Grouping separator in exponential pattern
                            debug("Grouping separator in exponential pattern")
                            status = U_MALFORMED_EXPONENTIAL_PATTERN;
                            syntaxError(pattern,pos,parseError);
                            return;
                        }
                        pos += fExponent.length();
                        // Check for positive prefix
                        if (pos < patLen
                            && pattern.compare(pos, fPlus.length(), fPlus) == 0) {
                            expSignAlways = TRUE;
                            pos += fPlus.length();
                        }
                        // Use lookahead to parse out the exponential part of the
                        // pattern, then jump into suffix subpart.
                        expDigits = 0;
                        while (pos < patLen &&
                               pattern.char32At(pos) == fZeroDigit) {
                            ++expDigits;
                            pos += U16_LENGTH(fZeroDigit);
                        }

                        // 1. Require at least one mantissa pattern digit
                        // 2. Disallow "#+ @" in mantissa
                        // 3. Require at least one exponent pattern digit
                        if (((digitLeftCount + zeroDigitCount) < 1 &&
                             (sigDigitCount + digitRightCount) < 1) ||
                            (sigDigitCount > 0 && digitLeftCount > 0) ||
                            expDigits < 1) {
                            // Malformed exponential pattern
                            debug("Malformed exponential pattern")
                            status = U_MALFORMED_EXPONENTIAL_PATTERN;
                            syntaxError(pattern,pos,parseError);
                            return;
                        }
                    }
                    // Transition to suffix subpart
                    subpart = 2; // suffix subpart
                    affix = &suffix;
                    sub0Limit = pos;
                    continue;
                }
                break;
            case 1: // Prefix subpart
            case 2: // Suffix subpart
                // Process the prefix / suffix characters
                // Process unquoted characters seen in prefix or suffix
                // subpart.

                // Several syntax characters implicitly begins the
                // next subpart if we are in the prefix; otherwise
                // they are illegal if unquoted.
                if (!pattern.compare(pos, digitLen, fDigit) ||
                    !pattern.compare(pos, groupSepLen, fGroupingSeparator) ||
                    !pattern.compare(pos, decimalSepLen, fDecimalSeparator) ||
                    (ch >= fZeroDigit && ch <= nineDigit) ||
                    ch == fSigDigit) {
                    if (subpart == 1) { // prefix subpart
                        subpart = 0; // pattern proper subpart
                        sub0Start = pos; // Reprocess this character
                        continue;
                    } else {
                        status = U_UNQUOTED_SPECIAL;
                        syntaxError(pattern,pos,parseError);
                        return;
                    }
                } else if (ch == kCurrencySign) {
                    affix->append(kQuote); // Encode currency
                    // Use lookahead to determine if the currency sign is
                    // doubled or not.
                    U_ASSERT(U16_LENGTH(kCurrencySign) == 1);
                    if ((pos+1) < pattern.length() && pattern[pos+1] == kCurrencySign) {
                        affix->append(kCurrencySign);
                        ++pos; // Skip over the doubled character
                        if ((pos+1) < pattern.length() &&
                            pattern[pos+1] == kCurrencySign) {
                            affix->append(kCurrencySign);
                            ++pos; // Skip over the doubled character
                            out.fCurrencySignCount = fgCurrencySignCountInPluralFormat;
                        } else {
                            out.fCurrencySignCount = fgCurrencySignCountInISOFormat;
                        }
                    } else {
                        out.fCurrencySignCount = fgCurrencySignCountInSymbolFormat;
                    }
                    // Fall through to append(ch)
                } else if (ch == kQuote) {
                    // A quote outside quotes indicates either the opening
                    // quote or two quotes, which is a quote literal.  That is,
                    // we have the first quote in 'do' or o''clock.
                    U_ASSERT(U16_LENGTH(kQuote) == 1);
                    ++pos;
                    if (pos < pattern.length() && pattern[pos] == kQuote) {
                        affix->append(kQuote); // Encode quote
                        // Fall through to append(ch)
                    } else {
                        subpart += 2; // open quote
                        continue;
                    }
                } else if (pattern.compare(pos, fSeparator.length(), fSeparator) == 0) {
                    // Don't allow separators in the prefix, and don't allow
                    // separators in the second pattern (part == 1).
                    if (subpart == 1 || part == 1) {
                        // Unexpected separator
                        debug("Unexpected separator")
                        status = U_UNEXPECTED_TOKEN;
                        syntaxError(pattern,pos,parseError);
                        return;
                    }
                    sub2Limit = pos;
                    isPartDone = TRUE; // Go to next part
                    pos += fSeparator.length();
                    break;
                } else if (pattern.compare(pos, fPercent.length(), fPercent) == 0) {
                    // Next handle characters which are appended directly.
                    if (multiplier != 1) {
                        // Too many percent/perMill characters
                        debug("Too many percent characters")
                        status = U_MULTIPLE_PERCENT_SYMBOLS;
                        syntaxError(pattern,pos,parseError);
                        return;
                    }
                    affix->append(kQuote); // Encode percent/perMill
                    affix->append(kPatternPercent); // Use unlocalized pattern char
                    multiplier = 100;
                    pos += fPercent.length();
                    break;
                } else if (pattern.compare(pos, fPerMill.length(), fPerMill) == 0) {
                    // Next handle characters which are appended directly.
                    if (multiplier != 1) {
                        // Too many percent/perMill characters
                        debug("Too many perMill characters")
                        status = U_MULTIPLE_PERMILL_SYMBOLS;
                        syntaxError(pattern,pos,parseError);
                        return;
                    }
                    affix->append(kQuote); // Encode percent/perMill
                    affix->append(kPatternPerMill); // Use unlocalized pattern char
                    multiplier = 1000;
                    pos += fPerMill.length();
                    break;
                } else if (pattern.compare(pos, fPadEscape.length(), fPadEscape) == 0) {
                    // The end-of-pattern test uses the actual length of the
                    // pad escape so that the pad character read below is
                    // always inside the pattern.
                    if (padPos >= 0 ||               // Multiple pad specifiers
                        (pos + fPadEscape.length()) >= patLen) { // Nothing after padEscape
                        debug("Multiple pad specifiers")
                        status = U_MULTIPLE_PAD_SPECIFIERS;
                        syntaxError(pattern,pos,parseError);
                        return;
                    }
                    padPos = pos;
                    pos += fPadEscape.length();
                    padChar = pattern.char32At(pos);
                    pos += U16_LENGTH(padChar);
                    break;
                } else if (pattern.compare(pos, fMinus.length(), fMinus) == 0) {
                    affix->append(kQuote); // Encode minus
                    affix->append(kPatternMinus);
                    pos += fMinus.length();
                    break;
                } else if (pattern.compare(pos, fPlus.length(), fPlus) == 0) {
                    affix->append(kQuote); // Encode plus
                    affix->append(kPatternPlus);
                    pos += fPlus.length();
                    break;
                }
                // Unquoted, non-special characters fall through to here, as
                // well as other code which needs to append something to the
                // affix.
                affix->append(ch);
                pos += U16_LENGTH(ch);
                break;
            case 3: // Prefix subpart, in quote
            case 4: // Suffix subpart, in quote
                // A quote within quotes indicates either the closing
                // quote or two quotes, which is a quote literal.  That is,
                // we have the second quote in 'do' or 'don''t'.
                if (ch == kQuote) {
                    ++pos;
                    if (pos < pattern.length() && pattern[pos] == kQuote) {
                        affix->append(kQuote); // Encode quote
                        // Fall through to append(ch)
                    } else {
                        subpart -= 2; // close quote
                        continue;
                    }
                }
                affix->append(ch);
                pos += U16_LENGTH(ch);
                break;
            }
        }

        if (sub0Limit == 0) {
            sub0Limit = pattern.length();
        }

        if (sub2Limit == 0) {
            sub2Limit = pattern.length();
        }

        /* Handle patterns with no '0' pattern character.  These patterns
         * are legal, but must be recodified to make sense.  "##.###" ->
         * "#0.###".  ".###" -> ".0##".
         *
         * We allow patterns of the form "####" to produce a zeroDigitCount
         * of zero (got that?); although this seems like it might make it
         * possible for format() to produce empty strings, format() checks
         * for this condition and outputs a zero digit in this situation.
         * Having a zeroDigitCount of zero yields a minimum integer digits
         * of zero, which allows proper round-trip patterns.  We don't want
         * "#" to become "#0" when toPattern() is called (even though that's
         * what it really is, semantically).
         */
        if (zeroDigitCount == 0 && sigDigitCount == 0 &&
            digitLeftCount > 0 && decimalPos >= 0) {
            // Handle "###.###" and "###." and ".###"
            int n = decimalPos;
            if (n == 0)
                ++n; // Handle ".###"
            digitRightCount = digitLeftCount - n;
            digitLeftCount = n - 1;
            zeroDigitCount = 1;
        }

        // Do syntax checking on the digits, decimal points, and quotes.
        if ((decimalPos < 0 && digitRightCount > 0 && sigDigitCount == 0) ||
            (decimalPos >= 0 &&
             (sigDigitCount > 0 ||
              decimalPos < digitLeftCount ||
              decimalPos > (digitLeftCount + zeroDigitCount))) ||
            groupingCount == 0 || groupingCount2 == 0 ||
            (sigDigitCount > 0 && zeroDigitCount > 0) ||
            subpart > 2)
        { // subpart > 2 == unmatched quote
            debug("Syntax error")
            status = U_PATTERN_SYNTAX_ERROR;
            syntaxError(pattern,pos,parseError);
            return;
        }

        // Make sure pad is at legal position before or after affix.
        if (padPos >= 0) {
            if (padPos == start) {
                padPos = DecimalFormatPattern::kPadBeforePrefix;
            } else if (padPos+2 == sub0Start) {
                padPos = DecimalFormatPattern::kPadAfterPrefix;
            } else if (padPos == sub0Limit) {
                padPos = DecimalFormatPattern::kPadBeforeSuffix;
            } else if (padPos+2 == sub2Limit) {
                padPos = DecimalFormatPattern::kPadAfterSuffix;
            } else {
                // Illegal pad position
                debug("Illegal pad position")
                status = U_ILLEGAL_PAD_POSITION;
                syntaxError(pattern,pos,parseError);
                return;
            }
        }

        if (part == 0) {
            out.fPosPatternsBogus = FALSE;
            out.fPosPrefixPattern = prefix;
            out.fPosSuffixPattern = suffix;
            out.fNegPatternsBogus = TRUE;
            out.fNegPrefixPattern.remove();
            out.fNegSuffixPattern.remove();

            out.fUseExponentialNotation = (expDigits >= 0);
            if (out.fUseExponentialNotation) {
                out.fMinExponentDigits = expDigits;
            }
            out.fExponentSignAlwaysShown = expSignAlways;
            int32_t digitTotalCount = digitLeftCount + zeroDigitCount + digitRightCount;
            // The effectiveDecimalPos is the position the decimal is at or
            // would be at if there is no decimal.  Note that if
            // decimalPos<0, then digitTotalCount == digitLeftCount +
            // zeroDigitCount.
            int32_t effectiveDecimalPos = decimalPos >= 0 ? decimalPos : digitTotalCount;
            UBool isSigDig = (sigDigitCount > 0);
            out.fUseSignificantDigits = isSigDig;
            if (isSigDig) {
                out.fMinimumSignificantDigits = sigDigitCount;
                out.fMaximumSignificantDigits = sigDigitCount + digitRightCount;
            } else {
                int32_t minInt = effectiveDecimalPos - digitLeftCount;
                out.fMinimumIntegerDigits = minInt;
                out.fMaximumIntegerDigits = out.fUseExponentialNotation
                    ? digitLeftCount + out.fMinimumIntegerDigits
                    : gDefaultMaxIntegerDigits;
                out.fMaximumFractionDigits = decimalPos >= 0
                    ? (digitTotalCount - decimalPos) : 0;
                out.fMinimumFractionDigits = decimalPos >= 0
                    ? (digitLeftCount + zeroDigitCount - decimalPos) : 0;
            }
            out.fGroupingUsed = groupingCount > 0;
            out.fGroupingSize = (groupingCount > 0) ? groupingCount : 0;
            out.fGroupingSize2 = (groupingCount2 > 0 && groupingCount2 != groupingCount)
                ? groupingCount2 : 0;
            out.fMultiplier = multiplier;
            out.fDecimalSeparatorAlwaysShown = decimalPos == 0
                || decimalPos == digitTotalCount;
            if (padPos >= 0) {
                out.fPadPosition = (DecimalFormatPattern::EPadPosition) padPos;
                // To compute the format width, first set up sub0Limit -
                // sub0Start.  Add in prefix/suffix length later.

                // fFormatWidth = prefix.length() + suffix.length() +
                //    sub0Limit - sub0Start;
                out.fFormatWidth = sub0Limit - sub0Start;
                out.fPad = padChar;
            } else {
                out.fFormatWidth = 0;
            }
            if (roundingPos >= 0) {
                out.fRoundingIncrementUsed = TRUE;
                roundingInc.setDecimalAt(effectiveDecimalPos - roundingPos);
                out.fRoundingIncrement = roundingInc;
            } else {
                out.fRoundingIncrementUsed = FALSE;
            }
        } else {
            // Only the affixes of the negative part are kept.
            out.fNegPatternsBogus = FALSE;
            out.fNegPrefixPattern = prefix;
            out.fNegSuffixPattern = suffix;
        }
    }

    // An empty pattern never enters the loop above; give it explicit values.
    if (pattern.length() == 0) {
        out.fNegPatternsBogus = TRUE;
        out.fNegPrefixPattern.remove();
        out.fNegSuffixPattern.remove();
        out.fPosPatternsBogus = FALSE;
        out.fPosPrefixPattern.remove();
        out.fPosSuffixPattern.remove();

        out.fMinimumIntegerDigits = 0;
        out.fMaximumIntegerDigits = kDoubleIntegerDigits;
        out.fMinimumFractionDigits = 0;
        out.fMaximumFractionDigits = kDoubleFractionDigits;

        out.fUseExponentialNotation = FALSE;
        out.fCurrencySignCount = fgCurrencySignCountZero;
        out.fGroupingUsed = FALSE;
        out.fGroupingSize = 0;
        out.fGroupingSize2 = 0;
        out.fMultiplier = 1;
        out.fDecimalSeparatorAlwaysShown = FALSE;
        out.fFormatWidth = 0;
        out.fRoundingIncrementUsed = FALSE;
    }

    // If there was no negative pattern, or if the negative pattern is
    // identical to the positive pattern, then prepend the minus sign to the
    // positive pattern to form the negative pattern.
    if (out.fNegPatternsBogus ||
        (out.fNegPrefixPattern == out.fPosPrefixPattern
         && out.fNegSuffixPattern == out.fPosSuffixPattern)) {
        out.fNegPatternsBogus = FALSE;
        out.fNegSuffixPattern = out.fPosSuffixPattern;
        out.fNegPrefixPattern.remove();
        out.fNegPrefixPattern.append(kQuote).append(kPatternMinus)
            .append(out.fPosPrefixPattern);
    }
}

U_NAMESPACE_END

#endif /* !UCONFIG_NO_FORMATTING */