/*
* (C) 1999 Lars Knoll (knoll@kde.org)
* Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
* Copyright (C) 2007-2009 Torch Mobile, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "config.h"
#include "PlatformString.h"
#include "CString.h"
#include "FloatConversion.h"
#include "StringBuffer.h"
#include "TextBreakIterator.h"
#include "TextEncoding.h"
#include <wtf/dtoa.h>
#include <limits>
#include <stdarg.h>
#include <wtf/ASCIICType.h>
#include <wtf/StringExtras.h>
#include <wtf/Vector.h>
#include <wtf/unicode/Unicode.h>
#include <wtf/unicode/UTF8.h>
#if USE(JSC)
#include <runtime/Identifier.h>
using JSC::Identifier;
using JSC::UString;
#endif
using namespace WTF;
using namespace WTF::Unicode;
namespace WebCore {
// Builds a string from len UTF-16 code units starting at str.
// A null pointer leaves the string null (no impl is created).
String::String(const UChar* str, unsigned len)
{
    if (str)
        m_impl = StringImpl::create(str, len);
}
// Builds a string from a zero-terminated UTF-16 buffer.
// A null pointer leaves the string null.
String::String(const UChar* str)
{
    if (!str)
        return;

    // Count code units up to (not including) the terminating zero.
    int length = 0;
    for (const UChar* cursor = str; *cursor != UChar(0); ++cursor)
        ++length;

    m_impl = StringImpl::create(str, length);
}
// Builds a string from a zero-terminated char buffer via StringImpl::create.
// A null pointer leaves the string null.
String::String(const char* str)
{
    if (str)
        m_impl = StringImpl::create(str);
}
// Builds a string from the first length chars at str via StringImpl::create.
// A null pointer leaves the string null.
String::String(const char* str, unsigned length)
{
    if (str)
        m_impl = StringImpl::create(str, length);
}
void String::append(const String& str)
{
if (str.isEmpty())
return;
// FIXME: This is extremely inefficient. So much so that we might want to take this
// out of String's API. We can make it better by optimizing the case where exactly
// one String is pointing at this StringImpl, but even then it's going to require a
// call to fastMalloc every single time.
if (str.m_impl) {
if (m_impl) {
UChar* data;
RefPtr<StringImpl> newImpl =
StringImpl::createUninitialized(m_impl->length() + str.length(), data);
memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar));
m_impl = newImpl.release();
} else
m_impl = str.m_impl;
}
}
// Appends a single 8-bit character to this string.
// When the string is null, a new one-character string is created instead.
void String::append(char c)
{
    // FIXME: This is extremely inefficient. So much so that we might want to take this
    // out of String's API. We can make it better by optimizing the case where exactly
    // one String is pointing at this StringImpl, but even then it's going to require a
    // call to fastMalloc every single time.
    if (m_impl) {
        UChar* data;
        RefPtr<StringImpl> newImpl =
            StringImpl::createUninitialized(m_impl->length() + 1, data);
        memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
        // Widen through unsigned char: where plain char is signed, assigning a
        // high-bit byte directly would sign-extend it to 0xFFxx instead of
        // mapping it to U+0080..U+00FF.
        // NOTE(review): the null-string path below relies on
        // StringImpl::create(const char*, unsigned) for its own widening;
        // presumably it also treats bytes as unsigned -- confirm.
        data[m_impl->length()] = static_cast<unsigned char>(c);
        m_impl = newImpl.release();
    } else
        m_impl = StringImpl::create(&c, 1);
}
// Appends a single UTF-16 code unit to this string.
// When the string is null, a new one-character string is created instead.
void String::append(UChar c)
{
    if (!m_impl) {
        m_impl = StringImpl::create(&c, 1);
        return;
    }

    // FIXME: This is extremely inefficient. So much so that we might want to take this
    // out of String's API. We can make it better by optimizing the case where exactly
    // one String is pointing at this StringImpl, but even then it's going to require a
    // call to fastMalloc every single time.
    const unsigned oldLength = m_impl->length();
    UChar* buffer;
    RefPtr<StringImpl> grown = StringImpl::createUninitialized(oldLength + 1, buffer);
    memcpy(buffer, m_impl->characters(), oldLength * sizeof(UChar));
    buffer[oldLength] = c;
    m_impl = grown.release();
}
// Concatenates two strings. If either side is empty (null or zero-length),
// the other side is returned unchanged.
String operator+(const String& a, const String& b)
{
    if (a.isEmpty())
        return b;
    if (b.isEmpty())
        return a;

    String joined(a);
    joined += b;
    return joined;
}
// Concatenates a String with a char buffer by first wrapping the buffer in a String.
String operator+(const String& s, const char* cs)
{
    String suffix(cs);
    return s + suffix;
}
// Concatenates a char buffer with a String by first wrapping the buffer in a String.
String operator+(const char* cs, const String& s)
{
    String prefix(cs);
    return prefix + s;
}
// Inserts str at index pos. Inserting a null string does nothing; inserting an
// empty (non-null) string into a null string makes this string adopt str's impl.
void String::insert(const String& str, unsigned pos)
{
    if (!str.isEmpty()) {
        insert(str.characters(), str.length(), pos);
        return;
    }

    // str has no characters: the only possible effect is null -> empty.
    if (!str.isNull() && isNull())
        m_impl = str.impl();
}
void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
{
if (!m_impl) {
if (!charactersToAppend)
return;
m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
return;
}
if (!lengthToAppend)
return;
ASSERT(charactersToAppend);
UChar* data;
RefPtr<StringImpl> newImpl =
StringImpl::createUninitialized(length() + lengthToAppend, data);
memcpy(data, characters(), length() * sizeof(UChar));
memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar));
m_impl = newImpl.release();
}
void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
{
if (position >= length()) {
append(charactersToInsert, lengthToInsert);
return;
}
ASSERT(m_impl);
if (!lengthToInsert)
return;
ASSERT(charactersToInsert);
UChar* data;
RefPtr<StringImpl> newImpl =
StringImpl::createUninitialized(length() + lengthToInsert, data);
memcpy(data, characters(), position * sizeof(UChar));
memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));
memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));
m_impl = newImpl.release();
}
// Returns the code unit at index i, or 0 when the string is null or i is out of range.
UChar String::operator[](unsigned i) const
{
    if (m_impl && i < m_impl->length())
        return m_impl->characters()[i];
    return 0;
}
// Forwards to StringImpl::characterStartingAt for index i;
// returns 0 when the string is null or i is out of range.
UChar32 String::characterStartingAt(unsigned i) const
{
    if (m_impl && i < m_impl->length())
        return m_impl->characterStartingAt(i);
    return 0;
}
// Returns the impl's length, or 0 for a null string.
unsigned String::length() const
{
    if (m_impl)
        return m_impl->length();
    return 0;
}
void String::truncate(unsigned position)
{
if (position >= length())
return;
UChar* data;
RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
memcpy(data, characters(), position * sizeof(UChar));
m_impl = newImpl.release();
}
void String::remove(unsigned position, int lengthToRemove)
{
if (lengthToRemove <= 0)
return;
if (position >= length())
return;
if (static_cast<unsigned>(lengthToRemove) > length() - position)
lengthToRemove = length() - position;
UChar* data;
RefPtr<StringImpl> newImpl =
StringImpl::createUninitialized(length() - lengthToRemove, data);
memcpy(data, characters(), position * sizeof(UChar));
memcpy(data + position, characters() + position + lengthToRemove,
(length() - lengthToRemove - position) * sizeof(UChar));
m_impl = newImpl.release();
}
// Forwards to StringImpl::substring; a null string yields a null string.
String String::substring(unsigned pos, unsigned len) const
{
    if (m_impl)
        return m_impl->substring(pos, len);
    return String();
}
// Forwards to StringImpl::lower; a null string yields a null string.
String String::lower() const
{
    if (m_impl)
        return m_impl->lower();
    return String();
}
// Forwards to StringImpl::upper; a null string yields a null string.
String String::upper() const
{
    if (m_impl)
        return m_impl->upper();
    return String();
}
// Forwards to StringImpl::stripWhiteSpace; a null string yields a null string.
String String::stripWhiteSpace() const
{
    if (m_impl)
        return m_impl->stripWhiteSpace();
    return String();
}
// Forwards to StringImpl::simplifyWhiteSpace; a null string yields a null string.
String String::simplifyWhiteSpace() const
{
    if (m_impl)
        return m_impl->simplifyWhiteSpace();
    return String();
}
// Forwards to StringImpl::removeCharacters with the given match function;
// a null string yields a null string.
String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
{
    if (m_impl)
        return m_impl->removeCharacters(findMatch);
    return String();
}
// Forwards to StringImpl::foldCase; a null string yields a null string.
String String::foldCase() const
{
    if (m_impl)
        return m_impl->foldCase();
    return String();
}
// If the string ends in '%', parses everything before it with
// charactersToIntStrict, stores the value in result and returns true.
// Returns false (leaving result untouched) for null/empty strings or when the
// last character is not '%'.
bool String::percentage(int& result) const
{
    if (!m_impl)
        return false;

    const unsigned totalLength = m_impl->length();
    if (!totalLength)
        return false;

    const unsigned lastIndex = totalLength - 1;
    if ((*m_impl)[lastIndex] != '%')
        return false;

    result = charactersToIntStrict(m_impl->characters(), lastIndex);
    return true;
}
// Returns the impl's character buffer, or a null pointer for a null string.
const UChar* String::characters() const
{
    if (m_impl)
        return m_impl->characters();
    return 0;
}
// Returns the character buffer, first replacing the impl with one created by
// StringImpl::createWithTerminatingNullCharacter if the current impl does not
// already report a terminating null. Returns a null pointer for a null string.
const UChar* String::charactersWithNullTermination()
{
    if (!m_impl)
        return 0;

    if (!m_impl->hasTerminatingNullCharacter())
        m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);

    return m_impl->characters();
}
// Builds a String from a printf-style format string and arguments.
//
// Three platform paths:
//  - Qt: delegates to QString::vsprintf.
//  - WinCE: retries vsnprintf with a doubling buffer until it reports success.
//  - Everything else: measures the output first, then formats into a buffer of
//    exactly that size. A measured length of 0 yields an empty string; a
//    negative (error) result yields a null String.
String String::format(const char *format, ...)
{
#if PLATFORM(QT)
    // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
    // https://bugs.webkit.org/show_bug.cgi?id=18994
    va_list args;
    va_start(args, format);
    QString buffer;
    buffer.vsprintf(format, args);
    va_end(args);
    return buffer;
#elif OS(WINCE)
    va_list args;
    va_start(args, format);
    Vector<char, 256> buffer;
    int bufferSize = 256;
    buffer.resize(bufferSize);
    for (;;) {
        int written = vsnprintf(buffer.data(), bufferSize, format, args);
        va_end(args);
        if (written == 0)
            return String("");
        if (written > 0)
            return StringImpl::create(buffer.data(), written);
        // Non-positive result other than 0: grow the buffer and try again
        // with a freshly started argument list.
        bufferSize <<= 1;
        buffer.resize(bufferSize);
        va_start(args, format);
    }
#else
    va_list args;
    va_start(args, format);
    Vector<char, 256> buffer;
    // Do the format once to get the length.
#if COMPILER(MSVC)
    int result = _vscprintf(format, args);
#else
    char ch;
    int result = vsnprintf(&ch, 1, format, args);
    // We need to call va_end() and then va_start() again here, as the
    // contents of args is undefined after the call to vsnprintf
    // according to http://man.cx/snprintf(3)
    //
    // Not calling va_end/va_start here happens to work on lots of
    // systems, but fails e.g. on 64bit Linux.
    va_end(args);
    va_start(args, format);
#endif
    // args is live at this point on both compiler paths, so every early
    // return must release it with va_end before leaving the function.
    if (result == 0) {
        va_end(args);
        return String("");
    }
    if (result < 0) {
        va_end(args);
        return String();
    }
    unsigned len = result;
    buffer.grow(len + 1);
    // Now do the formatting again, guaranteed to fit.
    vsnprintf(buffer.data(), buffer.size(), format, args);
    va_end(args);
    return StringImpl::create(buffer.data(), len);
#endif
}
// String::number overloads: each formats its argument through String::format
// using the printf conversion that matches the argument type.

// short -> "%hd"
String String::number(short n)
{
    return format("%hd", n);
}

// unsigned short -> "%hu"
String String::number(unsigned short n)
{
    return format("%hu", n);
}

// int -> "%d"
String String::number(int n)
{
    return format("%d", n);
}

// unsigned -> "%u"
String String::number(unsigned n)
{
    return format("%u", n);
}

// long -> "%ld"
String String::number(long n)
{
    return format("%ld", n);
}

// unsigned long -> "%lu"
String String::number(unsigned long n)
{
    return format("%lu", n);
}

// long long: "%I64i" on non-Qt Windows builds, "%lli" elsewhere.
String String::number(long long n)
{
#if OS(WINDOWS) && !PLATFORM(QT)
    return format("%I64i", n);
#else
    return format("%lli", n);
#endif
}

// unsigned long long: "%I64u" on non-Qt Windows builds, "%llu" elsewhere.
String String::number(unsigned long long n)
{
#if OS(WINDOWS) && !PLATFORM(QT)
    return format("%I64u", n);
#else
    return format("%llu", n);
#endif
}

// double -> "%.6lg" (at most six significant digits).
String String::number(double n)
{
    return format("%.6lg", n);
}
// Forwards to StringImpl::toIntStrict. A null string reports failure through
// ok (when provided) and returns 0.
int String::toIntStrict(bool* ok, int base) const
{
    if (m_impl)
        return m_impl->toIntStrict(ok, base);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toUIntStrict. A null string reports failure through
// ok (when provided) and returns 0.
unsigned String::toUIntStrict(bool* ok, int base) const
{
    if (m_impl)
        return m_impl->toUIntStrict(ok, base);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toInt64Strict. A null string reports failure through
// ok (when provided) and returns 0.
int64_t String::toInt64Strict(bool* ok, int base) const
{
    if (m_impl)
        return m_impl->toInt64Strict(ok, base);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toUInt64Strict. A null string reports failure through
// ok (when provided) and returns 0.
uint64_t String::toUInt64Strict(bool* ok, int base) const
{
    if (m_impl)
        return m_impl->toUInt64Strict(ok, base);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toIntPtrStrict. A null string reports failure through
// ok (when provided) and returns 0.
intptr_t String::toIntPtrStrict(bool* ok, int base) const
{
    if (m_impl)
        return m_impl->toIntPtrStrict(ok, base);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toInt. A null string reports failure through ok
// (when provided) and returns 0.
int String::toInt(bool* ok) const
{
    if (m_impl)
        return m_impl->toInt(ok);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toUInt. A null string reports failure through ok
// (when provided) and returns 0.
unsigned String::toUInt(bool* ok) const
{
    if (m_impl)
        return m_impl->toUInt(ok);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toInt64. A null string reports failure through ok
// (when provided) and returns 0.
int64_t String::toInt64(bool* ok) const
{
    if (m_impl)
        return m_impl->toInt64(ok);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toUInt64. A null string reports failure through ok
// (when provided) and returns 0.
uint64_t String::toUInt64(bool* ok) const
{
    if (m_impl)
        return m_impl->toUInt64(ok);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toIntPtr. A null string reports failure through ok
// (when provided) and returns 0.
intptr_t String::toIntPtr(bool* ok) const
{
    if (m_impl)
        return m_impl->toIntPtr(ok);

    if (ok)
        *ok = false;
    return 0;
}
// Forwards to StringImpl::toDouble. A null string reports failure through ok
// (when provided) and returns 0.0.
double String::toDouble(bool* ok) const
{
    if (m_impl)
        return m_impl->toDouble(ok);

    if (ok)
        *ok = false;
    return 0.0;
}
// Forwards to StringImpl::toFloat. A null string reports failure through ok
// (when provided) and returns 0.0f.
float String::toFloat(bool* ok) const
{
    if (m_impl)
        return m_impl->toFloat(ok);

    if (ok)
        *ok = false;
    return 0.0f;
}
// Forwards to StringImpl::threadsafeCopy; a null string yields a null string.
String String::threadsafeCopy() const
{
    if (m_impl)
        return m_impl->threadsafeCopy();
    return String();
}
// Forwards to StringImpl::crossThreadString; a null string yields a null string.
String String::crossThreadString() const
{
    if (m_impl)
        return m_impl->crossThreadString();
    return String();
}
// True when the string is null or has zero length.
bool String::isEmpty() const
{
    if (!m_impl)
        return true;
    return !m_impl->length();
}
void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
{
result.clear();
int startPos = 0;
int endPos;
while ((endPos = find(separator, startPos)) != -1) {
if (allowEmptyEntries || startPos != endPos)
result.append(substring(startPos, endPos - startPos));
startPos = endPos + separator.length();
}
if (allowEmptyEntries || startPos != static_cast<int>(length()))
result.append(substring(startPos));
}
// Convenience overload: splits on separator and discards zero-length pieces.
void String::split(const String& separator, Vector<String>& result) const
{
    const bool keepEmptyPieces = false;
    split(separator, keepEmptyPieces, result);
}
// Splits this string on every occurrence of the code unit separator and stores
// the pieces in result (which is cleared first). Zero-length pieces are kept
// only when allowEmptyEntries is true.
void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
{
    result.clear();

    int pieceStart = 0;
    for (int hit = find(separator, pieceStart); hit != -1; hit = find(separator, pieceStart)) {
        if (allowEmptyEntries || pieceStart != hit)
            result.append(substring(pieceStart, hit - pieceStart));
        pieceStart = hit + 1;
    }

    // Whatever follows the last separator (possibly the whole string).
    if (allowEmptyEntries || pieceStart != static_cast<int>(length()))
        result.append(substring(pieceStart));
}
// Convenience overload: splits on the code unit separator and discards
// zero-length pieces.
void String::split(UChar separator, Vector<String>& result) const
{
    // Call the UChar overload directly instead of wrapping the separator in a
    // temporary one-character String, which costs an allocation per call.
    split(separator, false, result);
}
#ifndef NDEBUG
// Debug-only helper. Returns the impl's ascii() buffer, or the zero-terminated
// text "(null impl)" when the string is null.
Vector<char> String::ascii() const
{
    if (m_impl)
        return m_impl->ascii();

    const char* nullMsg = "(null impl)";
    Vector<char, 2048> buffer;
    for (const char* cursor = nullMsg; *cursor; ++cursor)
        buffer.append(*cursor);
    buffer.append('\0');
    return buffer;
}
#endif
// Encodes the string with Latin1Encoding(), passing QuestionMarksForUnencodables
// as the handling mode for characters that cannot be encoded.
CString String::latin1() const
{
    const UChar* source = characters();
    const unsigned sourceLength = length();
    return Latin1Encoding().encode(source, sourceLength, QuestionMarksForUnencodables);
}
// Encodes the string with UTF8Encoding(), passing QuestionMarksForUnencodables
// as the handling mode for characters that cannot be encoded.
CString String::utf8() const
{
    const UChar* source = characters();
    const unsigned sourceLength = length();
    return UTF8Encoding().encode(source, sourceLength, QuestionMarksForUnencodables);
}
// Decodes size bytes at string with UTF8Encoding().
// A null pointer yields a null String.
String String::fromUTF8(const char* string, size_t size)
{
    if (string)
        return UTF8Encoding().decode(string, size);
    return String();
}
// Decodes a zero-terminated byte string with UTF8Encoding().
// A null pointer yields a null String.
String String::fromUTF8(const char* string)
{
    if (!string)
        return String();

    const size_t byteCount = strlen(string);
    return UTF8Encoding().decode(string, byteCount);
}
// Tries fromUTF8 first; if that produces a string for which operator! is true,
// falls back to constructing a String directly from the raw bytes.
String String::fromUTF8WithLatin1Fallback(const char* string, size_t size)
{
    String decoded = fromUTF8(string, size);
    if (!decoded)
        return String(string, size);
    return decoded;
}
#if USE(JSC)
// Builds a String from a JSC Identifier's UString.
// A null identifier leaves the string null.
String::String(const Identifier& str)
{
    if (!str.isNull())
        m_impl = StringImpl::create(str.ustring());
}
// Builds a String from a JSC UString. A null UString leaves the string null.
String::String(const UString& str)
{
    if (!str.isNull())
        m_impl = StringImpl::create(str);
}
// Converts to a JSC UString via StringImpl::ustring;
// a null string converts to a default-constructed UString.
String::operator UString() const
{
    if (m_impl)
        return m_impl->ustring();
    return UString();
}
#endif
// String Operations
// Returns true when c is a valid digit in the given base: '0'..'9' count as
// values 0..9, and ASCII letters (either case) as values 10 and up. Bases above
// 36 are treated as 36 for the letter range. Non-ASCII characters never match.
static bool isCharacterAllowedInBase(UChar c, int base)
{
    if (c > 0x7F)
        return false;

    if (isASCIIDigit(c))
        return c - '0' < base;

    if (!isASCIIAlpha(c))
        return false;

    // Only 26 letters exist, so cap the base used for the letter range.
    const int letterBase = base > 36 ? 36 : base;
    const bool isLowerDigit = c >= 'a' && c < 'a' + letterBase - 10;
    const bool isUpperDigit = c >= 'A' && c < 'A' + letterBase - 10;
    return isLowerDigit || isUpperDigit;
}
// Parses an integer of type IntegralType, written in the given base, from the
// length code units at data.
//
// Accepted form: optional leading whitespace (isSpaceOrNewline), an optional
// sign ('-' only when IntegralType is signed, '+' for any type), at least one
// digit valid for base (see isCharacterAllowedInBase), optional trailing
// whitespace, and nothing else.
//
// Returns the parsed value and sets *ok (when ok is non-null) to true on
// success. On any failure -- null data, no digits, overflow, or leftover
// characters -- returns 0 and sets *ok to false.
//
// base is used as a divisor below, so it must be non-zero.
template <typename IntegralType>
static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base)
{
static const IntegralType integralMax = std::numeric_limits<IntegralType>::max();
static const bool isSigned = std::numeric_limits<IntegralType>::is_signed;
// Largest accumulated value that can still be multiplied by base without
// exceeding integralMax.
const IntegralType maxMultiplier = integralMax / base;
IntegralType value = 0;
bool isOk = false;
bool isNegative = false;
if (!data)
goto bye;
// skip leading whitespace
while (length && isSpaceOrNewline(*data)) {
length--;
data++;
}
// Consume at most one sign character; '-' is only recognized for signed types.
if (isSigned && length && *data == '-') {
length--;
data++;
isNegative = true;
} else if (length && *data == '+') {
length--;
data++;
}
// At least one valid digit is required.
if (!length || !isCharacterAllowedInBase(*data, base))
goto bye;
while (length && isCharacterAllowedInBase(*data, base)) {
length--;
IntegralType digitValue;
UChar c = *data;
if (isASCIIDigit(c))
digitValue = c - '0';
else if (c >= 'a')
digitValue = c - 'a' + 10;
else
digitValue = c - 'A' + 10;
// Overflow check before accumulating. The "+ isNegative" term lets a negative
// number's final digit be one larger than a positive number's, so the magnitude
// integralMax + 1 is accepted when a minus sign was seen.
// NOTE(review): for signed types that magnitude does not fit in IntegralType,
// so the multiply-add below looks like it can overflow when parsing the
// minimum value -- confirm.
if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
goto bye;
value = base * value + digitValue;
data++;
}
// Silence MSVC warnings around the negation (presumably C4146, unary minus
// applied to an unsigned type, since this template is also instantiated for
// unsigned types).
#if COMPILER(MSVC)
#pragma warning(push, 0)
#pragma warning(disable:4146)
#endif
if (isNegative)
value = -value;
#if COMPILER(MSVC)
#pragma warning(pop)
#endif
// skip trailing space
while (length && isSpaceOrNewline(*data)) {
length--;
data++;
}
// Success only if the whole input was consumed.
if (!length)
isOk = true;
bye:
if (ok)
*ok = isOk;
return isOk ? value : 0;
}
// Returns the length of the longest prefix of data that looks like a decimal
// integer: optional leading whitespace, one optional '+' or '-', then ASCII
// digits. The prefix may contain no digits at all.
static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length)
{
    size_t consumed = 0;

    // Leading whitespace.
    while (consumed != length && isSpaceOrNewline(data[consumed]))
        ++consumed;

    // Optional sign.
    if (consumed != length && (data[consumed] == '+' || data[consumed] == '-'))
        ++consumed;

    // Digits.
    while (consumed != length && isASCIIDigit(data[consumed]))
        ++consumed;

    return consumed;
}
// Strict parse of the whole buffer as an int in the given base (see toIntegralType).
int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
{
    const int parsed = toIntegralType<int>(data, length, ok, base);
    return parsed;
}
// Strict parse of the whole buffer as an unsigned in the given base (see toIntegralType).
unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
{
    const unsigned parsed = toIntegralType<unsigned>(data, length, ok, base);
    return parsed;
}
// Strict parse of the whole buffer as an int64_t in the given base (see toIntegralType).
int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
{
    const int64_t parsed = toIntegralType<int64_t>(data, length, ok, base);
    return parsed;
}
// Strict parse of the whole buffer as a uint64_t in the given base (see toIntegralType).
uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
{
    const uint64_t parsed = toIntegralType<uint64_t>(data, length, ok, base);
    return parsed;
}
// Strict parse of the whole buffer as an intptr_t in the given base (see toIntegralType).
intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
{
    const intptr_t parsed = toIntegralType<intptr_t>(data, length, ok, base);
    return parsed;
}
int charactersToInt(const UChar* data, size_t length, bool* ok)
{
return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
}
unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
{
return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
}
// Lenient base-10 parse as int64_t: only the integer-looking prefix found by
// lengthOfCharactersAsInteger is handed to toIntegralType.
int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
{
    const size_t prefixLength = lengthOfCharactersAsInteger(data, length);
    return toIntegralType<int64_t>(data, prefixLength, ok, 10);
}
// Lenient base-10 parse as uint64_t: only the integer-looking prefix found by
// lengthOfCharactersAsInteger is handed to toIntegralType.
uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
{
    const size_t prefixLength = lengthOfCharactersAsInteger(data, length);
    return toIntegralType<uint64_t>(data, prefixLength, ok, 10);
}
// Lenient base-10 parse as intptr_t: only the integer-looking prefix found by
// lengthOfCharactersAsInteger is handed to toIntegralType.
intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
{
    const size_t prefixLength = lengthOfCharactersAsInteger(data, length);
    return toIntegralType<intptr_t>(data, prefixLength, ok, 10);
}
double charactersToDouble(const UChar* data, size_t length, bool* ok)
{
if (!length) {
if (ok)
*ok = false;
return 0.0;
}
Vector<char, 256> bytes(length + 1);
for (unsigned i = 0; i < length; ++i)
bytes[i] = data[i] < 0x7F ? data[i] : '?';
bytes[length] = '\0';
char* end;
double val = WTF::strtod(bytes.data(), &end);
if (ok)
*ok = (end == 0 || *end == '\0');
return val;
}
// Parses the buffer as a double and narrows the result with narrowPrecisionToFloat.
float charactersToFloat(const UChar* data, size_t length, bool* ok)
{
    // FIXME: This will return ok even when the string fits into a double but not a float.
    const double wide = charactersToDouble(data, length, ok);
    return narrowPrecisionToFloat(wide);
}
// Converts string to UTF-8 with convertUTF16ToUTF8 and returns the bytes in a
// SharedBuffer. Returns 0 when the conversion does not report conversionOK, or
// when the worst-case output size cannot be represented.
PassRefPtr<SharedBuffer> utf8Buffer(const String& string)
{
    // Allocate a buffer big enough to hold all the characters: three bytes per
    // UTF-16 code unit. Keep the length unsigned and reject lengths whose
    // tripled size would wrap, rather than overflowing a signed int.
    const unsigned length = string.length();
    if (length > std::numeric_limits<unsigned>::max() / 3)
        return 0;
    Vector<char> buffer(length * 3);

    // Convert to runs of 8-bit characters.
    char* p = buffer.data();
    const UChar* d = string.characters();
    ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), true);
    if (result != conversionOK)
        return 0;

    // Trim the buffer down to the bytes actually written.
    buffer.shrink(p - buffer.data());
    return SharedBuffer::adoptVector(buffer);
}
unsigned String::numGraphemeClusters() const
{
TextBreakIterator* it = characterBreakIterator(characters(), length());
if (!it)
return length();
unsigned num = 0;
while (textBreakNext(it) != TextBreakDone)
++num;
return num;
}
unsigned String::numCharactersInGraphemeClusters(unsigned numGraphemeClusters) const
{
TextBreakIterator* it = characterBreakIterator(characters(), length());
if (!it)
return min(length(), numGraphemeClusters);
for (unsigned i = 0; i < numGraphemeClusters; ++i) {
if (textBreakNext(it) == TextBreakDone)
return length();
}
return textBreakCurrent(it);
}
} // namespace WebCore
#ifndef NDEBUG
// Debugger convenience: heap-allocates a WebCore::String from a C string.
// The allocation is never freed (intentional leak; debug builds only).
WebCore::String* string(const char*);
WebCore::String* string(const char* s)
{
    WebCore::String* leaked = new WebCore::String(s);
    return leaked;
}
#endif