// Copyright 2014 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_STRING_BUILDER_H_ #define V8_STRING_BUILDER_H_ #include "src/assert-scope.h" #include "src/factory.h" #include "src/handles.h" #include "src/isolate.h" #include "src/objects.h" #include "src/utils.h" namespace v8 { namespace internal { const int kStringBuilderConcatHelperLengthBits = 11; const int kStringBuilderConcatHelperPositionBits = 19; typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits> StringBuilderSubstringLength; typedef BitField<int, kStringBuilderConcatHelperLengthBits, kStringBuilderConcatHelperPositionBits> StringBuilderSubstringPosition; template <typename sinkchar> static inline void StringBuilderConcatHelper(String* special, sinkchar* sink, FixedArray* fixed_array, int array_length) { DisallowHeapAllocation no_gc; int position = 0; for (int i = 0; i < array_length; i++) { Object* element = fixed_array->get(i); if (element->IsSmi()) { // Smi encoding of position and length. int encoded_slice = Smi::cast(element)->value(); int pos; int len; if (encoded_slice > 0) { // Position and length encoded in one smi. pos = StringBuilderSubstringPosition::decode(encoded_slice); len = StringBuilderSubstringLength::decode(encoded_slice); } else { // Position and length encoded in two smis. Object* obj = fixed_array->get(++i); DCHECK(obj->IsSmi()); pos = Smi::cast(obj)->value(); len = -encoded_slice; } String::WriteToFlat(special, sink + position, pos, pos + len); position += len; } else { String* string = String::cast(element); int element_length = string->length(); String::WriteToFlat(string, sink + position, 0, element_length); position += element_length; } } } // Returns the result length of the concatenation. // On illegal argument, -1 is returned. static inline int StringBuilderConcatLength(int special_length, FixedArray* fixed_array, int array_length, bool* one_byte) { DisallowHeapAllocation no_gc; int position = 0; for (int i = 0; i < array_length; i++) { int increment = 0; Object* elt = fixed_array->get(i); if (elt->IsSmi()) { // Smi encoding of position and length. int smi_value = Smi::cast(elt)->value(); int pos; int len; if (smi_value > 0) { // Position and length encoded in one smi. pos = StringBuilderSubstringPosition::decode(smi_value); len = StringBuilderSubstringLength::decode(smi_value); } else { // Position and length encoded in two smis. len = -smi_value; // Get the position and check that it is a positive smi. i++; if (i >= array_length) return -1; Object* next_smi = fixed_array->get(i); if (!next_smi->IsSmi()) return -1; pos = Smi::cast(next_smi)->value(); if (pos < 0) return -1; } DCHECK(pos >= 0); DCHECK(len >= 0); if (pos > special_length || len > special_length - pos) return -1; increment = len; } else if (elt->IsString()) { String* element = String::cast(elt); int element_length = element->length(); increment = element_length; if (*one_byte && !element->HasOnlyOneByteChars()) { *one_byte = false; } } else { return -1; } if (increment > String::kMaxLength - position) { return kMaxInt; // Provoke throw on allocation. } position += increment; } return position; } class FixedArrayBuilder { public: explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity) : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)), length_(0), has_non_smi_elements_(false) { // Require a non-zero initial size. Ensures that doubling the size to // extend the array will work. DCHECK(initial_capacity > 0); } explicit FixedArrayBuilder(Handle<FixedArray> backing_store) : array_(backing_store), length_(0), has_non_smi_elements_(false) { // Require a non-zero initial size. Ensures that doubling the size to // extend the array will work. DCHECK(backing_store->length() > 0); } bool HasCapacity(int elements) { int length = array_->length(); int required_length = length_ + elements; return (length >= required_length); } void EnsureCapacity(int elements) { int length = array_->length(); int required_length = length_ + elements; if (length < required_length) { int new_length = length; do { new_length *= 2; } while (new_length < required_length); Handle<FixedArray> extended_array = array_->GetIsolate()->factory()->NewFixedArrayWithHoles(new_length); array_->CopyTo(0, *extended_array, 0, length_); array_ = extended_array; } } void Add(Object* value) { DCHECK(!value->IsSmi()); DCHECK(length_ < capacity()); array_->set(length_, value); length_++; has_non_smi_elements_ = true; } void Add(Smi* value) { DCHECK(value->IsSmi()); DCHECK(length_ < capacity()); array_->set(length_, value); length_++; } Handle<FixedArray> array() { return array_; } int length() { return length_; } int capacity() { return array_->length(); } Handle<JSArray> ToJSArray(Handle<JSArray> target_array) { JSArray::SetContent(target_array, array_); target_array->set_length(Smi::FromInt(length_)); return target_array; } private: Handle<FixedArray> array_; int length_; bool has_non_smi_elements_; }; class ReplacementStringBuilder { public: ReplacementStringBuilder(Heap* heap, Handle<String> subject, int estimated_part_count) : heap_(heap), array_builder_(heap->isolate(), estimated_part_count), subject_(subject), character_count_(0), is_one_byte_(subject->IsOneByteRepresentation()) { // Require a non-zero initial size. Ensures that doubling the size to // extend the array will work. DCHECK(estimated_part_count > 0); } static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from, int to) { DCHECK(from >= 0); int length = to - from; DCHECK(length > 0); if (StringBuilderSubstringLength::is_valid(length) && StringBuilderSubstringPosition::is_valid(from)) { int encoded_slice = StringBuilderSubstringLength::encode(length) | StringBuilderSubstringPosition::encode(from); builder->Add(Smi::FromInt(encoded_slice)); } else { // Otherwise encode as two smis. builder->Add(Smi::FromInt(-length)); builder->Add(Smi::FromInt(from)); } } void EnsureCapacity(int elements) { array_builder_.EnsureCapacity(elements); } void AddSubjectSlice(int from, int to) { AddSubjectSlice(&array_builder_, from, to); IncrementCharacterCount(to - from); } void AddString(Handle<String> string) { int length = string->length(); DCHECK(length > 0); AddElement(*string); if (!string->IsOneByteRepresentation()) { is_one_byte_ = false; } IncrementCharacterCount(length); } MaybeHandle<String> ToString(); void IncrementCharacterCount(int by) { if (character_count_ > String::kMaxLength - by) { STATIC_ASSERT(String::kMaxLength < kMaxInt); character_count_ = kMaxInt; } else { character_count_ += by; } } private: void AddElement(Object* element) { DCHECK(element->IsSmi() || element->IsString()); DCHECK(array_builder_.capacity() > array_builder_.length()); array_builder_.Add(element); } Heap* heap_; FixedArrayBuilder array_builder_; Handle<String> subject_; int character_count_; bool is_one_byte_; }; class IncrementalStringBuilder { public: explicit IncrementalStringBuilder(Isolate* isolate); INLINE(String::Encoding CurrentEncoding()) { return encoding_; } template <typename SrcChar, typename DestChar> INLINE(void Append(SrcChar c)); INLINE(void AppendCharacter(uint8_t c)) { if (encoding_ == String::ONE_BYTE_ENCODING) { Append<uint8_t, uint8_t>(c); } else { Append<uint8_t, uc16>(c); } } INLINE(void AppendCString(const char* s)) { const uint8_t* u = reinterpret_cast<const uint8_t*>(s); if (encoding_ == String::ONE_BYTE_ENCODING) { while (*u != '\0') Append<uint8_t, uint8_t>(*(u++)); } else { while (*u != '\0') Append<uint8_t, uc16>(*(u++)); } } INLINE(void AppendCString(const uc16* s)) { if (encoding_ == String::ONE_BYTE_ENCODING) { while (*s != '\0') Append<uc16, uint8_t>(*(s++)); } else { while (*s != '\0') Append<uc16, uc16>(*(s++)); } } INLINE(bool CurrentPartCanFit(int length)) { return part_length_ - current_index_ > length; } void AppendString(Handle<String> string); MaybeHandle<String> Finish(); INLINE(bool HasOverflowed()) const { return overflowed_; } INLINE(int Length()) const { return accumulator_->length() + current_index_; } // Change encoding to two-byte. void ChangeEncoding() { DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); ShrinkCurrentPart(); encoding_ = String::TWO_BYTE_ENCODING; Extend(); } template <typename DestChar> class NoExtend { public: explicit NoExtend(Handle<String> string, int offset) { DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString()); if (sizeof(DestChar) == 1) { start_ = reinterpret_cast<DestChar*>( Handle<SeqOneByteString>::cast(string)->GetChars() + offset); } else { start_ = reinterpret_cast<DestChar*>( Handle<SeqTwoByteString>::cast(string)->GetChars() + offset); } cursor_ = start_; } INLINE(void Append(DestChar c)) { *(cursor_++) = c; } INLINE(void AppendCString(const char* s)) { const uint8_t* u = reinterpret_cast<const uint8_t*>(s); while (*u != '\0') Append(*(u++)); } int written() { return static_cast<int>(cursor_ - start_); } private: DestChar* start_; DestChar* cursor_; DisallowHeapAllocation no_gc_; }; template <typename DestChar> class NoExtendString : public NoExtend<DestChar> { public: NoExtendString(Handle<String> string, int required_length) : NoExtend<DestChar>(string, 0), string_(string) { DCHECK(string->length() >= required_length); } Handle<String> Finalize() { Handle<SeqString> string = Handle<SeqString>::cast(string_); int length = NoExtend<DestChar>::written(); Handle<String> result = SeqString::Truncate(string, length); string_ = Handle<String>(); return result; } private: Handle<String> string_; }; template <typename DestChar> class NoExtendBuilder : public NoExtend<DestChar> { public: NoExtendBuilder(IncrementalStringBuilder* builder, int required_length) : NoExtend<DestChar>(builder->current_part(), builder->current_index_), builder_(builder) { DCHECK(builder->CurrentPartCanFit(required_length)); } ~NoExtendBuilder() { builder_->current_index_ += NoExtend<DestChar>::written(); } private: IncrementalStringBuilder* builder_; }; private: Factory* factory() { return isolate_->factory(); } INLINE(Handle<String> accumulator()) { return accumulator_; } INLINE(void set_accumulator(Handle<String> string)) { *accumulator_.location() = *string; } INLINE(Handle<String> current_part()) { return current_part_; } INLINE(void set_current_part(Handle<String> string)) { *current_part_.location() = *string; } // Add the current part to the accumulator. void Accumulate(Handle<String> new_part); // Finish the current part and allocate a new part. void Extend(); // Shrink current part to the right size. void ShrinkCurrentPart() { DCHECK(current_index_ < part_length_); set_current_part(SeqString::Truncate( Handle<SeqString>::cast(current_part()), current_index_)); } static const int kInitialPartLength = 32; static const int kMaxPartLength = 16 * 1024; static const int kPartLengthGrowthFactor = 2; Isolate* isolate_; String::Encoding encoding_; bool overflowed_; int part_length_; int current_index_; Handle<String> accumulator_; Handle<String> current_part_; }; template <typename SrcChar, typename DestChar> void IncrementalStringBuilder::Append(SrcChar c) { DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1); if (sizeof(DestChar) == 1) { DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); SeqOneByteString::cast(*current_part_) ->SeqOneByteStringSet(current_index_++, c); } else { DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_); SeqTwoByteString::cast(*current_part_) ->SeqTwoByteStringSet(current_index_++, c); } if (current_index_ == part_length_) Extend(); } } // namespace internal } // namespace v8 #endif // V8_STRING_BUILDER_H_