// Copyright 2013 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef V8_URI_H_ #define V8_URI_H_ #include "v8.h" #include "string-search.h" #include "v8utils.h" #include "v8conversions.h" namespace v8 { namespace internal { template <typename Char> static INLINE(Vector<const Char> GetCharVector(Handle<String> string)); template <> Vector<const uint8_t> GetCharVector(Handle<String> string) { String::FlatContent flat = string->GetFlatContent(); ASSERT(flat.IsAscii()); return flat.ToOneByteVector(); } template <> Vector<const uc16> GetCharVector(Handle<String> string) { String::FlatContent flat = string->GetFlatContent(); ASSERT(flat.IsTwoByte()); return flat.ToUC16Vector(); } class URIUnescape : public AllStatic { public: template<typename Char> static Handle<String> Unescape(Isolate* isolate, Handle<String> source); private: static const signed char kHexValue['g']; template<typename Char> static Handle<String> UnescapeSlow( Isolate* isolate, Handle<String> string, int start_index); static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2)); template <typename Char> static INLINE(int UnescapeChar(Vector<const Char> vector, int i, int length, int* step)); }; const signed char URIUnescape::kHexValue[] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15 }; template<typename Char> Handle<String> URIUnescape::Unescape(Isolate* isolate, Handle<String> source) { int index; { DisallowHeapAllocation no_allocation; StringSearch<uint8_t, Char> search(isolate, STATIC_ASCII_VECTOR("%")); index = search.Search(GetCharVector<Char>(source), 0); if (index < 0) return source; } return UnescapeSlow<Char>(isolate, source, index); } template <typename Char> Handle<String> URIUnescape::UnescapeSlow( Isolate* isolate, Handle<String> string, int start_index) { bool one_byte = true; int length = string->length(); int unescaped_length = 0; { DisallowHeapAllocation no_allocation; Vector<const Char> vector = GetCharVector<Char>(string); for (int i = start_index; i < length; unescaped_length++) { int step; if (UnescapeChar(vector, i, length, &step) > String::kMaxOneByteCharCode) { one_byte = false; } i += step; } } ASSERT(start_index < length); Handle<String> first_part = isolate->factory()->NewProperSubString(string, 0, start_index); int dest_position = 0; Handle<String> second_part; if (one_byte) { Handle<SeqOneByteString> dest = isolate->factory()->NewRawOneByteString(unescaped_length); DisallowHeapAllocation no_allocation; Vector<const Char> vector = GetCharVector<Char>(string); for (int i = start_index; i < length; dest_position++) { int step; dest->SeqOneByteStringSet(dest_position, UnescapeChar(vector, i, length, &step)); i += step; } second_part = dest; } else { Handle<SeqTwoByteString> dest = isolate->factory()->NewRawTwoByteString(unescaped_length); DisallowHeapAllocation no_allocation; Vector<const Char> vector = GetCharVector<Char>(string); for (int i = start_index; i < length; dest_position++) { int step; dest->SeqTwoByteStringSet(dest_position, UnescapeChar(vector, i, length, &step)); i += step; } second_part = dest; } return isolate->factory()->NewConsString(first_part, second_part); } int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) { if (character1 > 'f') return -1; int hi = kHexValue[character1]; if (hi == -1) return -1; if (character2 > 'f') return -1; int lo = kHexValue[character2]; if (lo == -1) return -1; return (hi << 4) + lo; } template <typename Char> int URIUnescape::UnescapeChar(Vector<const Char> vector, int i, int length, int* step) { uint16_t character = vector[i]; int32_t hi = 0; int32_t lo = 0; if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' && (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) != -1 && (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) != -1) { *step = 6; return (hi << 8) + lo; } else if (character == '%' && i <= length - 3 && (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) != -1) { *step = 3; return lo; } else { *step = 1; return character; } } class URIEscape : public AllStatic { public: template<typename Char> static Handle<String> Escape(Isolate* isolate, Handle<String> string); private: static const char kHexChars[17]; static const char kNotEscaped[256]; static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; } }; const char URIEscape::kHexChars[] = "0123456789ABCDEF"; // kNotEscaped is generated by the following: // // #!/bin/perl // for (my $i = 0; $i < 256; $i++) { // print "\n" if $i % 16 == 0; // my $c = chr($i); // my $escaped = 1; // $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#; // print $escaped ? "0, " : "1, "; // } const char URIEscape::kNotEscaped[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; template<typename Char> Handle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) { ASSERT(string->IsFlat()); int escaped_length = 0; int length = string->length(); { DisallowHeapAllocation no_allocation; Vector<const Char> vector = GetCharVector<Char>(string); for (int i = 0; i < length; i++) { uint16_t c = vector[i]; if (c >= 256) { escaped_length += 6; } else if (IsNotEscaped(c)) { escaped_length++; } else { escaped_length += 3; } // We don't allow strings that are longer than a maximal length. ASSERT(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow. if (escaped_length > String::kMaxLength) { isolate->context()->mark_out_of_memory(); return Handle<String>::null(); } } } // No length change implies no change. Return original string if no change. if (escaped_length == length) return string; Handle<SeqOneByteString> dest = isolate->factory()->NewRawOneByteString(escaped_length); int dest_position = 0; { DisallowHeapAllocation no_allocation; Vector<const Char> vector = GetCharVector<Char>(string); for (int i = 0; i < length; i++) { uint16_t c = vector[i]; if (c >= 256) { dest->SeqOneByteStringSet(dest_position, '%'); dest->SeqOneByteStringSet(dest_position+1, 'u'); dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]); dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]); dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]); dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]); dest_position += 6; } else if (IsNotEscaped(c)) { dest->SeqOneByteStringSet(dest_position, c); dest_position++; } else { dest->SeqOneByteStringSet(dest_position, '%'); dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]); dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]); dest_position += 3; } } } return dest; } } } // namespace v8::internal #endif // V8_URI_H_