// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "content/common/page_state_serialization.h" #include <algorithm> #include <limits> #include "base/pickle.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "ui/gfx/screen.h" namespace content { namespace { #if defined(OS_ANDROID) float g_device_scale_factor_for_testing = 0.0; #endif //----------------------------------------------------------------------------- void AppendDataToHttpBody(ExplodedHttpBody* http_body, const char* data, int data_length) { ExplodedHttpBodyElement element; element.type = blink::WebHTTPBody::Element::TypeData; element.data.assign(data, data_length); http_body->elements.push_back(element); } void AppendFileRangeToHttpBody(ExplodedHttpBody* http_body, const base::NullableString16& file_path, int file_start, int file_length, double file_modification_time) { ExplodedHttpBodyElement element; element.type = blink::WebHTTPBody::Element::TypeFile; element.file_path = file_path; element.file_start = file_start; element.file_length = file_length; element.file_modification_time = file_modification_time; http_body->elements.push_back(element); } void AppendURLRangeToHttpBody(ExplodedHttpBody* http_body, const GURL& url, int file_start, int file_length, double file_modification_time) { ExplodedHttpBodyElement element; element.type = blink::WebHTTPBody::Element::TypeFileSystemURL; element.filesystem_url = url; element.file_start = file_start; element.file_length = file_length; element.file_modification_time = file_modification_time; http_body->elements.push_back(element); } void AppendBlobToHttpBody(ExplodedHttpBody* http_body, const std::string& uuid) { ExplodedHttpBodyElement element; element.type = blink::WebHTTPBody::Element::TypeBlob; element.blob_uuid = uuid; http_body->elements.push_back(element); } //---------------------------------------------------------------------------- void AppendReferencedFilesFromHttpBody( const std::vector<ExplodedHttpBodyElement>& elements, std::vector<base::NullableString16>* referenced_files) { for (size_t i = 0; i < elements.size(); ++i) { if (elements[i].type == blink::WebHTTPBody::Element::TypeFile) referenced_files->push_back(elements[i].file_path); } } bool AppendReferencedFilesFromDocumentState( const std::vector<base::NullableString16>& document_state, std::vector<base::NullableString16>* referenced_files) { if (document_state.empty()) return true; // This algorithm is adapted from Blink's core/html/FormController.cpp code. // We only care about how that code worked when this code snapshot was taken // as this code is only needed for backwards compat. // // For reference, see FormController::formStatesFromStateVector at: // http://src.chromium.org/viewvc/blink/trunk/Source/core/html/FormController.cpp?pathrev=152274 size_t index = 0; if (document_state.size() < 3) return false; index++; // Skip over magic signature. index++; // Skip over form key. size_t item_count; if (!base::StringToSizeT(document_state[index++].string(), &item_count)) return false; while (item_count--) { if (index + 1 >= document_state.size()) return false; index++; // Skip over name. const base::NullableString16& type = document_state[index++]; if (index >= document_state.size()) return false; size_t value_size; if (!base::StringToSizeT(document_state[index++].string(), &value_size)) return false; if (index + value_size > document_state.size() || index + value_size < index) // Check for overflow. return false; if (EqualsASCII(type.string(), "file")) { if (value_size != 2) return false; referenced_files->push_back(document_state[index++]); index++; // Skip over display name. } else { index += value_size; } } return true; } bool RecursivelyAppendReferencedFiles( const ExplodedFrameState& frame_state, std::vector<base::NullableString16>* referenced_files) { if (!frame_state.http_body.is_null) { AppendReferencedFilesFromHttpBody(frame_state.http_body.elements, referenced_files); } if (!AppendReferencedFilesFromDocumentState(frame_state.document_state, referenced_files)) return false; for (size_t i = 0; i < frame_state.children.size(); ++i) { if (!RecursivelyAppendReferencedFiles(frame_state.children[i], referenced_files)) return false; } return true; } //---------------------------------------------------------------------------- struct SerializeObject { SerializeObject() : version(0), parse_error(false) { } SerializeObject(const char* data, int len) : pickle(data, len), version(0), parse_error(false) { iter = PickleIterator(pickle); } std::string GetAsString() { return std::string(static_cast<const char*>(pickle.data()), pickle.size()); } Pickle pickle; PickleIterator iter; int version; bool parse_error; }; // Version ID of serialized format. // 11: Min version // 12: Adds support for contains_passwords in HTTP body // 13: Adds support for URL (FileSystem URL) // 14: Adds list of referenced files, version written only for first item. // 15: Removes a bunch of values we defined but never used. // 16: Switched from blob urls to blob uuids. // 17: Add a target frame id number. // // NOTE: If the version is -1, then the pickle contains only a URL string. // See ReadPageState. // const int kMinVersion = 11; const int kCurrentVersion = 17; // A bunch of convenience functions to read/write to SerializeObjects. The // de-serializers assume the input data will be in the correct format and fall // back to returning safe defaults when not. void WriteData(const void* data, int length, SerializeObject* obj) { obj->pickle.WriteData(static_cast<const char*>(data), length); } void ReadData(SerializeObject* obj, const void** data, int* length) { const char* tmp; if (obj->pickle.ReadData(&obj->iter, &tmp, length)) { *data = tmp; } else { obj->parse_error = true; *data = NULL; *length = 0; } } void WriteInteger(int data, SerializeObject* obj) { obj->pickle.WriteInt(data); } int ReadInteger(SerializeObject* obj) { int tmp; if (obj->pickle.ReadInt(&obj->iter, &tmp)) return tmp; obj->parse_error = true; return 0; } void ConsumeInteger(SerializeObject* obj) { int unused ALLOW_UNUSED = ReadInteger(obj); } void WriteInteger64(int64 data, SerializeObject* obj) { obj->pickle.WriteInt64(data); } int64 ReadInteger64(SerializeObject* obj) { int64 tmp = 0; if (obj->pickle.ReadInt64(&obj->iter, &tmp)) return tmp; obj->parse_error = true; return 0; } void WriteReal(double data, SerializeObject* obj) { WriteData(&data, sizeof(double), obj); } double ReadReal(SerializeObject* obj) { const void* tmp = NULL; int length = 0; double value = 0.0; ReadData(obj, &tmp, &length); if (length == static_cast<int>(sizeof(double))) { // Use memcpy, as tmp may not be correctly aligned. memcpy(&value, tmp, sizeof(double)); } else { obj->parse_error = true; } return value; } void ConsumeReal(SerializeObject* obj) { double unused ALLOW_UNUSED = ReadReal(obj); } void WriteBoolean(bool data, SerializeObject* obj) { obj->pickle.WriteInt(data ? 1 : 0); } bool ReadBoolean(SerializeObject* obj) { bool tmp; if (obj->pickle.ReadBool(&obj->iter, &tmp)) return tmp; obj->parse_error = true; return false; } void ConsumeBoolean(SerializeObject* obj) { bool unused ALLOW_UNUSED = ReadBoolean(obj); } void WriteGURL(const GURL& url, SerializeObject* obj) { obj->pickle.WriteString(url.possibly_invalid_spec()); } GURL ReadGURL(SerializeObject* obj) { std::string spec; if (obj->pickle.ReadString(&obj->iter, &spec)) return GURL(spec); obj->parse_error = true; return GURL(); } void WriteStdString(const std::string& s, SerializeObject* obj) { obj->pickle.WriteString(s); } std::string ReadStdString(SerializeObject* obj) { std::string s; if (obj->pickle.ReadString(&obj->iter, &s)) return s; obj->parse_error = true; return std::string(); } // WriteString pickles the NullableString16 as <int length><char16* data>. // If length == -1, then the NullableString16 itself is null. Otherwise the // length is the number of char16 (not bytes) in the NullableString16. void WriteString(const base::NullableString16& str, SerializeObject* obj) { if (str.is_null()) { obj->pickle.WriteInt(-1); } else { const char16* data = str.string().data(); size_t length_in_bytes = str.string().length() * sizeof(char16); CHECK_LT(length_in_bytes, static_cast<size_t>(std::numeric_limits<int>::max())); obj->pickle.WriteInt(length_in_bytes); obj->pickle.WriteBytes(data, length_in_bytes); } } // This reads a serialized NullableString16 from obj. If a string can't be // read, NULL is returned. const char16* ReadStringNoCopy(SerializeObject* obj, int* num_chars) { int length_in_bytes; if (!obj->pickle.ReadInt(&obj->iter, &length_in_bytes)) { obj->parse_error = true; return NULL; } if (length_in_bytes < 0) return NULL; const char* data; if (!obj->pickle.ReadBytes(&obj->iter, &data, length_in_bytes)) { obj->parse_error = true; return NULL; } if (num_chars) *num_chars = length_in_bytes / sizeof(char16); return reinterpret_cast<const char16*>(data); } base::NullableString16 ReadString(SerializeObject* obj) { int num_chars; const char16* chars = ReadStringNoCopy(obj, &num_chars); return chars ? base::NullableString16(base::string16(chars, num_chars), false) : base::NullableString16(); } void ConsumeString(SerializeObject* obj) { const char16* unused ALLOW_UNUSED = ReadStringNoCopy(obj, NULL); } template <typename T> void WriteAndValidateVectorSize(const std::vector<T>& v, SerializeObject* obj) { CHECK_LT(v.size(), std::numeric_limits<int>::max() / sizeof(T)); WriteInteger(static_cast<int>(v.size()), obj); } size_t ReadAndValidateVectorSize(SerializeObject* obj, size_t element_size) { size_t num_elements = static_cast<size_t>(ReadInteger(obj)); // Ensure that resizing a vector to size num_elements makes sense. if (std::numeric_limits<int>::max() / element_size <= num_elements) { obj->parse_error = true; return 0; } // Ensure that it is plausible for the pickle to contain num_elements worth // of data. if (obj->pickle.payload_size() <= num_elements) { obj->parse_error = true; return 0; } return num_elements; } // Writes a Vector of strings into a SerializeObject for serialization. void WriteStringVector( const std::vector<base::NullableString16>& data, SerializeObject* obj) { WriteAndValidateVectorSize(data, obj); for (size_t i = 0; i < data.size(); ++i) { WriteString(data[i], obj); } } void ReadStringVector(SerializeObject* obj, std::vector<base::NullableString16>* result) { size_t num_elements = ReadAndValidateVectorSize(obj, sizeof(base::NullableString16)); result->resize(num_elements); for (size_t i = 0; i < num_elements; ++i) (*result)[i] = ReadString(obj); } // Writes an ExplodedHttpBody object into a SerializeObject for serialization. void WriteHttpBody(const ExplodedHttpBody& http_body, SerializeObject* obj) { WriteBoolean(!http_body.is_null, obj); if (http_body.is_null) return; WriteAndValidateVectorSize(http_body.elements, obj); for (size_t i = 0; i < http_body.elements.size(); ++i) { const ExplodedHttpBodyElement& element = http_body.elements[i]; WriteInteger(element.type, obj); if (element.type == blink::WebHTTPBody::Element::TypeData) { WriteData(element.data.data(), static_cast<int>(element.data.size()), obj); } else if (element.type == blink::WebHTTPBody::Element::TypeFile) { WriteString(element.file_path, obj); WriteInteger64(element.file_start, obj); WriteInteger64(element.file_length, obj); WriteReal(element.file_modification_time, obj); } else if (element.type == blink::WebHTTPBody::Element::TypeFileSystemURL) { WriteGURL(element.filesystem_url, obj); WriteInteger64(element.file_start, obj); WriteInteger64(element.file_length, obj); WriteReal(element.file_modification_time, obj); } else { DCHECK(element.type == blink::WebHTTPBody::Element::TypeBlob); WriteStdString(element.blob_uuid, obj); } } WriteInteger64(http_body.identifier, obj); WriteBoolean(http_body.contains_passwords, obj); } void ReadHttpBody(SerializeObject* obj, ExplodedHttpBody* http_body) { // An initial boolean indicates if we have an HTTP body. if (!ReadBoolean(obj)) return; http_body->is_null = false; int num_elements = ReadInteger(obj); for (int i = 0; i < num_elements; ++i) { int type = ReadInteger(obj); if (type == blink::WebHTTPBody::Element::TypeData) { const void* data; int length = -1; ReadData(obj, &data, &length); if (length >= 0) { AppendDataToHttpBody(http_body, static_cast<const char*>(data), length); } } else if (type == blink::WebHTTPBody::Element::TypeFile) { base::NullableString16 file_path = ReadString(obj); int64 file_start = ReadInteger64(obj); int64 file_length = ReadInteger64(obj); double file_modification_time = ReadReal(obj); AppendFileRangeToHttpBody(http_body, file_path, file_start, file_length, file_modification_time); } else if (type == blink::WebHTTPBody::Element::TypeFileSystemURL) { GURL url = ReadGURL(obj); int64 file_start = ReadInteger64(obj); int64 file_length = ReadInteger64(obj); double file_modification_time = ReadReal(obj); AppendURLRangeToHttpBody(http_body, url, file_start, file_length, file_modification_time); } else if (type == blink::WebHTTPBody::Element::TypeBlob) { if (obj->version >= 16) { std::string blob_uuid = ReadStdString(obj); AppendBlobToHttpBody(http_body, blob_uuid); } else { ReadGURL(obj); // Skip the obsolete blob url value. } } } http_body->identifier = ReadInteger64(obj); if (obj->version >= 12) http_body->contains_passwords = ReadBoolean(obj); } // Writes the ExplodedFrameState data into the SerializeObject object for // serialization. void WriteFrameState( const ExplodedFrameState& state, SerializeObject* obj, bool is_top) { // WARNING: This data may be persisted for later use. As such, care must be // taken when changing the serialized format. If a new field needs to be // written, only adding at the end will make it easier to deal with loading // older versions. Similarly, this should NOT save fields with sensitive // data, such as password fields. WriteString(state.url_string, obj); WriteString(state.original_url_string, obj); WriteString(state.target, obj); WriteInteger(state.scroll_offset.x(), obj); WriteInteger(state.scroll_offset.y(), obj); WriteString(state.referrer, obj); WriteStringVector(state.document_state, obj); WriteReal(state.page_scale_factor, obj); WriteInteger64(state.item_sequence_number, obj); WriteInteger64(state.document_sequence_number, obj); WriteInteger64(state.target_frame_id, obj); bool has_state_object = !state.state_object.is_null(); WriteBoolean(has_state_object, obj); if (has_state_object) WriteString(state.state_object, obj); WriteHttpBody(state.http_body, obj); // NOTE: It is a quirk of the format that we still have to write the // http_content_type field when the HTTP body is null. That's why this code // is here instead of inside WriteHttpBody. WriteString(state.http_body.http_content_type, obj); // Subitems const std::vector<ExplodedFrameState>& children = state.children; WriteAndValidateVectorSize(children, obj); for (size_t i = 0; i < children.size(); ++i) WriteFrameState(children[i], obj, false); } void ReadFrameState(SerializeObject* obj, bool is_top, ExplodedFrameState* state) { if (obj->version < 14 && !is_top) ConsumeInteger(obj); // Skip over redundant version field. state->url_string = ReadString(obj); state->original_url_string = ReadString(obj); state->target = ReadString(obj); if (obj->version < 15) { ConsumeString(obj); // Skip obsolete parent field. ConsumeString(obj); // Skip obsolete title field. ConsumeString(obj); // Skip obsolete alternate title field. ConsumeReal(obj); // Skip obsolete visited time field. } int x = ReadInteger(obj); int y = ReadInteger(obj); state->scroll_offset = gfx::Point(x, y); if (obj->version < 15) { ConsumeBoolean(obj); // Skip obsolete target item flag. ConsumeInteger(obj); // Skip obsolete visit count field. } state->referrer = ReadString(obj); ReadStringVector(obj, &state->document_state); state->page_scale_factor = ReadReal(obj); state->item_sequence_number = ReadInteger64(obj); state->document_sequence_number = ReadInteger64(obj); if (obj->version >= 17) state->target_frame_id = ReadInteger64(obj); bool has_state_object = ReadBoolean(obj); if (has_state_object) state->state_object = ReadString(obj); ReadHttpBody(obj, &state->http_body); // NOTE: It is a quirk of the format that we still have to read the // http_content_type field when the HTTP body is null. That's why this code // is here instead of inside ReadHttpBody. state->http_body.http_content_type = ReadString(obj); if (obj->version < 14) ConsumeString(obj); // Skip unused referrer string. #if defined(OS_ANDROID) if (obj->version == 11) { // Now-unused values that shipped in this version of Chrome for Android when // it was on a private branch. ReadReal(obj); ReadBoolean(obj); // In this version, page_scale_factor included device_scale_factor and // scroll offsets were premultiplied by pageScaleFactor. if (state->page_scale_factor) { float device_scale_factor = g_device_scale_factor_for_testing; if (!device_scale_factor) { device_scale_factor = gfx::Screen::GetNativeScreen()->GetPrimaryDisplay(). device_scale_factor(); } state->scroll_offset = gfx::Point(state->scroll_offset.x() / state->page_scale_factor, state->scroll_offset.y() / state->page_scale_factor); state->page_scale_factor /= device_scale_factor; } } #endif // Subitems size_t num_children = ReadAndValidateVectorSize(obj, sizeof(ExplodedFrameState)); state->children.resize(num_children); for (size_t i = 0; i < num_children; ++i) ReadFrameState(obj, false, &state->children[i]); } void WritePageState(const ExplodedPageState& state, SerializeObject* obj) { WriteInteger(obj->version, obj); WriteStringVector(state.referenced_files, obj); WriteFrameState(state.top, obj, true); } void ReadPageState(SerializeObject* obj, ExplodedPageState* state) { obj->version = ReadInteger(obj); if (obj->version == -1) { GURL url = ReadGURL(obj); // NOTE: GURL::possibly_invalid_spec() always returns valid UTF-8. state->top.url_string = state->top.original_url_string = base::NullableString16(UTF8ToUTF16(url.possibly_invalid_spec()), false); return; } if (obj->version > kCurrentVersion || obj->version < kMinVersion) { obj->parse_error = true; return; } if (obj->version >= 14) ReadStringVector(obj, &state->referenced_files); ReadFrameState(obj, true, &state->top); if (obj->version < 14) RecursivelyAppendReferencedFiles(state->top, &state->referenced_files); // De-dupe state->referenced_files.erase( std::unique(state->referenced_files.begin(), state->referenced_files.end()), state->referenced_files.end()); } } // namespace ExplodedHttpBodyElement::ExplodedHttpBodyElement() : type(blink::WebHTTPBody::Element::TypeData), file_start(0), file_length(-1), file_modification_time(std::numeric_limits<double>::quiet_NaN()) { } ExplodedHttpBodyElement::~ExplodedHttpBodyElement() { } ExplodedHttpBody::ExplodedHttpBody() : identifier(0), contains_passwords(false), is_null(true) { } ExplodedHttpBody::~ExplodedHttpBody() { } ExplodedFrameState::ExplodedFrameState() : item_sequence_number(0), document_sequence_number(0), target_frame_id(0), page_scale_factor(0.0) { } ExplodedFrameState::~ExplodedFrameState() { } ExplodedPageState::ExplodedPageState() { } ExplodedPageState::~ExplodedPageState() { } bool DecodePageState(const std::string& encoded, ExplodedPageState* exploded) { *exploded = ExplodedPageState(); if (encoded.empty()) return true; SerializeObject obj(encoded.data(), static_cast<int>(encoded.size())); ReadPageState(&obj, exploded); return !obj.parse_error; } bool EncodePageState(const ExplodedPageState& exploded, std::string* encoded) { SerializeObject obj; obj.version = kCurrentVersion; WritePageState(exploded, &obj); *encoded = obj.GetAsString(); return true; } #if defined(OS_ANDROID) bool DecodePageStateWithDeviceScaleFactorForTesting( const std::string& encoded, float device_scale_factor, ExplodedPageState* exploded) { g_device_scale_factor_for_testing = device_scale_factor; bool rv = DecodePageState(encoded, exploded); g_device_scale_factor_for_testing = 0.0; return rv; } #endif } // namespace content