// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "webkit/glue/multipart_response_delegate.h"
#include "base/logging.h"
#include "base/string_number_conversions.h"
#include "base/string_util.h"
#include "net/base/net_util.h"
#include "net/http/http_util.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebHTTPHeaderVisitor.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebURLLoaderClient.h"
using WebKit::WebHTTPHeaderVisitor;
using WebKit::WebString;
using WebKit::WebURLLoader;
using WebKit::WebURLLoaderClient;
using WebKit::WebURLResponse;
namespace webkit_glue {
namespace {
// Lower-case names of the response headers that are NOT copied from the
// original (outer) response when generating a WebURLResponse for a MIME
// payload.  For these headers the value found in the individual part's own
// header block is used instead, when present.
//
// Both the pointers and the pointed-to characters are const: this table is
// read-only data.
const char* const kReplaceHeaders[] = {
  "content-type",
  "content-length",
  "content-disposition",
  "content-range",
  "range",
  "set-cookie"
};
// Header visitor that copies each visited header into a destination
// WebURLResponse, except for headers whose name appears in kReplaceHeaders.
class HeaderCopier : public WebHTTPHeaderVisitor {
 public:
  // |response| is the destination of the copied headers.  It is held as a raw
  // pointer, is not deleted by this class, and must remain valid for as long
  // as visitHeader() may be called.
  //
  // The constructor is explicit so that a WebURLResponse* is never silently
  // converted into a HeaderCopier.
  explicit HeaderCopier(WebURLResponse* response)
      : response_(response) {
  }

  // Copies the header |name| / |value| into the destination response unless
  // |name| matches one of the kReplaceHeaders entries (the comparison is done
  // with LowerCaseEqualsASCII, i.e. ignoring ASCII case).
  virtual void visitHeader(const WebString& name, const WebString& value) {
    const std::string& name_utf8 = name.utf8();
    for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
      if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i]))
        return;  // Replaced header: never taken from the original response.
    }
    response_->setHTTPHeaderField(name, value);
  }

 private:
  // Destination response; not owned.
  WebURLResponse* response_;
};
} // namespace
// Builds a delegate that splits a multipart response into its parts.
//
// |client| and |loader| are stored and used for the didReceiveResponse /
// didReceiveData notifications.  |response| is copied and serves as the
// template for the per-part responses.  |boundary| is the boundary token; it
// is stored with a leading "--", which is added here when it is missing.
MultipartResponseDelegate::MultipartResponseDelegate(
    WebURLLoaderClient* client,
    WebURLLoader* loader,
    const WebURLResponse& response,
    const std::string& boundary)
    : client_(client),
      loader_(loader),
      original_response_(response),
      encoded_data_length_(0),
      boundary_("--"),
      first_received_data_(true),
      processing_headers_(false),
      stop_sending_(false),
      has_sent_first_response_(false) {
  // Some servers already report the boundary with the "--" prefix (see bug
  // 5786).  In that case take it verbatim; otherwise tack it onto the "--"
  // that |boundary_| was initialised with.
  const bool already_prefixed = StartsWithASCII(boundary, "--", true);
  if (!already_prefixed) {
    boundary_.append(boundary);
  } else {
    boundary_.assign(boundary);
  }
}
void MultipartResponseDelegate::OnReceivedData(const char* data,
int data_len,
int encoded_data_length) {
// stop_sending_ means that we've already received the final boundary token.
// The server should stop sending us data at this point, but if it does, we
// just throw it away.
if (stop_sending_)
return;
data_.append(data, data_len);
encoded_data_length_ += encoded_data_length;
if (first_received_data_) {
// Some servers don't send a boundary token before the first chunk of
// data. We handle this case anyway (Gecko does too).
first_received_data_ = false;
// Eat leading \r\n
int pos = PushOverLine(data_, 0);
if (pos)
data_ = data_.substr(pos);
if (data_.length() < boundary_.length() + 2) {
// We don't have enough data yet to make a boundary token. Just wait
// until the next chunk of data arrives.
first_received_data_ = true;
return;
}
if (0 != data_.compare(0, boundary_.length(), boundary_)) {
data_ = boundary_ + "\n" + data_;
}
}
DCHECK(!first_received_data_);
// Headers
if (processing_headers_) {
// Eat leading \r\n
int pos = PushOverLine(data_, 0);
if (pos)
data_ = data_.substr(pos);
if (ParseHeaders()) {
// Successfully parsed headers.
processing_headers_ = false;
} else {
// Get more data before trying again.
return;
}
}
DCHECK(!processing_headers_);
size_t boundary_pos;
while ((boundary_pos = FindBoundary()) != std::string::npos) {
if (client_) {
// Strip out trailing \n\r characters in the buffer preceding the
// boundary on the same lines as Firefox.
size_t data_length = boundary_pos;
if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') {
data_length--;
if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') {
data_length--;
}
}
if (data_length > 0) {
// Send the last data chunk.
client_->didReceiveData(loader_,
data_.data(),
static_cast<int>(data_length),
encoded_data_length_);
encoded_data_length_ = 0;
}
}
size_t boundary_end_pos = boundary_pos + boundary_.length();
if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) {
// This was the last boundary so we can stop processing.
stop_sending_ = true;
data_.clear();
return;
}
// We can now throw out data up through the boundary
int offset = PushOverLine(data_, boundary_end_pos);
data_ = data_.substr(boundary_end_pos + offset);
// Ok, back to parsing headers
if (!ParseHeaders()) {
processing_headers_ = true;
break;
}
}
// At this point, we should send over any data we have, but keep enough data
// buffered to handle a boundary that may have been truncated.
if (!processing_headers_ && data_.length() > boundary_.length()) {
// If the last character is a new line character, go ahead and just send
// everything we have buffered. This matches an optimization in Gecko.
int send_length = data_.length() - boundary_.length();
if (data_[data_.length() - 1] == '\n')
send_length = data_.length();
if (client_)
client_->didReceiveData(loader_,
data_.data(),
send_length,
encoded_data_length_);
data_ = data_.substr(send_length);
encoded_data_length_ = 0;
}
}
// Called when the load has finished.  Any payload bytes still sitting in the
// buffer are delivered to the client, unless we are in the middle of a header
// block, the closing boundary has already been seen, or there is no client.
void MultipartResponseDelegate::OnCompletedRequest() {
  if (processing_headers_ || stop_sending_)
    return;
  if (data_.empty() || !client_)
    return;

  client_->didReceiveData(loader_,
                          data_.data(),
                          static_cast<int>(data_.length()),
                          encoded_data_length_);
  encoded_data_length_ = 0;
}
// Returns how many characters of line-break there are in |data| at |pos|:
//   0 - |pos| is out of range or does not point at '\r' or '\n';
//   1 - a single '\r' or '\n';
//   2 - a '\r' or '\n' that is immediately followed by '\n'.
int MultipartResponseDelegate::PushOverLine(const std::string& data,
                                            size_t pos) {
  if (pos >= data.length())
    return 0;

  const char first = data[pos];
  if (first != '\r' && first != '\n')
    return 0;

  const bool followed_by_lf =
      pos + 1 < data.length() && data[pos + 1] == '\n';
  return followed_by_lf ? 2 : 1;
}
// Tries to consume one part's header block from the front of the buffer.
//
// Returns false, leaving the buffer untouched, when the blank line that ends
// the header block has not arrived yet.  Otherwise the header block is removed
// from the buffer, a WebURLResponse for the part is built and passed to the
// client (if there is one), and true is returned.
bool MultipartResponseDelegate::ParseHeaders() {
  // Walk the buffer line by line until an empty line is found.  Both "\n" and
  // "\r\n" are accepted as line endings.
  size_t headers_end = std::string::npos;
  size_t line_begin = 0;
  for (;;) {
    const size_t lf_pos = data_.find('\n', line_begin);
    if (lf_pos == std::string::npos) {
      // Truncated in the middle of the headers; try again with more data.
      return false;
    }

    const bool has_cr = lf_pos > line_begin && data_[lf_pos - 1] == '\r';
    const size_t line_end = has_cr ? lf_pos - 1 : lf_pos;
    if (line_end == line_begin) {
      // Blank line: the header block ends right after this line break.
      headers_end = lf_pos + 1;
      break;
    }
    line_begin = lf_pos + 1;
  }

  // Cut the header block out of the buffer.  The copy handed to
  // net::GetSpecificHeader is prefixed with a newline.
  std::string part_headers = "\n" + data_.substr(0, headers_end);
  data_.erase(0, headers_end);

  // Build the part's response from the original headers plus the replacement
  // headers.  Only the same few headers that Gecko replaces are replaced.
  // See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
  std::string mime_type;
  std::string charset;
  bool has_charset = false;
  const std::string content_type =
      net::GetSpecificHeader(part_headers, "content-type");
  net::HttpUtil::ParseContentType(content_type, &mime_type, &charset,
                                  &has_charset);

  WebURLResponse response(original_response_.url());
  response.setMIMEType(WebString::fromUTF8(mime_type));
  response.setTextEncodingName(WebString::fromUTF8(charset));

  // Headers that are not in kReplaceHeaders come from the original response.
  HeaderCopier copier(&response);
  original_response_.visitHTTPHeaderFields(&copier);

  // Headers in kReplaceHeaders come from this part, when it supplies them.
  for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
    const std::string header_name(kReplaceHeaders[i]);
    const std::string header_value =
        net::GetSpecificHeader(part_headers, header_name);
    if (header_value.empty())
      continue;
    response.setHTTPHeaderField(WebString::fromUTF8(header_name),
                                WebString::fromUTF8(header_value));
  }

  // Recording every part as a separate visit in the history database is not
  // wanted, so the response is flagged as a multipart payload.  The very first
  // part is left unflagged so that the first visit does get recorded.
  response.setIsMultipartPayload(has_sent_first_response_);
  has_sent_first_response_ = true;

  // Hand the response over.
  if (client_)
    client_->didReceiveResponse(loader_, response);

  return true;
}
// Boundaries are supposed to be preceeded with --, but it looks like gecko
// doesn't require the dashes to exist. See nsMultiMixedConv::FindToken.
size_t MultipartResponseDelegate::FindBoundary() {
size_t boundary_pos = data_.find(boundary_);
if (boundary_pos != std::string::npos) {
// Back up over -- for backwards compat
// TODO(tc): Don't we only want to do this once? Gecko code doesn't seem
// to care.
if (boundary_pos >= 2) {
if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) {
boundary_pos -= 2;
boundary_ = "--" + boundary_;
}
}
}
return boundary_pos;
}
// Extracts the multipart boundary from the Content-Type header of |response|.
//
// The value of the "boundary=" parameter, up to the next ';' or the end of the
// header, is stored in |multipart_boundary| with surrounding double quotes and
// whitespace removed.  Returns false (leaving |multipart_boundary| untouched)
// when the header contains no "boundary=" parameter, true otherwise.
bool MultipartResponseDelegate::ReadMultipartBoundary(
    const WebURLResponse& response,
    std::string* multipart_boundary) {
  std::string content_type =
      response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();

  size_t boundary_start_offset = content_type.find("boundary=");
  if (boundary_start_offset == std::string::npos)
    return false;

  boundary_start_offset += strlen("boundary=");

  // The parameter value runs up to the next parameter separator, if any.
  size_t boundary_end_offset = content_type.find(';', boundary_start_offset);
  if (boundary_end_offset == std::string::npos)
    boundary_end_offset = content_type.length();

  size_t boundary_length = boundary_end_offset - boundary_start_offset;

  *multipart_boundary =
      content_type.substr(boundary_start_offset, boundary_length);

  // The byte range response can have quoted boundary strings.  This is legal
  // as per MIME specifications.  Individual data fragments however don't
  // contain quoted boundary strings.  Whitespace around the value (for
  // example before a following ';' or at the end of the header) is not part of
  // the boundary either, so it is trimmed along with the quotes; otherwise the
  // boundary would never match the delimiters in the body.
  TrimString(*multipart_boundary, " \t\"", multipart_boundary);
  return true;
}
// Parses the byte range carried by |response|.
//
// The range is read from the "Content-Range" header, falling back to the
// "Range" header when the former is empty.  The value is expected to look like
// "<unit> <lower>-<upper>/<instance-size>", e.g. "bytes 0-499/1234".
//
// On success the three numbers are stored in |content_range_lower_bound|,
// |content_range_upper_bound| and |content_range_instance_size| and true is
// returned.  On any failure (header missing, separators missing, or a field
// that is not an integer) false is returned and none of the output parameters
// is modified.
bool MultipartResponseDelegate::ReadContentRanges(
    const WebURLResponse& response,
    int* content_range_lower_bound,
    int* content_range_upper_bound,
    int* content_range_instance_size) {
  std::string content_range = response.httpHeaderField("Content-Range").utf8();
  if (content_range.empty()) {
    content_range = response.httpHeaderField("Range").utf8();
  }

  if (content_range.empty()) {
    DLOG(WARNING) << "Failed to read content range from response.";
    return false;
  }

  // The lower bound starts right after the first space...
  const size_t space_pos = content_range.find(' ');
  if (space_pos == std::string::npos)
    return false;
  const size_t lower_start = space_pos + 1;

  // ...and ends at the '-' that separates it from the upper bound.
  const size_t dash_pos = content_range.find('-', lower_start);
  if (dash_pos == std::string::npos)
    return false;
  const size_t upper_start = dash_pos + 1;

  // The upper bound ends at the '/'; the instance size is everything after it.
  const size_t slash_pos = content_range.find('/', upper_start);
  if (slash_pos == std::string::npos)
    return false;
  const size_t size_start = slash_pos + 1;

  const std::string lower_text =
      content_range.substr(lower_start, dash_pos - lower_start);
  const std::string upper_text =
      content_range.substr(upper_start, slash_pos - upper_start);
  const std::string size_text = content_range.substr(size_start);

  // Parse into locals first so that the caller's variables are only touched
  // once all three fields are known to be valid.
  int lower_bound = 0;
  int upper_bound = 0;
  int instance_size = 0;
  if (!base::StringToInt(lower_text, &lower_bound))
    return false;
  if (!base::StringToInt(upper_text, &upper_bound))
    return false;
  if (!base::StringToInt(size_text, &instance_size))
    return false;

  *content_range_lower_bound = lower_bound;
  *content_range_upper_bound = upper_bound;
  *content_range_instance_size = instance_size;
  return true;
}
} // namespace webkit_glue