// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
#define CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
#include <queue>
#include <set>
#include <string>
#include <vector>
#include "base/basictypes.h"
#include "base/containers/hash_tables.h"
#include "base/files/file_path.h"
#include "base/gtest_prod_util.h"
#include "base/memory/ref_counted.h"
#include "base/memory/weak_ptr.h"
#include "base/time/time.h"
#include "content/common/content_export.h"
#include "content/public/browser/download_item.h"
#include "content/public/browser/download_manager_delegate.h"
#include "content/public/browser/save_page_type.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/common/referrer.h"
#include "net/base/net_errors.h"
#include "url/gurl.h"
class GURL;
namespace content {
class DownloadItemImpl;
class DownloadManagerImpl;
class WebContents;
class SaveFileManager;
class SaveItem;
class SavePackage;
struct SaveFileCreateInfo;
// The SavePackage object manages the process of saving a page as only-html or
// complete-html or MHTML and providing the information for displaying saving
// status. Saving page as only-html means means that we save web page to a
// single HTML file regardless internal sub resources and sub frames. Saving
// page as complete-html page means we save not only the main html file the user
// told it to save but also a directory for the auxiliary files such as all
// sub-frame html files, image files, css files and js files. Saving page as
// MHTML means the same thing as complete-html, but it uses the MHTML format to
// contain the html and all auxiliary files in a single text file.
//
// Each page saving job may include one or multiple files which need to be
// saved. Each file is represented by a SaveItem, and all SaveItems are owned
// by the SavePackage. SaveItems are created when a user initiates a page
// saving job, and exist for the duration of one contents's life time.
class CONTENT_EXPORT SavePackage
: public base::RefCountedThreadSafe<SavePackage>,
public WebContentsObserver,
public DownloadItem::Observer,
public base::SupportsWeakPtr<SavePackage> {
public:
enum WaitState {
// State when created but not initialized.
INITIALIZE = 0,
// State when after initializing, but not yet saving.
START_PROCESS,
// Waiting on a list of savable resources from the backend.
RESOURCES_LIST,
// Waiting for data sent from net IO or from file system.
NET_FILES,
// Waiting for html DOM data sent from render process.
HTML_DATA,
// Saving page finished successfully.
SUCCESSFUL,
// Failed to save page.
FAILED
};
static const base::FilePath::CharType kDefaultHtmlExtension[];
// Constructor for user initiated page saving. This constructor results in a
// SavePackage that will generate and sanitize a suggested name for the user
// in the "Save As" dialog box.
explicit SavePackage(WebContents* web_contents);
// This contructor is used only for testing. We can bypass the file and
// directory name generation / sanitization by providing well known paths
// better suited for tests.
SavePackage(WebContents* web_contents,
SavePageType save_type,
const base::FilePath& file_full_path,
const base::FilePath& directory_full_path);
// Initialize the SavePackage. Returns true if it initializes properly. Need
// to make sure that this method must be called in the UI thread because using
// g_browser_process on a non-UI thread can cause crashes during shutdown.
// |cb| will be called when the DownloadItem is created, before data is
// written to disk.
bool Init(const SavePackageDownloadCreatedCallback& cb);
// Cancel all in progress request, might be called by user or internal error.
void Cancel(bool user_action);
void Finish();
// Notifications sent from the file thread to the UI thread.
void StartSave(const SaveFileCreateInfo* info);
bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success);
void SaveFinished(int32 save_id, int64 size, bool is_success);
void SaveFailed(const GURL& save_url);
void SaveCanceled(SaveItem* save_item);
// Rough percent complete, -1 means we don't know (since we didn't receive a
// total size).
int PercentComplete();
bool canceled() const { return user_canceled_ || disk_error_occurred_; }
bool finished() const { return finished_; }
SavePageType save_type() const { return save_type_; }
int contents_id() const { return contents_id_; }
int id() const { return unique_id_; }
WebContents* web_contents() const;
void GetSaveInfo();
private:
friend class base::RefCountedThreadSafe<SavePackage>;
void InitWithDownloadItem(
const SavePackageDownloadCreatedCallback& download_created_callback,
DownloadItemImpl* item);
// Callback for WebContents::GenerateMHTML().
void OnMHTMLGenerated(int64 size);
// For testing only.
SavePackage(WebContents* web_contents,
const base::FilePath& file_full_path,
const base::FilePath& directory_full_path);
virtual ~SavePackage();
// Notes from Init() above applies here as well.
void InternalInit();
void Stop();
void CheckFinish();
void SaveNextFile(bool process_all_remainder_items);
void DoSavingProcess();
// WebContentsObserver implementation.
virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
// DownloadItem::Observer implementation.
virtual void OnDownloadDestroyed(DownloadItem* download) OVERRIDE;
// Update the download history of this item upon completion.
void FinalizeDownloadEntry();
// Detach from DownloadManager.
void StopObservation();
// Return max length of a path for a specific base directory.
// This is needed on POSIX, which restrict the length of file names in
// addition to the restriction on the length of path names.
// |base_dir| is assumed to be a directory name with no trailing slash.
static uint32 GetMaxPathLengthForDirectory(const base::FilePath& base_dir);
static bool GetSafePureFileName(
const base::FilePath& dir_path,
const base::FilePath::StringType& file_name_ext,
uint32 max_file_path_len,
base::FilePath::StringType* pure_file_name);
// Create a file name based on the response from the server.
bool GenerateFileName(const std::string& disposition,
const GURL& url,
bool need_html_ext,
base::FilePath::StringType* generated_name);
// Get all savable resource links from current web page, include main
// frame and sub-frame.
void GetAllSavableResourceLinksForCurrentPage();
// Get html data by serializing all frames of current page with lists
// which contain all resource links that have local copy.
void GetSerializedHtmlDataForCurrentPageWithLocalLinks();
// Look up SaveItem by save id from in progress map.
SaveItem* LookupItemInProcessBySaveId(int32 save_id);
// Remove SaveItem from in progress map and put it to saved map.
void PutInProgressItemToSavedMap(SaveItem* save_item);
// Retrieves the URL to be saved from the WebContents.
GURL GetUrlToBeSaved();
void CreateDirectoryOnFileThread(const base::FilePath& website_save_dir,
const base::FilePath& download_save_dir,
bool skip_dir_check,
const std::string& mime_type,
const std::string& accept_langs);
void ContinueGetSaveInfo(const base::FilePath& suggested_path,
bool can_save_as_complete);
void OnPathPicked(
const base::FilePath& final_name,
SavePageType type,
const SavePackageDownloadCreatedCallback& cb);
void OnReceivedSavableResourceLinksForCurrentPage(
const std::vector<GURL>& resources_list,
const std::vector<Referrer>& referrers_list,
const std::vector<GURL>& frames_list);
void OnReceivedSerializedHtmlData(const GURL& frame_url,
const std::string& data,
int32 status);
typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap;
// in_progress_items_ is map of all saving job in in-progress state.
SaveUrlItemMap in_progress_items_;
// saved_failed_items_ is map of all saving job which are failed.
SaveUrlItemMap saved_failed_items_;
// The number of in process SaveItems.
int in_process_count() const {
return static_cast<int>(in_progress_items_.size());
}
// The number of all SaveItems which have completed, including success items
// and failed items.
int completed_count() const {
return static_cast<int>(saved_success_items_.size() +
saved_failed_items_.size());
}
// The current speed in files per second. This is used to update the
// DownloadItem associated to this SavePackage. The files per second is
// presented by the DownloadItem to the UI as bytes per second, which is
// not correct but matches the way the total and received number of files is
// presented as the total and received bytes.
int64 CurrentSpeed() const;
// Helper function for preparing suggested name for the SaveAs Dialog. The
// suggested name is determined by the web document's title.
base::FilePath GetSuggestedNameForSaveAs(
bool can_save_as_complete,
const std::string& contents_mime_type,
const std::string& accept_langs);
// Ensures that the file name has a proper extension for HTML by adding ".htm"
// if necessary.
static base::FilePath EnsureHtmlExtension(const base::FilePath& name);
// Ensures that the file name has a proper extension for supported formats
// if necessary.
static base::FilePath EnsureMimeExtension(const base::FilePath& name,
const std::string& contents_mime_type);
// Returns extension for supported MIME types (for example, for "text/plain"
// it returns "txt").
static const base::FilePath::CharType* ExtensionForMimeType(
const std::string& contents_mime_type);
typedef std::queue<SaveItem*> SaveItemQueue;
// A queue for items we are about to start saving.
SaveItemQueue waiting_item_queue_;
typedef base::hash_map<int32, SaveItem*> SavedItemMap;
// saved_success_items_ is map of all saving job which are successfully saved.
SavedItemMap saved_success_items_;
// Non-owning pointer for handling file writing on the file thread.
SaveFileManager* file_manager_;
// DownloadManager owns the DownloadItem and handles history and UI.
DownloadManagerImpl* download_manager_;
DownloadItemImpl* download_;
// The URL of the page the user wants to save.
GURL page_url_;
base::FilePath saved_main_file_path_;
base::FilePath saved_main_directory_path_;
// The title of the page the user wants to save.
base::string16 title_;
// Used to calculate package download speed (in files per second).
base::TimeTicks start_tick_;
// Indicates whether the actual saving job is finishing or not.
bool finished_;
// Indicates whether a call to Finish() has been scheduled.
bool mhtml_finishing_;
// Indicates whether user canceled the saving job.
bool user_canceled_;
// Indicates whether user get disk error.
bool disk_error_occurred_;
// Type about saving page as only-html or complete-html.
SavePageType save_type_;
// Number of all need to be saved resources.
size_t all_save_items_count_;
typedef std::set<base::FilePath::StringType,
bool (*)(const base::FilePath::StringType&,
const base::FilePath::StringType&)> FileNameSet;
// This set is used to eliminate duplicated file names in saving directory.
FileNameSet file_name_set_;
typedef base::hash_map<base::FilePath::StringType, uint32> FileNameCountMap;
// This map is used to track serial number for specified filename.
FileNameCountMap file_name_count_map_;
// Indicates current waiting state when SavePackage try to get something
// from outside.
WaitState wait_state_;
// Since for one contents, it can only have one SavePackage in same time.
// Now we actually use render_process_id as the contents's unique id.
const int contents_id_;
// Unique ID for this SavePackage.
const int unique_id_;
// Variables to record errors that happened so we can record them via
// UMA statistics.
bool wrote_to_completed_file_;
bool wrote_to_failed_file_;
friend class SavePackageTest;
FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames);
FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename);
DISALLOW_COPY_AND_ASSIGN(SavePackage);
};
} // namespace content
#endif // CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_