// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "extensions/browser/content_hash_fetcher.h"

#include <algorithm>

#include "base/base64.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/json/json_reader.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram.h"
#include "base/synchronization/lock.h"
#include "base/task_runner_util.h"
#include "base/timer/elapsed_timer.h"
#include "base/version.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/browser_thread.h"
#include "crypto/sha2.h"
#include "extensions/browser/computed_hashes.h"
#include "extensions/browser/content_hash_tree.h"
#include "extensions/browser/content_verifier_delegate.h"
#include "extensions/browser/verified_contents.h"
#include "extensions/common/constants.h"
#include "extensions/common/extension.h"
#include "extensions/common/file_util.h"
#include "net/base/load_flags.h"
#include "net/url_request/url_fetcher.h"
#include "net/url_request/url_fetcher_delegate.h"
#include "net/url_request/url_request_status.h"

namespace {

typedef std::set<base::FilePath> SortedFilePathSet;

}  // namespace

namespace extensions {

// This class takes care of doing the disk and network I/O work to ensure we
// have both verified_contents.json files from the webstore and
// computed_hashes.json files computed over the files in an extension's
// directory.
class ContentHashFetcherJob
    : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
      public net::URLFetcherDelegate {
 public:
  typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
  ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
                        const ContentVerifierKey& key,
                        const std::string& extension_id,
                        const base::FilePath& extension_path,
                        const GURL& fetch_url,
                        bool force,
                        const CompletionCallback& callback);

  void Start();

  // Cancels this job, which will attempt to stop I/O operations sooner than
  // just waiting for the entire job to complete. Safe to call from any thread.
  void Cancel();

  // Checks whether this job has been cancelled. Safe to call from any thread.
  bool IsCancelled();

  // Returns whether this job was successful (we have both verified contents
  // and computed hashes). Even if the job was a success, there might have been
  // files that were found to have contents not matching expectations; these
  // are available by calling hash_mismatch_paths().
  bool success() { return success_; }

  bool force() { return force_; }

  const std::string& extension_id() { return extension_id_; }

  // Returns the set of paths that had a hash mismatch.
  const std::set<base::FilePath>& hash_mismatch_paths() {
    return hash_mismatch_paths_;
  }

 private:
  friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
  virtual ~ContentHashFetcherJob();

  // Tries to load a verified_contents.json file at |path|. On successfully
  // reading and validating the file, the verified_contents_ member variable
  // will be set and this function will return true. If the file does not
  // exist, or exists but is invalid, it will return false. Also, any invalid
  // file will be removed from disk.
  bool LoadVerifiedContents(const base::FilePath& path);

  // Callback for when we're done doing file I/O to see if we already have
  // a verified contents file. If we don't, this will kick off a network
  // request to get one.
  void DoneCheckingForVerifiedContents(bool found);

  // URLFetcherDelegate interface
  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;

  // Callback for when we're done ensuring we have verified contents, and are
  // ready to move on to MaybeCreateHashes.
  void DoneFetchingVerifiedContents(bool success);

  // Callback for the job to write the verified contents to the filesystem.
  void OnVerifiedContentsWritten(size_t expected_size, int write_result);

  // The verified contents file from the webstore only contains the treehash
  // root hash, but for performance we want to cache the individual block level
  // hashes. This function will create that cache with block-level hashes for
  // each file in the extension if needed (the treehash root hash for each of
  // these should equal what is in the verified contents file from the
  // webstore).
  void MaybeCreateHashes();

  // Computes hashes for all files in |extension_path_|, and uses a
  // ComputedHashes::Writer to write that information into |hashes_file|.
  // Returns true on success.
  bool CreateHashes(const base::FilePath& hashes_file);

  // Will call the callback, if we haven't been cancelled.
  void DispatchCallback();

  net::URLRequestContextGetter* request_context_;
  std::string extension_id_;
  base::FilePath extension_path_;

  // The url we'll need to use to fetch a verified_contents.json file.
  GURL fetch_url_;

  bool force_;

  CompletionCallback callback_;

  content::BrowserThread::ID creation_thread_;

  // Used for fetching content signatures.
  scoped_ptr<net::URLFetcher> url_fetcher_;

  // The key used to validate verified_contents.json.
  ContentVerifierKey key_;

  // The parsed contents of the verified_contents.json file, either read from
  // disk or fetched from the network and then written to disk.
  scoped_ptr<VerifiedContents> verified_contents_;

  // Whether this job succeeded.
  bool success_;

  // Paths that were found to have a mismatching hash.
  std::set<base::FilePath> hash_mismatch_paths_;

  // The block size to use for hashing.
  int block_size_;

  // Note: this may be accessed from multiple threads, so all access should
  // be protected by |cancelled_lock_|.
  bool cancelled_;

  // A lock for synchronizing access to |cancelled_|.
  base::Lock cancelled_lock_;

  DISALLOW_COPY_AND_ASSIGN(ContentHashFetcherJob);
};
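
// A rough sketch of the job's control flow, reconstructed from the method
// bodies below (the thread annotations reflect where each step is explicitly
// posted; unannotated steps run on whichever thread invoked them):
//
//   Start()
//     -> LoadVerifiedContents()                 [blocking pool]
//     -> DoneCheckingForVerifiedContents()
//          -> file already present -> DoneFetchingVerifiedContents(true)
//          -> otherwise URLFetcher -> OnURLFetchComplete()
//               -> WriteFileHelper()            [blocking pool]
//               -> OnVerifiedContentsWritten()
//               -> DoneFetchingVerifiedContents()
//     -> MaybeCreateHashes() -> CreateHashes()  [blocking pool, sequenced]
//     -> DispatchCallback()                     [creation thread]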

ContentHashFetcherJob::ContentHashFetcherJob(
    net::URLRequestContextGetter* request_context,
    const ContentVerifierKey& key,
    const std::string& extension_id,
    const base::FilePath& extension_path,
    const GURL& fetch_url,
    bool force,
    const CompletionCallback& callback)
    : request_context_(request_context),
      extension_id_(extension_id),
      extension_path_(extension_path),
      fetch_url_(fetch_url),
      force_(force),
      callback_(callback),
      key_(key),
      success_(false),
      // TODO(asargent) - use the value from verified_contents.json for each
      // file, instead of using a constant.
      block_size_(4096),
      cancelled_(false) {
  bool got_id =
      content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
  DCHECK(got_id);
}

void ContentHashFetcherJob::Start() {
  base::FilePath verified_contents_path =
      file_util::GetVerifiedContentsPath(extension_path_);
  base::PostTaskAndReplyWithResult(
      content::BrowserThread::GetBlockingPool(),
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::LoadVerifiedContents,
                 this,
                 verified_contents_path),
      base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
                 this));
}

void ContentHashFetcherJob::Cancel() {
  base::AutoLock autolock(cancelled_lock_);
  cancelled_ = true;
}

bool ContentHashFetcherJob::IsCancelled() {
  base::AutoLock autolock(cancelled_lock_);
  bool result = cancelled_;
  return result;
}

ContentHashFetcherJob::~ContentHashFetcherJob() {
}

bool ContentHashFetcherJob::LoadVerifiedContents(const base::FilePath& path) {
  if (!base::PathExists(path))
    return false;
  verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
  if (!verified_contents_->InitFrom(path, false)) {
    verified_contents_.reset();
    if (!base::DeleteFile(path, false))
      LOG(WARNING) << "Failed to delete " << path.value();
    return false;
  }
  return true;
}

void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
  if (IsCancelled())
    return;
  if (found) {
    VLOG(1) << "Found verified contents for " << extension_id_;
    DoneFetchingVerifiedContents(true);
  } else {
    VLOG(1) << "Missing verified contents for " << extension_id_
            << ", fetching...";
    url_fetcher_.reset(
        net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this));
    url_fetcher_->SetRequestContext(request_context_);
    url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
                               net::LOAD_DO_NOT_SAVE_COOKIES |
                               net::LOAD_DISABLE_CACHE);
    url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
    url_fetcher_->Start();
  }
}

// Helper function to let us pass ownership of a string via base::Bind with the
// contents to be written into a file. Also ensures that the directory for
// |path| exists, creating it if needed. Returns the number of bytes written,
// or -1 on failure, so the caller can compare the result against the expected
// size.
static int WriteFileHelper(const base::FilePath& path,
                           scoped_ptr<std::string> content) {
  base::FilePath dir = path.DirName();
  if (!base::CreateDirectoryAndGetError(dir, NULL))
    return -1;
  return base::WriteFile(path, content->data(), content->size());
}

void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
  VLOG(1) << "URLFetchComplete for " << extension_id_
          << " is_success:" << url_fetcher_->GetStatus().is_success() << " "
          << fetch_url_.possibly_invalid_spec();
  if (IsCancelled())
    return;
  scoped_ptr<std::string> response(new std::string);
  if (!url_fetcher_->GetStatus().is_success() ||
      !url_fetcher_->GetResponseAsString(response.get())) {
    DoneFetchingVerifiedContents(false);
    return;
  }

  // Parse the response to make sure it is valid json (on staging sometimes it
  // can be a login redirect html, xml file, etc. if you aren't logged in with
  // the right cookies). TODO(asargent) - It would be a nice enhancement to
  // move to parsing this in a sandboxed helper (crbug.com/372878).
  scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
  if (parsed) {
    VLOG(1) << "JSON parsed ok for " << extension_id_;

    parsed.reset();  // no longer needed
    base::FilePath destination =
        file_util::GetVerifiedContentsPath(extension_path_);
    size_t size = response->size();
    base::PostTaskAndReplyWithResult(
        content::BrowserThread::GetBlockingPool(),
        FROM_HERE,
        base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
        base::Bind(
            &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
  } else {
    DoneFetchingVerifiedContents(false);
  }
}

void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
                                                      int write_result) {
  bool success = (write_result >= 0 &&
                  static_cast<size_t>(write_result) == expected_size);
  DoneFetchingVerifiedContents(success);
}

void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
  if (IsCancelled())
    return;

  if (!success) {
    DispatchCallback();
    return;
  }

  content::BrowserThread::PostBlockingPoolSequencedTask(
      "ContentHashFetcher",
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
}

void ContentHashFetcherJob::MaybeCreateHashes() {
  if (IsCancelled())
    return;
  base::FilePath hashes_file =
      file_util::GetComputedHashesPath(extension_path_);

  if (!force_ && base::PathExists(hashes_file)) {
    success_ = true;
  } else {
    if (force_)
      base::DeleteFile(hashes_file, false /* recursive */);
    success_ = CreateHashes(hashes_file);
  }

  content::BrowserThread::PostTask(
      creation_thread_,
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
}

bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
  base::ElapsedTimer timer;
  if (IsCancelled())
    return false;
  // Make sure the directory exists.
  if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
    return false;

  if (!verified_contents_.get()) {
    base::FilePath verified_contents_path =
        file_util::GetVerifiedContentsPath(extension_path_);
    verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
    // Only discard |verified_contents_| if it failed to load; it is needed
    // below to look up the expected tree hash root for each file.
    if (!verified_contents_->InitFrom(verified_contents_path, false)) {
      verified_contents_.reset();
      return false;
    }
  }

  base::FileEnumerator enumerator(extension_path_,
                                  true, /* recursive */
                                  base::FileEnumerator::FILES);
  // First discover all the file paths and put them in a sorted set.
  SortedFilePathSet paths;
  for (;;) {
    if (IsCancelled())
      return false;

    base::FilePath full_path = enumerator.Next();
    if (full_path.empty())
      break;
    paths.insert(full_path);
  }

  // Now iterate over all the paths in sorted order and compute the block
  // hashes for each one.
  ComputedHashes::Writer writer;
  for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
    if (IsCancelled())
      return false;
    const base::FilePath& full_path = *i;
    base::FilePath relative_path;
    extension_path_.AppendRelativePath(full_path, &relative_path);
    relative_path = relative_path.NormalizePathSeparatorsTo('/');
    if (!verified_contents_->HasTreeHashRoot(relative_path))
      continue;

    std::string contents;
    if (!base::ReadFileToString(full_path, &contents)) {
      LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
      continue;
    }

    // Iterate through taking the hash of each block of size (block_size_) of
    // the file.
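    // With the hard-coded block_size_ of 4096 and crypto::kSHA256Length of
    // 32, this yields one 32-byte SHA-256 leaf hash per 4096-byte block; the
    // leaves are then folded into a single root by ComputeTreeHashRoot() with
    // a branching factor of block_size_ / kSHA256Length = 128, which is what
    // gets compared against the signed root from verified_contents.json.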
    std::vector<std::string> hashes;
    ComputedHashes::ComputeHashesForContent(contents, block_size_, &hashes);
    std::string root =
        ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
    if (!verified_contents_->TreeHashRootEquals(relative_path, root)) {
      VLOG(1) << "content mismatch for " << relative_path.AsUTF8Unsafe();
      hash_mismatch_paths_.insert(relative_path);
      continue;
    }

    writer.AddHashes(relative_path, block_size_, hashes);
  }
  bool result = writer.WriteToFile(hashes_file);
  UMA_HISTOGRAM_TIMES("ExtensionContentHashFetcher.CreateHashesTime",
                      timer.Elapsed());
  return result;
}

void ContentHashFetcherJob::DispatchCallback() {
  {
    base::AutoLock autolock(cancelled_lock_);
    if (cancelled_)
      return;
  }
  callback_.Run(this);
}

// ----

ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
                                       ContentVerifierDelegate* delegate,
                                       const FetchCallback& callback)
    : context_(context),
      delegate_(delegate),
      fetch_callback_(callback),
      weak_ptr_factory_(this) {
}

ContentHashFetcher::~ContentHashFetcher() {
  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    i->second->Cancel();
  }
}

void ContentHashFetcher::DoFetch(const Extension* extension, bool force) {
  DCHECK(extension);

  IdAndVersion key(extension->id(), extension->version()->GetString());
  JobMap::iterator found = jobs_.find(key);
  if (found != jobs_.end()) {
    if (!force || found->second->force()) {
      // Just let the existing job keep running.
      return;
    } else {
      // Kill the existing non-force job, so we can start a new one below.
      found->second->Cancel();
      jobs_.erase(found);
    }
  }

  // TODO(asargent) - we should do something here to remember recent attempts
  // to fetch signatures by extension id, and use exponential backoff to avoid
  // hammering the server when we aren't successful in getting them.
  // crbug.com/373397
  DCHECK(extension->version());
  GURL url =
      delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
  ContentHashFetcherJob* job =
      new ContentHashFetcherJob(context_->GetRequestContext(),
                                delegate_->PublicKey(),
                                extension->id(),
                                extension->path(),
                                url,
                                force,
                                base::Bind(&ContentHashFetcher::JobFinished,
                                           weak_ptr_factory_.GetWeakPtr()));
  jobs_.insert(std::make_pair(key, job));
  job->Start();
}

void ContentHashFetcher::ExtensionLoaded(const Extension* extension) {
  CHECK(extension);
  DoFetch(extension, false);
}

void ContentHashFetcher::ExtensionUnloaded(const Extension* extension) {
  CHECK(extension);
  IdAndVersion key(extension->id(), extension->version()->GetString());
  JobMap::iterator found = jobs_.find(key);
  if (found != jobs_.end()) {
    found->second->Cancel();
    jobs_.erase(found);
  }
}

void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
  if (!job->IsCancelled()) {
    fetch_callback_.Run(job->extension_id(),
                        job->success(),
                        job->force(),
                        job->hash_mismatch_paths());
  }

  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    if (i->second.get() == job) {
      jobs_.erase(i);
      break;
    }
  }
}

}  // namespace extensions