//===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // Define the main class fuzzer::Fuzzer and most functions. //===----------------------------------------------------------------------===// #ifndef LLVM_FUZZER_INTERNAL_H #define LLVM_FUZZER_INTERNAL_H #include <algorithm> #include <atomic> #include <cassert> #include <chrono> #include <climits> #include <cstddef> #include <cstdlib> #include <random> #include <string.h> #include <string> #include <unordered_set> #include <vector> #include "FuzzerExtFunctions.h" #include "FuzzerInterface.h" #include "FuzzerTracePC.h" // Platform detection. #ifdef __linux__ #define LIBFUZZER_LINUX 1 #define LIBFUZZER_APPLE 0 #elif __APPLE__ #define LIBFUZZER_LINUX 0 #define LIBFUZZER_APPLE 1 #else #error "Support for your platform has not been implemented" #endif namespace fuzzer { typedef int (*UserCallback)(const uint8_t *Data, size_t Size); int FuzzerDriver(int *argc, char ***argv, UserCallback Callback); using namespace std::chrono; typedef std::vector<uint8_t> Unit; typedef std::vector<Unit> UnitVector; // A simple POD sized array of bytes. template <size_t kMaxSize> class FixedWord { public: FixedWord() {} FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); } void Set(const uint8_t *B, uint8_t S) { assert(S <= kMaxSize); memcpy(Data, B, S); Size = S; } bool operator==(const FixedWord<kMaxSize> &w) const { return Size == w.Size && 0 == memcmp(Data, w.Data, Size); } bool operator<(const FixedWord<kMaxSize> &w) const { if (Size != w.Size) return Size < w.Size; return memcmp(Data, w.Data, Size) < 0; } static size_t GetMaxSize() { return kMaxSize; } const uint8_t *data() const { return Data; } uint8_t size() const { return Size; } private: uint8_t Size = 0; uint8_t Data[kMaxSize]; }; typedef FixedWord<27> Word; // 28 bytes. bool IsFile(const std::string &Path); std::string FileToString(const std::string &Path); Unit FileToVector(const std::string &Path, size_t MaxSize = 0); void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, long *Epoch, size_t MaxSize); void WriteToFile(const Unit &U, const std::string &Path); void CopyFileToErr(const std::string &Path); // Returns "Dir/FileName" or equivalent for the current OS. std::string DirPlusFile(const std::string &DirPath, const std::string &FileName); void DupAndCloseStderr(); void CloseStdout(); void Printf(const char *Fmt, ...); void PrintHexArray(const Unit &U, const char *PrintAfter = ""); void PrintHexArray(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); void PrintASCII(const Unit &U, const char *PrintAfter = ""); void PrintASCII(const Word &W, const char *PrintAfter = ""); std::string Hash(const Unit &U); void SetTimer(int Seconds); void SetSigSegvHandler(); void SetSigBusHandler(); void SetSigAbrtHandler(); void SetSigIllHandler(); void SetSigFpeHandler(); void SetSigIntHandler(); void SetSigTermHandler(); std::string Base64(const Unit &U); int ExecuteCommand(const std::string &Command); size_t GetPeakRSSMb(); // Private copy of SHA1 implementation. static const int kSHA1NumBytes = 20; // Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'. void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); // Changes U to contain only ASCII (isprint+isspace) characters. // Returns true iff U has been changed. bool ToASCII(uint8_t *Data, size_t Size); bool IsASCII(const Unit &U); bool IsASCII(const uint8_t *Data, size_t Size); int NumberOfCpuCores(); int GetPid(); void SleepSeconds(int Seconds); class Random { public: Random(unsigned int seed) : R(seed) {} size_t Rand() { return R(); } size_t RandBool() { return Rand() % 2; } size_t operator()(size_t n) { return n ? Rand() % n : 0; } std::mt19937 &Get_mt19937() { return R; } private: std::mt19937 R; }; // Dictionary. // Parses one dictionary entry. // If successfull, write the enty to Unit and returns true, // otherwise returns false. bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); // Parses the dictionary file, fills Units, returns true iff all lines // were parsed succesfully. bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units); class DictionaryEntry { public: DictionaryEntry() {} DictionaryEntry(Word W) : W(W) {} DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} const Word &GetW() const { return W; } bool HasPositionHint() const { return PositionHint != std::numeric_limits<size_t>::max(); } size_t GetPositionHint() const { assert(HasPositionHint()); return PositionHint; } void IncUseCount() { UseCount++; } void IncSuccessCount() { SuccessCount++; } size_t GetUseCount() const { return UseCount; } size_t GetSuccessCount() const {return SuccessCount; } private: Word W; size_t PositionHint = std::numeric_limits<size_t>::max(); size_t UseCount = 0; size_t SuccessCount = 0; }; class Dictionary { public: static const size_t kMaxDictSize = 1 << 14; bool ContainsWord(const Word &W) const { return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { return DE.GetW() == W; }); } const DictionaryEntry *begin() const { return &DE[0]; } const DictionaryEntry *end() const { return begin() + Size; } DictionaryEntry & operator[] (size_t Idx) { assert(Idx < Size); return DE[Idx]; } void push_back(DictionaryEntry DE) { if (Size < kMaxDictSize) this->DE[Size++] = DE; } void clear() { Size = 0; } bool empty() const { return Size == 0; } size_t size() const { return Size; } private: DictionaryEntry DE[kMaxDictSize]; size_t Size = 0; }; struct FuzzingOptions { int Verbosity = 1; size_t MaxLen = 0; int UnitTimeoutSec = 300; int TimeoutExitCode = 77; int ErrorExitCode = 77; int MaxTotalTimeSec = 0; int RssLimitMb = 0; bool DoCrossOver = true; int MutateDepth = 5; bool UseCounters = false; bool UseIndirCalls = true; bool UseTraces = false; bool UseMemcmp = true; bool UseFullCoverageSet = false; bool Reload = true; bool ShuffleAtStartUp = true; bool PreferSmall = true; size_t MaxNumberOfRuns = ULONG_MAX; int ReportSlowUnits = 10; bool OnlyASCII = false; std::string OutputCorpus; std::string ArtifactPrefix = "./"; std::string ExactArtifactPath; bool SaveArtifacts = true; bool PrintNEW = true; // Print a status line when new units are found; bool OutputCSV = false; bool PrintNewCovPcs = false; bool PrintFinalStats = false; bool DetectLeaks = true; bool TruncateUnits = false; bool PruneCorpus = true; }; class MutationDispatcher { public: MutationDispatcher(Random &Rand, const FuzzingOptions &Options); ~MutationDispatcher() {} /// Indicate that we are about to start a new sequence of mutations. void StartMutationSequence(); /// Print the current sequence of mutations. void PrintMutationSequence(); /// Indicate that the current sequence of mutations was successfull. void RecordSuccessfulMutationSequence(); /// Mutates data by invoking user-provided mutator. size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by invoking user-provided crossover. size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by shuffling bytes. size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by erasing a byte. size_t Mutate_EraseByte(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by inserting a byte. size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by chanding one byte. size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by chanding one bit. size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by adding a word from the manual dictionary. size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by adding a word from the temporary automatic dictionary. size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by adding a word from the persistent automatic dictionary. size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, size_t MaxSize); /// Tries to find an ASCII integer in Data, changes it to another ASCII int. size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); /// CrossOver Data with some other element of the corpus. size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); /// Applies one of the configured mutations. /// Returns the new size of data which could be up to MaxSize. size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); /// Applies one of the default mutations. Provided as a service /// to mutation authors. size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); /// Creates a cross-over of two pieces of Data, returns its size. size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, size_t Size2, uint8_t *Out, size_t MaxOutSize); void AddWordToManualDictionary(const Word &W); void AddWordToAutoDictionary(const Word &W, size_t PositionHint); void ClearAutoDictionary(); void PrintRecommendedDictionary(); void SetCorpus(const std::vector<Unit> *Corpus) { this->Corpus = Corpus; } Random &GetRand() { return Rand; } private: struct Mutator { size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); const char *Name; }; size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, size_t MaxSize); size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, const std::vector<Mutator> &Mutators); Random &Rand; const FuzzingOptions Options; // Dictionary provided by the user via -dict=DICT_FILE. Dictionary ManualDictionary; // Temporary dictionary modified by the fuzzer itself, // recreated periodically. Dictionary TempAutoDictionary; // Persistent dictionary modified by the fuzzer, consists of // entries that led to successfull discoveries in the past mutations. Dictionary PersistentAutoDictionary; std::vector<Mutator> CurrentMutatorSequence; std::vector<DictionaryEntry *> CurrentDictionaryEntrySequence; const std::vector<Unit> *Corpus = nullptr; std::vector<uint8_t> MutateInPlaceHere; std::vector<Mutator> Mutators; std::vector<Mutator> DefaultMutators; }; class Fuzzer { public: // Aggregates all available coverage measurements. struct Coverage { Coverage() { Reset(); } void Reset() { BlockCoverage = 0; CallerCalleeCoverage = 0; PcMapBits = 0; CounterBitmapBits = 0; PcBufferLen = 0; CounterBitmap.clear(); PCMap.Reset(); } std::string DebugString() const; size_t BlockCoverage; size_t CallerCalleeCoverage; size_t PcBufferLen; // Precalculated number of bits in CounterBitmap. size_t CounterBitmapBits; std::vector<uint8_t> CounterBitmap; // Precalculated number of bits in PCMap. size_t PcMapBits; PcCoverageMap PCMap; }; Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options); void AddToCorpus(const Unit &U) { Corpus.push_back(U); UpdateCorpusDistribution(); } size_t ChooseUnitIdxToMutate(); const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; void TruncateUnits(std::vector<Unit> *NewCorpus); void Loop(); void Drill(); void ShuffleAndMinimize(); void InitializeTraceState(); void AssignTaintLabels(uint8_t *Data, size_t Size); size_t CorpusSize() const { return Corpus.size(); } size_t MaxUnitSizeInCorpus() const; void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) { Printf("Loading corpus: %s\n", Path.c_str()); ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize); } void RereadOutputCorpus(size_t MaxSize); // Save the current corpus to OutputCorpus. void SaveCorpus(); size_t secondsSinceProcessStartUp() { return duration_cast<seconds>(system_clock::now() - ProcessStartTime) .count(); } size_t execPerSec() { size_t Seconds = secondsSinceProcessStartUp(); return Seconds ? TotalNumberOfRuns / Seconds : 0; } size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } static void StaticAlarmCallback(); static void StaticCrashSignalCallback(); static void StaticInterruptCallback(); void ExecuteCallback(const uint8_t *Data, size_t Size); bool RunOne(const uint8_t *Data, size_t Size); // Merge Corpora[1:] into Corpora[0]. void Merge(const std::vector<std::string> &Corpora); // Returns a subset of 'Extra' that adds coverage to 'Initial'. UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra); MutationDispatcher &GetMD() { return MD; } void PrintFinalStats(); void SetMaxLen(size_t MaxLen); void RssLimitCallback(); // Public for tests. void ResetCoverage(); bool InFuzzingThread() const { return IsMyThread; } size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const; private: void AlarmCallback(); void CrashCallback(); void InterruptCallback(); void MutateAndTestOne(); void ReportNewCoverage(const Unit &U); bool RunOne(const Unit &U) { return RunOne(U.data(), U.size()); } void RunOneAndUpdateCorpus(const uint8_t *Data, size_t Size); void WriteToOutputCorpus(const Unit &U); void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n"); void PrintStatusForNewUnit(const Unit &U); void ShuffleCorpus(UnitVector *V); void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, bool DuringInitialCorpusExecution); // Updates the probability distribution for the units in the corpus. // Must be called whenever the corpus or unit weights are changed. void UpdateCorpusDistribution(); bool UpdateMaxCoverage(); // Trace-based fuzzing: we run a unit with some kind of tracing // enabled and record potentially useful mutations. Then // We apply these mutations one by one to the unit and run it again. // Start tracing; forget all previously proposed mutations. void StartTraceRecording(); // Stop tracing. void StopTraceRecording(); void SetDeathCallback(); static void StaticDeathCallback(); void DumpCurrentUnit(const char *Prefix); void DeathCallback(); void LazyAllocateCurrentUnitData(); uint8_t *CurrentUnitData = nullptr; std::atomic<size_t> CurrentUnitSize; size_t TotalNumberOfRuns = 0; size_t NumberOfNewUnitsAdded = 0; bool HasMoreMallocsThanFrees = false; size_t NumberOfLeakDetectionAttempts = 0; std::vector<Unit> Corpus; std::unordered_set<std::string> UnitHashesAddedToCorpus; std::piecewise_constant_distribution<double> CorpusDistribution; UserCallback CB; MutationDispatcher &MD; FuzzingOptions Options; system_clock::time_point ProcessStartTime = system_clock::now(); system_clock::time_point UnitStartTime; long TimeOfLongestUnitInSeconds = 0; long EpochOfLastReadOfOutputCorpus = 0; // Maximum recorded coverage. Coverage MaxCoverage; // Need to know our own thread. static thread_local bool IsMyThread; }; // Global interface to functions that may or may not be available. extern ExternalFunctions *EF; }; // namespace fuzzer #endif // LLVM_FUZZER_INTERNAL_H