C++程序  |  701行  |  23.79 KB

/*
 *
 * honggfuzz - sanitizer coverage feedback parsing
 * -----------------------------------------------
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 *
 */

/*
 * Clang sanitizer coverage (sancov) data parsing functions. Supported methods:
 * - raw unified data (preferred method)
 * - individual data per executable/DSO (not preferred since lots of data lost if instrumented
 *   code exits abnormally or with sanitizer unhandled signal (common in Android OS)
 *
 * For raw-unpack method a global (shared across workers) Trie is created for the chosen
 * initial seed and maintained until seed is replaced. Trie nodes store the loaded (as exposed
 * from *.sancov.map file) execs/DSOs from target application using the map name as key. Trie node
 * data struct (trieData_t) maintains information for each instrumented map including a bitmap with
 * all hit relative BB addresses (realBBAddr - baseAddr to circumvent ASLR). Map's bitmap is updated
 * while new areas on target application are discovered based on absolute elitism implemented at
 * fuzz_sanCovFeedback().
 *
 * For individual data files a pid (fuzzer's thread or remote process) based filename search is
 * performed to identify all files belonging to examined execution. This method doesn't implement
 * yet bitmap runtime data to detect newly discovered areas. It's mainly used so far as a comparison
 * metric for raw-unpack method and stability check for sancov experimental features such as
 * coverage counters: http://clang.llvm.org/docs/SanitizerCoverage.html
 */

#include "sancov.h"

#include <ctype.h>
#include <dirent.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "libcommon/common.h"
#include "libcommon/files.h"
#include "libcommon/log.h"
#include "libcommon/util.h"
#include "sanitizers.h"

/* sancov files magic values */
#define kMagic32 0xC0BFFFFFFFFFFF32
#define kMagic64 0xC0BFFFFFFFFFFF64

/*
 * Each DSO/executable that has been compiled with enabled coverage instrumentation
 * is detected from compiler_rt runtime library when loaded. When coverage_direct
 * method is selected, runtime library is pre-allocating kPcArrayMmapSize [1] byte
 * chunks until the total size of chunks is greater than the number of inserted
 * guards. This effectively means that we might have a large unused (zero-filled)
 * area that we can't identify at runtime (we need to do binary inspection).
 *
 * Runtime maintained data structs size overhead is not affected since fixed-size
 * bitmap is used. However, the way the display coverage statistics are generated
 * is not very accurate because:
 *  a) ASan compiled DSO might get loaded although not followed from monitoring
       execution affecting the counters
 *  b) Not all zero-fill chunks translate into non-hit basic block as they might
 *     be the chunk padding
 *
 * Probably there aren't many we can do to deal with this issue without introducing
 * a huge performance overhead at an already costly feedback method.
 *
 * [1]
 'https://llvm.org/svn/llvm-project/compiler-rt/branches/release_38/lib/sanitizer_common/sanitizer_coverage_libcdep.cc'
 */
#define kPcArrayMmapSize (64 * 1024)

/*
 * bitmap implementation
 */
static bitmap_t* sancov_newBitmap(uint32_t capacity) {
    bitmap_t* pBM = util_Malloc(sizeof(bitmap_t));
    pBM->capacity = capacity;
    pBM->nChunks = (capacity + 31) / 32;
    pBM->pChunks = util_Calloc(pBM->nChunks * sizeof(uint32_t));
    return pBM;
}

static inline bool sancov_queryBitmap(bitmap_t* pBM, uint32_t index) {
    if (index > pBM->capacity) {
        LOG_E("bitmap overflow (%u)", index);
        return false;
    }
    if (pBM->pChunks[index / 32] & (1 << (index % 32))) {
        return true;
    }
    return false;
}

static inline void sancov_setBitmap(bitmap_t* pBM, uint32_t index) {
    /* This will be removed. So far checks only to verify accepted ranges. */
    if (index >= pBM->capacity) {
        LOG_E("Out of range index (%u > %u)", index, pBM->capacity);
    }
    pBM->pChunks[index / 32] |= (1 << (index % 32));
}

static inline void sancov_destroyBitmap(bitmap_t* pBM) {
    free(pBM->pChunks);
    free(pBM);
}

/*
 * Trie implementation
 */
static node_t* sancov_trieCreateNode(char key) {
    node_t* node = (node_t*)util_Malloc(sizeof(node_t));
    node->key = key;
    node->next = NULL;
    node->children = NULL;
    node->parent = NULL;
    node->prev = NULL;

    /* Zero init node's data struct */
    memset(&node->data, 0, sizeof(trieData_t));
    return node;
}

static node_t* sancov_trieSearch(node_t* root, const char* key) {
    node_t *pNodeLevel = root, *pNodePtr = NULL;
    int nodeLevelId = 0;
    while (1) {
        node_t *pNodeFound = NULL, *pCurNode = NULL;
        for (pCurNode = pNodeLevel; pCurNode != NULL; pCurNode = pCurNode->next) {
            if (pCurNode->key == *key) {
                pNodeFound = pCurNode;
                nodeLevelId++;
                break;
            }
        }
        if (pNodeFound == NULL) {
            return NULL;
        }
        if (*key == '\0') {
            pNodePtr = pCurNode;
            return pNodePtr;
        }
        pNodeLevel = pNodeFound->children;
        key++;
    }
}

static void sancov_trieAdd(node_t** root, const char* key) {
    if (*root == NULL) {
        LOG_E("Invalid Trie (NULL root node)");
        return;
    }

    /* Traverse Trie */
    node_t* pTravNode = (*root)->children;
    if (pTravNode == NULL) {
        /* First node */
        for (pTravNode = *root; *key != '\0'; pTravNode = pTravNode->children) {
            pTravNode->children = sancov_trieCreateNode(*key);
            pTravNode->children->parent = pTravNode;
            key++;
        }
        pTravNode->children = sancov_trieCreateNode('\0');
        pTravNode->children->parent = pTravNode;
        return;
    }

    while (*key != '\0') {
        if (*key == pTravNode->key) {
            key++;
            pTravNode = pTravNode->children;
        } else {
            break;
        }
    }
    while (pTravNode->next) {
        if (*key == pTravNode->next->key) {
            key++;
            sancov_trieAdd(&(pTravNode->next), key);
            return;
        }
        pTravNode = pTravNode->next;
    }
    if (*key) {
        pTravNode->next = sancov_trieCreateNode(*key);
    } else {
        pTravNode->next = sancov_trieCreateNode(*key);
    }
    pTravNode->next->parent = pTravNode->parent;
    pTravNode->next->prev = pTravNode;
    if (!*key) {
        return;
    }
    key++;
    for (pTravNode = pTravNode->next; *key; pTravNode = pTravNode->children) {
        pTravNode->children = sancov_trieCreateNode(*key);
        pTravNode->children->parent = pTravNode;
        key++;
    }
    pTravNode->children = sancov_trieCreateNode('\0');
    pTravNode->children->parent = pTravNode;

    return;
}

static inline void sancov_trieFreeNode(node_t* node) {
    /* First destroy bitmap heap buffers allocated for instrumented maps */
    if (node->data.pBM) {
        sancov_destroyBitmap(node->data.pBM);
    }
    free(node);
}

static inline void sancov_trieCreate(node_t** root) {
    /* Create root node if new Trie */
    *root = sancov_trieCreateNode('\0');
}

/* Destroy Trie - iterate nodes and free memory */
UNUSED static void sancov_trieDestroy(node_t* root) {
    node_t* pNode = root;
    node_t* pNodeTmp = root;
    while (pNode) {
        while (pNode->children) {
            pNode = pNode->children;
        }

        if (pNode->prev && pNode->next) {
            pNodeTmp = pNode;
            pNode->next->prev = pNode->prev;
            pNode->prev->next = pNode->next;
            sancov_trieFreeNode(pNodeTmp);
        } else if (pNode->prev && !pNode->next) {
            pNodeTmp = pNode;
            pNode->prev->next = NULL;
            sancov_trieFreeNode(pNodeTmp);
        } else if (!pNode->prev && pNode->next) {
            pNodeTmp = pNode;
            pNode->parent->children = pNode->next;
            pNode->next->prev = NULL;
            pNode = pNode->next;
            sancov_trieFreeNode(pNodeTmp);
        } else {
            pNodeTmp = pNode;
            if (pNode->parent == NULL) {
                /* Root */
                sancov_trieFreeNode(pNodeTmp);
                return;
            }
            pNode = pNode->parent;
            pNode->children = NULL;
            sancov_trieFreeNode(pNodeTmp);
        }
    }
}

/* Modified interpolation search algorithm to search for nearest address fit */
static inline uint64_t sancov_interpSearch(uint64_t* buf, uint64_t size, uint64_t key) {
    /* Avoid extra checks assuming caller always provides non-zero array size */
    uint64_t low = 0;
    uint64_t high = size - 1;
    uint64_t mid = high;

    while (buf[high] != buf[low] && key >= buf[low] && key <= buf[high]) {
        mid = low + (key - buf[low]) * ((high - low) / (buf[high] - buf[low]));
        if (buf[mid] < key) {
            low = mid + 1;
        } else if (key < buf[mid]) {
            high = mid - 1;
        } else {
            return mid;
        }
    }
    return mid;
}

/* qsort struct comparison function (memMap_t struct start addr field) */
static int sancov_qsortCmp(const void* a, const void* b) {
    memMap_t* pA = (memMap_t*)a;
    memMap_t* pB = (memMap_t*)b;
    if (pA->start < pB->start) {
        return -1;
    } else if (pA->start > pB->start) {
        return 1;
    } else {
        /* Normally we should never hit that case */
        LOG_W("Duplicate map start addr detected");
        return 0;
    }
}

static bool sancov_sanCovParseRaw(run_t* run) {
    int dataFd = -1;
    uint8_t* dataBuf = NULL;
    off_t dataFileSz = 0, pos = 0;
    bool is32bit = true;
    char covFile[PATH_MAX] = {0};
    pid_t targetPid = (run->global->linux.pid > 0) ? run->global->linux.pid : run->pid;

    /* Fuzzer local runtime data structs - need free() before exit */
    uint64_t* startMapsIndex = NULL;
    memMap_t* mapsBuf = NULL;

    /* Local counters */
    uint64_t nBBs = 0;         /* Total BB hits found in raw file */
    uint64_t nZeroBBs = 0;     /* Number of non-hit instrumented BBs */
    uint64_t mapsNum = 0;      /* Total number of entries in map file */
    uint64_t noCovMapsNum = 0; /* Loaded DSOs not compiled with coverage */

    /* File line-by-line read help buffers */
    __block char* pLine = NULL;
    size_t lineSz = 0;

    /* Coverage data analysis starts by parsing map file listing */
    snprintf(covFile, sizeof(covFile), "%s/%s/%d.sancov.map", run->global->io.workDir,
        _HF_SANCOV_DIR, targetPid);
    if (!files_exists(covFile)) {
        LOG_D("sancov map file not found");
        return false;
    }
    FILE* fCovMap = fopen(covFile, "rb");
    if (fCovMap == NULL) {
        PLOG_E("Couldn't open '%s' - R/O mode", covFile);
        return false;
    }
    defer { fclose(fCovMap); };

    /* First line contains PC length (32/64-bit) */
    if (getline(&pLine, &lineSz, fCovMap) == -1) {
        LOG_E("Invalid map file '%s'", covFile);
        return false;
    }
    defer {
        free(pLine);
        pLine = NULL;
    };

    int pcLen = atoi(pLine);
    if (pcLen == 32) {
        is32bit = true;
    } else if (pcLen == 64) {
        is32bit = false;
    } else {
        LOG_E("Invalid PC length (%d) in map file '%s'", pcLen, covFile);
    }

    /* See if #maps is available from previous run to avoid realloc inside loop */
    uint64_t prevMapsNum = ATOMIC_GET(run->global->sanCovCnts.dsoCnt);
    if (prevMapsNum > 0) {
        mapsBuf = util_Malloc(prevMapsNum * sizeof(memMap_t));
    }
    /* It's OK to free(NULL) */
    defer { free(mapsBuf); };

    /* Iterate map entries */
    for (;;) {
        if (getline(&pLine, &lineSz, fCovMap) == -1) {
            break;
        }

        /* Trim trailing whitespaces, not sure if needed copied from upstream sancov.py */
        char* lineEnd = pLine + strlen(pLine) - 1;
        while (lineEnd > pLine && isspace((int)*lineEnd)) {
            lineEnd--;
        }
        *(lineEnd + 1) = 0;

        /*
         * Each line has following format:
         * Start    End      Base     bin/DSO name
         * b5843000 b584e6ac b5843000 liblog.so
         */
        memMap_t mapData = {.start = 0};
        char* savePtr = NULL;
        mapData.start = strtoull(strtok_r(pLine, " ", &savePtr), NULL, 16);
        mapData.end = strtoull(strtok_r(NULL, " ", &savePtr), NULL, 16);
        mapData.base = strtoull(strtok_r(NULL, " ", &savePtr), NULL, 16);
        char* mapName = strtok_r(NULL, " ", &savePtr);
        memcpy(mapData.mapName, mapName, strlen(mapName));

        /* Interaction with global Trie should mutex wrap to avoid threads races */
        {
            MX_SCOPED_LOCK(&run->global->sanCov_mutex);

            /* Add entry to Trie with zero data if not already */
            if (!sancov_trieSearch(run->global->covMetadata->children, mapData.mapName)) {
                sancov_trieAdd(&run->global->covMetadata, mapData.mapName);
            }
        }

        /* If no DSO number history (first run) or new DSO loaded, realloc local maps metadata buf
         */
        if (prevMapsNum == 0 || prevMapsNum < mapsNum) {
            if ((mapsBuf = util_Realloc(mapsBuf, (size_t)(mapsNum + 1) * sizeof(memMap_t))) ==
                NULL) {
                PLOG_E("realloc failed (sz=%" PRIu64 ")", (mapsNum + 1) * sizeof(memMap_t));
                return false;
            }
        }

        /* Add entry to local maps metadata array */
        memcpy(&mapsBuf[mapsNum], &mapData, sizeof(memMap_t));

        /* Increase loaded maps counter (includes non-instrumented DSOs too) */
        mapsNum++;
    }

    /* Delete .sancov.map file */
    if (run->global->linux.pid == 0 && run->global->persistent == false) {
        unlink(covFile);
    }

    /* Create a quick index array with maps start addresses */
    startMapsIndex = util_Malloc(mapsNum * sizeof(uint64_t));
    defer { free(startMapsIndex); };

    /* Sort quick maps index */
    qsort(mapsBuf, mapsNum, sizeof(memMap_t), sancov_qsortCmp);
    for (size_t i = 0; i < mapsNum; i++) {
        startMapsIndex[i] = mapsBuf[i].start;
    }

    /* mmap() .sancov.raw file */
    snprintf(covFile, sizeof(covFile), "%s/%s/%d.sancov.raw", run->global->io.workDir,
        _HF_SANCOV_DIR, targetPid);
    dataBuf = files_mapFile(covFile, &dataFileSz, &dataFd, false);
    if (dataBuf == NULL) {
        LOG_E("Couldn't open and map '%s' in R/O mode", covFile);
        return false;
    }
    defer {
        munmap(dataBuf, dataFileSz);
        close(dataFd);
    };

    /*
     * Avoid cost of size checks inside raw data read loop by defining the read function
     * & pivot size based on PC length.
     */
    uint64_t (*pReadRawBBAddrFunc)(const uint8_t*) = NULL;
    uint8_t pivot = 0;
    if (is32bit) {
        pReadRawBBAddrFunc = &util_getUINT32;
        pivot = 4;
    } else {
        pReadRawBBAddrFunc = &util_getUINT64;
        pivot = 8;
    }

    /*
     * Take advantage of data locality (next processed addr is very likely to belong
     * to same map) to avoid Trie node search for each read entry.
     */
    node_t* curMap = NULL;
    uint64_t prevIndex = 0;

    /* Iterate over data buffer containing list of hit BB addresses */
    while (pos < dataFileSz) {
        uint64_t bbAddr = pReadRawBBAddrFunc(dataBuf + pos);
        pos += pivot;
        /* Don't bother for zero BB addr (inserted checks without hit) */
        if (bbAddr == 0x0) {
            nZeroBBs++;
            continue;
        } else {
            /* Find best hit based on start addr & verify range for errors */
            uint64_t bestFit = sancov_interpSearch(startMapsIndex, mapsNum, bbAddr);
            if (bbAddr >= mapsBuf[bestFit].start && bbAddr < mapsBuf[bestFit].end) {
                /* Increase exe/DSO total BB counter */
                mapsBuf[bestFit].bbCnt++;

                /* Update current Trie node if map changed */
                if (curMap == NULL || (prevIndex != bestFit)) {
                    prevIndex = bestFit;

                    /* Interaction with global Trie should mutex wrap to avoid threads races */
                    {
                        MX_SCOPED_LOCK(&run->global->sanCov_mutex);

                        curMap = sancov_trieSearch(
                            run->global->covMetadata->children, mapsBuf[bestFit].mapName);
                        if (curMap == NULL) {
                            LOG_E("Corrupted Trie - '%s' not found", mapsBuf[bestFit].mapName);
                            continue;
                        }

                        /* Maintain bitmaps only for exec/DSOs with coverage enabled - allocate on
                         * first use */
                        if (curMap->data.pBM == NULL) {
                            LOG_D("Allocating bitmap for map '%s'", mapsBuf[bestFit].mapName);
                            curMap->data.pBM = sancov_newBitmap(_HF_SANCOV_BITMAP_SIZE);

                            /*
                             * If bitmap allocation failed, unset cached Trie node ptr
                             * to execute this selection branch again.
                             */
                            if (curMap->data.pBM == NULL) {
                                curMap = NULL;
                                continue;
                            }
                        }
                    }
                }

                /* If new relative BB addr update DSO's bitmap */
                uint32_t relAddr = (uint32_t)(bbAddr - mapsBuf[bestFit].base);
                if (!sancov_queryBitmap(curMap->data.pBM, relAddr)) {
                    /* Interaction with global Trie should mutex wrap to avoid threads races */
                    {
                        MX_SCOPED_LOCK(&run->global->sanCov_mutex);

                        sancov_setBitmap(curMap->data.pBM, relAddr);
                    }

                    /* Also increase new BBs counter at worker's thread runtime data */
                    mapsBuf[bestFit].newBBCnt++;
                }
            } else {
                /*
                 * Normally this should never get executed. If hit, sanitizer
                 * coverage data collection come across some kind of bug.
                 */
                LOG_E("Invalid BB addr (%#" PRIx64 ") at offset %" PRId64, bbAddr, (uint64_t)pos);
            }
        }
        nBBs++;
    }

    /* Finally iterate over all instrumented maps to sum-up the number of newly met BB addresses */
    for (uint64_t i = 0; i < mapsNum; i++) {
        if (mapsBuf[i].bbCnt > 0) {
            run->sanCovCnts.newBBCnt += mapsBuf[i].newBBCnt;
        } else {
            noCovMapsNum++;
        }
    }

    /* Successful parsing - update fuzzer worker's counters */
    run->sanCovCnts.hitBBCnt = nBBs;
    run->sanCovCnts.totalBBCnt = nBBs + nZeroBBs;
    run->sanCovCnts.dsoCnt = mapsNum;
    run->sanCovCnts.iDsoCnt = mapsNum - noCovMapsNum; /* Instrumented DSOs */

    if (run->global->linux.pid == 0 && run->global->persistent == false) {
        unlink(covFile);
    }
    return true;
}

static bool sancov_sanCovParse(run_t* run) {
    int dataFd = -1;
    uint8_t* dataBuf = NULL;
    off_t dataFileSz = 0, pos = 0;
    bool is32bit = true;
    char covFile[PATH_MAX] = {0};
    DIR* pSanCovDir = NULL;
    pid_t targetPid = (run->global->linux.pid > 0) ? run->global->linux.pid : run->pid;

    snprintf(covFile, sizeof(covFile), "%s/%s/%s.%d.sancov", run->global->io.workDir,
        _HF_SANCOV_DIR, files_basename(run->global->exe.cmdline[0]), targetPid);
    if (!files_exists(covFile)) {
        LOG_D("Target sancov file not found");
        return false;
    }

    /* Local cache file suffix to use for file search of target pid data */
    char pidFSuffix[13] = {0};
    snprintf(pidFSuffix, sizeof(pidFSuffix), "%d.sancov", targetPid);

    /* Total BBs counter summarizes all DSOs */
    uint64_t nBBs = 0;

    /* Iterate sancov dir for files generated against target pid */
    snprintf(covFile, sizeof(covFile), "%s/%s", run->global->io.workDir, _HF_SANCOV_DIR);
    pSanCovDir = opendir(covFile);
    if (pSanCovDir == NULL) {
        PLOG_E("opendir('%s')", covFile);
        return false;
    }
    defer { closedir(pSanCovDir); };

    struct dirent* pDir = NULL;
    while ((pDir = readdir(pSanCovDir)) != NULL) {
        /* Parse files with target's pid */
        if (strstr(pDir->d_name, pidFSuffix)) {
            snprintf(covFile, sizeof(covFile), "%s/%s/%s", run->global->io.workDir, _HF_SANCOV_DIR,
                pDir->d_name);
            dataBuf = files_mapFile(covFile, &dataFileSz, &dataFd, false);
            if (dataBuf == NULL) {
                LOG_E("Couldn't open and map '%s' in R/O mode", covFile);
                return false;
            }
            defer {
                munmap(dataBuf, dataFileSz);
                close(dataFd);
            };

            if (dataFileSz < 8) {
                LOG_E("Coverage data file too short");
                return false;
            }

            /* Check magic values & derive PC length */
            uint64_t magic = util_getUINT64(dataBuf);
            if (magic == kMagic32) {
                is32bit = true;
            } else if (magic == kMagic64) {
                is32bit = false;
            } else {
                LOG_E("Invalid coverage data file");
                return false;
            }
            pos += 8;

            /*
             * Avoid cost of size checks inside raw data read loop by defining the read function
             * & pivot size based on PC length.
             */
            uint64_t (*pReadRawBBAddrFunc)(const uint8_t*) = NULL;
            uint8_t pivot = 0;
            if (is32bit) {
                pReadRawBBAddrFunc = &util_getUINT32;
                pivot = 4;
            } else {
                pReadRawBBAddrFunc = &util_getUINT64;
                pivot = 8;
            }

            while (pos < dataFileSz) {
                uint32_t bbAddr = pReadRawBBAddrFunc(dataBuf + pos);
                pos += pivot;
                if (bbAddr == 0x0) {
                    continue;
                }
                nBBs++;
            }
        }
    }

    /* Successful parsing - update fuzzer worker counters */
    run->sanCovCnts.hitBBCnt = nBBs;

    if (run->global->linux.pid == 0 && run->global->persistent == false) {
        unlink(covFile);
    }
    return true;
}

/*
 * Sanitizer coverage data are stored in FS can be parsed via two methods:
 * raw unpack & separate bin/DSO sancov file. Separate bin/DSO sancov file
 * method is usually avoided since coverage data are lost if sanitizer unhandled
 * signal. Additionally, the FS I/O overhead is bigger compared to raw unpack
 * method which uses runtime data structures.
 *
 * Enabled methods are controlled from sanitizer flags in arch.c
 */
void sancov_Analyze(run_t* run) {
    if (!run->global->useSanCov) {
        return;
    }
    /*
     * For now supported methods are implemented in fail-over nature. This will
     * change in the future when best method is concluded.
     */
    if (sancov_sanCovParseRaw(run) == false) {
        sancov_sanCovParse(run);
    }
}

bool sancov_Init(honggfuzz_t* hfuzz) {
    if (hfuzz->useSanCov == false) {
        return true;
    }
    sancov_trieCreate(&hfuzz->covMetadata);

    char sanCovOutDir[PATH_MAX] = {0};
    snprintf(sanCovOutDir, sizeof(sanCovOutDir), "%s/%s", hfuzz->io.workDir, _HF_SANCOV_DIR);
    if (!files_exists(sanCovOutDir)) {
        if (mkdir(sanCovOutDir, S_IRWXU | S_IXGRP | S_IXOTH) != 0) {
            PLOG_E("mkdir() '%s' failed", sanCovOutDir);
        }
    }

    return true;
}