//=- ReachableCodePathInsensitive.cpp ---------------------------*- C++ --*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a flow-sensitive, path-insensitive analysis of
// determining reachable blocks within a CFG.
//
//===----------------------------------------------------------------------===//

#include "clang/Analysis/Analyses/ReachableCode.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/ParentMap.h"
#include "clang/Analysis/AnalysisContext.h"
#include "clang/Analysis/CFG.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"

using namespace clang;

//===----------------------------------------------------------------------===//
// Core Reachability Analysis routines.
//===----------------------------------------------------------------------===//

static bool isEnumConstant(const Expr *Ex) {
  const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Ex);
  if (!DR)
    return false;
  return isa<EnumConstantDecl>(DR->getDecl());
}

static bool isTrivialExpression(const Expr *Ex) {
  Ex = Ex->IgnoreParenCasts();
  return isa<IntegerLiteral>(Ex) || isa<StringLiteral>(Ex) ||
         isa<CXXBoolLiteralExpr>(Ex) || isa<ObjCBoolLiteralExpr>(Ex) ||
         isa<CharacterLiteral>(Ex) ||
         isEnumConstant(Ex);
}

static bool isTrivialDoWhile(const CFGBlock *B, const Stmt *S) {
  // Check if the block ends with a do...while() and see if 'S' is the
  // condition.
  if (const Stmt *Term = B->getTerminator()) {
    if (const DoStmt *DS = dyn_cast<DoStmt>(Term)) {
      const Expr *Cond = DS->getCond()->IgnoreParenCasts();
      return Cond == S && isTrivialExpression(Cond);
    }
  }
  return false;
}

static bool isDeadReturn(const CFGBlock *B, const Stmt *S) {
  // Look to see if the current control flow ends with a 'return', and see if
  // 'S' is a substatement. The 'return' may not be the last element in the
  // block, or may be in a subsequent block because of destructors.
  const CFGBlock *Current = B;
  while (true) {
    for (CFGBlock::const_reverse_iterator I = Current->rbegin(),
                                          E = Current->rend();
         I != E; ++I) {
      if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
        if (const ReturnStmt *RS = dyn_cast<ReturnStmt>(CS->getStmt())) {
          if (RS == S)
            return true;
          if (const Expr *RE = RS->getRetValue()) {
            RE = RE->IgnoreParenCasts();
            if (RE == S)
              return true;
            ParentMap PM(const_cast<Expr *>(RE));
            // If 'S' is in the ParentMap, it is a subexpression of
            // the return statement.
            return PM.getParent(S);
          }
        }
        break;
      }
    }
    // Note also that we are restricting the search for the return statement
    // to stop at control-flow; only part of a return statement may be dead,
    // without the whole return statement being dead.
    if (Current->getTerminator().isTemporaryDtorsBranch()) {
      // Temporary destructors have a predictable control flow, thus we want to
      // look into the next block for the return statement.
      // We look into the false branch, as we know the true branch only contains
      // the call to the destructor.
      assert(Current->succ_size() == 2);
      Current = *(Current->succ_begin() + 1);
    } else if (!Current->getTerminator() && Current->succ_size() == 1) {
      // If there is only one successor, we're not dealing with outgoing control
      // flow. Thus, look into the next block.
      Current = *Current->succ_begin();
      if (Current->pred_size() > 1) {
        // If there is more than one predecessor, we're dealing with incoming
        // control flow - if the return statement is in that block, it might
        // well be reachable via a different control flow, thus it's not dead.
        return false;
      }
    } else {
      // We hit control flow or a dead end. Stop searching.
      return false;
    }
  }
  llvm_unreachable("Broke out of infinite loop.");
}

static SourceLocation getTopMostMacro(SourceLocation Loc, SourceManager &SM) {
  assert(Loc.isMacroID());
  SourceLocation Last;
  while (Loc.isMacroID()) {
    Last = Loc;
    Loc = SM.getImmediateMacroCallerLoc(Loc);
  }
  return Last;
}

/// Returns true if the statement is expanded from a configuration macro.
static bool isExpandedFromConfigurationMacro(const Stmt *S,
                                             Preprocessor &PP,
                                             bool IgnoreYES_NO = false) {
  // FIXME: This is not very precise.  Here we just check to see if the
  // value comes from a macro, but we can do much better.  This is likely
  // to be over conservative.  This logic is factored into a separate function
  // so that we can refine it later.
  SourceLocation L = S->getLocStart();
  if (L.isMacroID()) {
    if (IgnoreYES_NO) {
      // The Objective-C constant 'YES' and 'NO'
      // are defined as macros.  Do not treat them
      // as configuration values.
      SourceManager &SM = PP.getSourceManager();
      SourceLocation TopL = getTopMostMacro(L, SM);
      StringRef MacroName = PP.getImmediateMacroName(TopL);
      if (MacroName == "YES" || MacroName == "NO")
        return false;
    }
    return true;
  }
  return false;
}

static bool isConfigurationValue(const ValueDecl *D, Preprocessor &PP);

/// Returns true if the statement represents a configuration value.
///
/// A configuration value is something usually determined at compile-time
/// to conditionally always execute some branch.  Such guards are for
/// "sometimes unreachable" code.  Such code is usually not interesting
/// to report as unreachable, and may mask truly unreachable code within
/// those blocks.
static bool isConfigurationValue(const Stmt *S,
                                 Preprocessor &PP,
                                 SourceRange *SilenceableCondVal = nullptr,
                                 bool IncludeIntegers = true,
                                 bool WrappedInParens = false) {
  if (!S)
    return false;

  if (const Expr *Ex = dyn_cast<Expr>(S))
    S = Ex->IgnoreCasts();

  // Special case looking for the sigil '()' around an integer literal.
  if (const ParenExpr *PE = dyn_cast<ParenExpr>(S))
    if (!PE->getLocStart().isMacroID())
      return isConfigurationValue(PE->getSubExpr(), PP, SilenceableCondVal, 
                                  IncludeIntegers, true);

  if (const Expr *Ex = dyn_cast<Expr>(S))
    S = Ex->IgnoreCasts();

  bool IgnoreYES_NO = false;

  switch (S->getStmtClass()) {
    case Stmt::CallExprClass: {
      const FunctionDecl *Callee =
        dyn_cast_or_null<FunctionDecl>(cast<CallExpr>(S)->getCalleeDecl());
      return Callee ? Callee->isConstexpr() : false;
    }
    case Stmt::DeclRefExprClass:
      return isConfigurationValue(cast<DeclRefExpr>(S)->getDecl(), PP);
    case Stmt::ObjCBoolLiteralExprClass:
      IgnoreYES_NO = true;
      // Fallthrough.
    case Stmt::CXXBoolLiteralExprClass:
    case Stmt::IntegerLiteralClass: {
      const Expr *E = cast<Expr>(S);
      if (IncludeIntegers) {
        if (SilenceableCondVal && !SilenceableCondVal->getBegin().isValid())
          *SilenceableCondVal = E->getSourceRange();
        return WrappedInParens || isExpandedFromConfigurationMacro(E, PP, IgnoreYES_NO);
      }
      return false;
    }
    case Stmt::MemberExprClass:
      return isConfigurationValue(cast<MemberExpr>(S)->getMemberDecl(), PP);
    case Stmt::UnaryExprOrTypeTraitExprClass:
      return true;
    case Stmt::BinaryOperatorClass: {
      const BinaryOperator *B = cast<BinaryOperator>(S);
      // Only include raw integers (not enums) as configuration
      // values if they are used in a logical or comparison operator
      // (not arithmetic).
      IncludeIntegers &= (B->isLogicalOp() || B->isComparisonOp());
      return isConfigurationValue(B->getLHS(), PP, SilenceableCondVal,
                                  IncludeIntegers) ||
             isConfigurationValue(B->getRHS(), PP, SilenceableCondVal,
                                  IncludeIntegers);
    }
    case Stmt::UnaryOperatorClass: {
      const UnaryOperator *UO = cast<UnaryOperator>(S);
      if (SilenceableCondVal) 
        *SilenceableCondVal = UO->getSourceRange();      
      return UO->getOpcode() == UO_LNot &&
             isConfigurationValue(UO->getSubExpr(), PP, SilenceableCondVal,
                                  IncludeIntegers, WrappedInParens);
    }
    default:
      return false;
  }
}

static bool isConfigurationValue(const ValueDecl *D, Preprocessor &PP) {
  if (const EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D))
    return isConfigurationValue(ED->getInitExpr(), PP);
  if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
    // As a heuristic, treat globals as configuration values.  Note
    // that we only will get here if Sema evaluated this
    // condition to a constant expression, which means the global
    // had to be declared in a way to be a truly constant value.
    // We could generalize this to local variables, but it isn't
    // clear if those truly represent configuration values that
    // gate unreachable code.
    if (!VD->hasLocalStorage())
      return true;

    // As a heuristic, locals that have been marked 'const' explicitly
    // can be treated as configuration values as well.
    return VD->getType().isLocalConstQualified();
  }
  return false;
}

/// Returns true if we should always explore all successors of a block.
static bool shouldTreatSuccessorsAsReachable(const CFGBlock *B,
                                             Preprocessor &PP) {
  if (const Stmt *Term = B->getTerminator()) {
    if (isa<SwitchStmt>(Term))
      return true;
    // Specially handle '||' and '&&'.
    if (isa<BinaryOperator>(Term)) {
      return isConfigurationValue(Term, PP);
    }
  }

  const Stmt *Cond = B->getTerminatorCondition(/* stripParens */ false);
  return isConfigurationValue(Cond, PP);
}

static unsigned scanFromBlock(const CFGBlock *Start,
                              llvm::BitVector &Reachable,
                              Preprocessor *PP,
                              bool IncludeSometimesUnreachableEdges) {
  unsigned count = 0;
  
  // Prep work queue
  SmallVector<const CFGBlock*, 32> WL;
  
  // The entry block may have already been marked reachable
  // by the caller.
  if (!Reachable[Start->getBlockID()]) {
    ++count;
    Reachable[Start->getBlockID()] = true;
  }
  
  WL.push_back(Start);
  
  // Find the reachable blocks from 'Start'.
  while (!WL.empty()) {
    const CFGBlock *item = WL.pop_back_val();

    // There are cases where we want to treat all successors as reachable.
    // The idea is that some "sometimes unreachable" code is not interesting,
    // and that we should forge ahead and explore those branches anyway.
    // This allows us to potentially uncover some "always unreachable" code
    // within the "sometimes unreachable" code.
    // Look at the successors and mark then reachable.
    Optional<bool> TreatAllSuccessorsAsReachable;
    if (!IncludeSometimesUnreachableEdges)
      TreatAllSuccessorsAsReachable = false;

    for (CFGBlock::const_succ_iterator I = item->succ_begin(), 
         E = item->succ_end(); I != E; ++I) {
      const CFGBlock *B = *I;
      if (!B) do {
        const CFGBlock *UB = I->getPossiblyUnreachableBlock();
        if (!UB)
          break;

        if (!TreatAllSuccessorsAsReachable.hasValue()) {
          assert(PP);
          TreatAllSuccessorsAsReachable =
            shouldTreatSuccessorsAsReachable(item, *PP);
        }

        if (TreatAllSuccessorsAsReachable.getValue()) {
          B = UB;
          break;
        }
      }
      while (false);

      if (B) {
        unsigned blockID = B->getBlockID();
        if (!Reachable[blockID]) {
          Reachable.set(blockID);
          WL.push_back(B);
          ++count;
        }
      }
    }
  }
  return count;
}

static unsigned scanMaybeReachableFromBlock(const CFGBlock *Start,
                                            Preprocessor &PP,
                                            llvm::BitVector &Reachable) {
  return scanFromBlock(Start, Reachable, &PP, true);
}

//===----------------------------------------------------------------------===//
// Dead Code Scanner.
//===----------------------------------------------------------------------===//

namespace {
  class DeadCodeScan {
    llvm::BitVector Visited;
    llvm::BitVector &Reachable;
    SmallVector<const CFGBlock *, 10> WorkList;
    Preprocessor &PP;

    typedef SmallVector<std::pair<const CFGBlock *, const Stmt *>, 12>
    DeferredLocsTy;

    DeferredLocsTy DeferredLocs;

  public:
    DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP)
    : Visited(reachable.size()),
      Reachable(reachable),
      PP(PP) {}

    void enqueue(const CFGBlock *block);
    unsigned scanBackwards(const CFGBlock *Start,
    clang::reachable_code::Callback &CB);

    bool isDeadCodeRoot(const CFGBlock *Block);

    const Stmt *findDeadCode(const CFGBlock *Block);

    void reportDeadCode(const CFGBlock *B,
                        const Stmt *S,
                        clang::reachable_code::Callback &CB);
  };
}

void DeadCodeScan::enqueue(const CFGBlock *block) {
  unsigned blockID = block->getBlockID();
  if (Reachable[blockID] || Visited[blockID])
    return;
  Visited[blockID] = true;
  WorkList.push_back(block);
}

bool DeadCodeScan::isDeadCodeRoot(const clang::CFGBlock *Block) {
  bool isDeadRoot = true;

  for (CFGBlock::const_pred_iterator I = Block->pred_begin(),
       E = Block->pred_end(); I != E; ++I) {
    if (const CFGBlock *PredBlock = *I) {
      unsigned blockID = PredBlock->getBlockID();
      if (Visited[blockID]) {
        isDeadRoot = false;
        continue;
      }
      if (!Reachable[blockID]) {
        isDeadRoot = false;
        Visited[blockID] = true;
        WorkList.push_back(PredBlock);
        continue;
      }
    }
  }

  return isDeadRoot;
}

static bool isValidDeadStmt(const Stmt *S) {
  if (S->getLocStart().isInvalid())
    return false;
  if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(S))
    return BO->getOpcode() != BO_Comma;
  return true;
}

const Stmt *DeadCodeScan::findDeadCode(const clang::CFGBlock *Block) {
  for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I!=E; ++I)
    if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
      const Stmt *S = CS->getStmt();
      if (isValidDeadStmt(S))
        return S;
    }

  if (CFGTerminator T = Block->getTerminator()) {
    if (!T.isTemporaryDtorsBranch()) {
      const Stmt *S = T.getStmt();
      if (isValidDeadStmt(S))
        return S;
    }
  }

  return nullptr;
}

static int SrcCmp(const std::pair<const CFGBlock *, const Stmt *> *p1,
                  const std::pair<const CFGBlock *, const Stmt *> *p2) {
  if (p1->second->getLocStart() < p2->second->getLocStart())
    return -1;
  if (p2->second->getLocStart() < p1->second->getLocStart())
    return 1;
  return 0;
}

unsigned DeadCodeScan::scanBackwards(const clang::CFGBlock *Start,
                                     clang::reachable_code::Callback &CB) {

  unsigned count = 0;
  enqueue(Start);

  while (!WorkList.empty()) {
    const CFGBlock *Block = WorkList.pop_back_val();

    // It is possible that this block has been marked reachable after
    // it was enqueued.
    if (Reachable[Block->getBlockID()])
      continue;

    // Look for any dead code within the block.
    const Stmt *S = findDeadCode(Block);

    if (!S) {
      // No dead code.  Possibly an empty block.  Look at dead predecessors.
      for (CFGBlock::const_pred_iterator I = Block->pred_begin(),
           E = Block->pred_end(); I != E; ++I) {
        if (const CFGBlock *predBlock = *I)
          enqueue(predBlock);
      }
      continue;
    }

    // Specially handle macro-expanded code.
    if (S->getLocStart().isMacroID()) {
      count += scanMaybeReachableFromBlock(Block, PP, Reachable);
      continue;
    }

    if (isDeadCodeRoot(Block)) {
      reportDeadCode(Block, S, CB);
      count += scanMaybeReachableFromBlock(Block, PP, Reachable);
    }
    else {
      // Record this statement as the possibly best location in a
      // strongly-connected component of dead code for emitting a
      // warning.
      DeferredLocs.push_back(std::make_pair(Block, S));
    }
  }

  // If we didn't find a dead root, then report the dead code with the
  // earliest location.
  if (!DeferredLocs.empty()) {
    llvm::array_pod_sort(DeferredLocs.begin(), DeferredLocs.end(), SrcCmp);
    for (DeferredLocsTy::iterator I = DeferredLocs.begin(),
         E = DeferredLocs.end(); I != E; ++I) {
      const CFGBlock *Block = I->first;
      if (Reachable[Block->getBlockID()])
        continue;
      reportDeadCode(Block, I->second, CB);
      count += scanMaybeReachableFromBlock(Block, PP, Reachable);
    }
  }

  return count;
}

static SourceLocation GetUnreachableLoc(const Stmt *S,
                                        SourceRange &R1,
                                        SourceRange &R2) {
  R1 = R2 = SourceRange();

  if (const Expr *Ex = dyn_cast<Expr>(S))
    S = Ex->IgnoreParenImpCasts();

  switch (S->getStmtClass()) {
    case Expr::BinaryOperatorClass: {
      const BinaryOperator *BO = cast<BinaryOperator>(S);
      return BO->getOperatorLoc();
    }
    case Expr::UnaryOperatorClass: {
      const UnaryOperator *UO = cast<UnaryOperator>(S);
      R1 = UO->getSubExpr()->getSourceRange();
      return UO->getOperatorLoc();
    }
    case Expr::CompoundAssignOperatorClass: {
      const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(S);
      R1 = CAO->getLHS()->getSourceRange();
      R2 = CAO->getRHS()->getSourceRange();
      return CAO->getOperatorLoc();
    }
    case Expr::BinaryConditionalOperatorClass:
    case Expr::ConditionalOperatorClass: {
      const AbstractConditionalOperator *CO =
      cast<AbstractConditionalOperator>(S);
      return CO->getQuestionLoc();
    }
    case Expr::MemberExprClass: {
      const MemberExpr *ME = cast<MemberExpr>(S);
      R1 = ME->getSourceRange();
      return ME->getMemberLoc();
    }
    case Expr::ArraySubscriptExprClass: {
      const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(S);
      R1 = ASE->getLHS()->getSourceRange();
      R2 = ASE->getRHS()->getSourceRange();
      return ASE->getRBracketLoc();
    }
    case Expr::CStyleCastExprClass: {
      const CStyleCastExpr *CSC = cast<CStyleCastExpr>(S);
      R1 = CSC->getSubExpr()->getSourceRange();
      return CSC->getLParenLoc();
    }
    case Expr::CXXFunctionalCastExprClass: {
      const CXXFunctionalCastExpr *CE = cast <CXXFunctionalCastExpr>(S);
      R1 = CE->getSubExpr()->getSourceRange();
      return CE->getLocStart();
    }
    case Stmt::CXXTryStmtClass: {
      return cast<CXXTryStmt>(S)->getHandler(0)->getCatchLoc();
    }
    case Expr::ObjCBridgedCastExprClass: {
      const ObjCBridgedCastExpr *CSC = cast<ObjCBridgedCastExpr>(S);
      R1 = CSC->getSubExpr()->getSourceRange();
      return CSC->getLParenLoc();
    }
    default: ;
  }
  R1 = S->getSourceRange();
  return S->getLocStart();
}

void DeadCodeScan::reportDeadCode(const CFGBlock *B,
                                  const Stmt *S,
                                  clang::reachable_code::Callback &CB) {
  // Classify the unreachable code found, or suppress it in some cases.
  reachable_code::UnreachableKind UK = reachable_code::UK_Other;

  if (isa<BreakStmt>(S)) {
    UK = reachable_code::UK_Break;
  }
  else if (isTrivialDoWhile(B, S)) {
    return;
  }
  else if (isDeadReturn(B, S)) {
    UK = reachable_code::UK_Return;
  }

  SourceRange SilenceableCondVal;

  if (UK == reachable_code::UK_Other) {
    // Check if the dead code is part of the "loop target" of
    // a for/for-range loop.  This is the block that contains
    // the increment code.
    if (const Stmt *LoopTarget = B->getLoopTarget()) {
      SourceLocation Loc = LoopTarget->getLocStart();
      SourceRange R1(Loc, Loc), R2;

      if (const ForStmt *FS = dyn_cast<ForStmt>(LoopTarget)) {
        const Expr *Inc = FS->getInc();
        Loc = Inc->getLocStart();
        R2 = Inc->getSourceRange();
      }

      CB.HandleUnreachable(reachable_code::UK_Loop_Increment,
                           Loc, SourceRange(), SourceRange(Loc, Loc), R2);
      return;
    }
    
    // Check if the dead block has a predecessor whose branch has
    // a configuration value that *could* be modified to
    // silence the warning.
    CFGBlock::const_pred_iterator PI = B->pred_begin();
    if (PI != B->pred_end()) {
      if (const CFGBlock *PredBlock = PI->getPossiblyUnreachableBlock()) {
        const Stmt *TermCond =
            PredBlock->getTerminatorCondition(/* strip parens */ false);
        isConfigurationValue(TermCond, PP, &SilenceableCondVal);
      }
    }
  }

  SourceRange R1, R2;
  SourceLocation Loc = GetUnreachableLoc(S, R1, R2);
  CB.HandleUnreachable(UK, Loc, SilenceableCondVal, R1, R2);
}

//===----------------------------------------------------------------------===//
// Reachability APIs.
//===----------------------------------------------------------------------===//

namespace clang { namespace reachable_code {

void Callback::anchor() { }

unsigned ScanReachableFromBlock(const CFGBlock *Start,
                                llvm::BitVector &Reachable) {
  return scanFromBlock(Start, Reachable, /* SourceManager* */ nullptr, false);
}

void FindUnreachableCode(AnalysisDeclContext &AC, Preprocessor &PP,
                         Callback &CB) {

  CFG *cfg = AC.getCFG();
  if (!cfg)
    return;

  // Scan for reachable blocks from the entrance of the CFG.
  // If there are no unreachable blocks, we're done.
  llvm::BitVector reachable(cfg->getNumBlockIDs());
  unsigned numReachable =
    scanMaybeReachableFromBlock(&cfg->getEntry(), PP, reachable);
  if (numReachable == cfg->getNumBlockIDs())
    return;
  
  // If there aren't explicit EH edges, we should include the 'try' dispatch
  // blocks as roots.
  if (!AC.getCFGBuildOptions().AddEHEdges) {
    for (CFG::try_block_iterator I = cfg->try_blocks_begin(),
         E = cfg->try_blocks_end() ; I != E; ++I) {
      numReachable += scanMaybeReachableFromBlock(*I, PP, reachable);
    }
    if (numReachable == cfg->getNumBlockIDs())
      return;
  }

  // There are some unreachable blocks.  We need to find the root blocks that
  // contain code that should be considered unreachable.  
  for (CFG::iterator I = cfg->begin(), E = cfg->end(); I != E; ++I) {
    const CFGBlock *block = *I;
    // A block may have been marked reachable during this loop.
    if (reachable[block->getBlockID()])
      continue;
    
    DeadCodeScan DS(reachable, PP);
    numReachable += DS.scanBackwards(block, CB);
    
    if (numReachable == cfg->getNumBlockIDs())
      return;
  }
}

}} // end namespace clang::reachable_code