C++程序  |  399行  |  13.53 KB

// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_ASMJS_ASM_PARSER_H_
#define V8_ASMJS_ASM_PARSER_H_

#include <memory>
#include <string>

#include "src/asmjs/asm-scanner.h"
#include "src/asmjs/asm-types.h"
#include "src/wasm/wasm-module-builder.h"
#include "src/zone/zone-containers.h"

namespace v8 {
namespace internal {

class Utf16CharacterStream;

namespace wasm {

// A custom parser + validator + wasm converter for asm.js:
// http://asmjs.org/spec/latest/
// This parser intentionally avoids the portion of JavaScript parsing
// that are not required to determine if code is valid asm.js code.
// * It is mostly one pass.
// * It bails out on unexpected input.
// * It assumes strict ordering insofar as permitted by asm.js validation rules.
// * It relies on a custom scanner that provides de-duped identifiers in two
//   scopes (local + module wide).
class AsmJsParser {
 public:
  // clang-format off
  enum StandardMember {
    kInfinity,
    kNaN,
#define V(_unused1, name, _unused2, _unused3) kMath##name,
    STDLIB_MATH_FUNCTION_LIST(V)
#undef V
#define V(name, _unused1) kMath##name,
    STDLIB_MATH_VALUE_LIST(V)
#undef V
#define V(name, _unused1, _unused2, _unused3) k##name,
    STDLIB_ARRAY_TYPE_LIST(V)
#undef V
  };
  // clang-format on

  typedef EnumSet<StandardMember, uint64_t> StdlibSet;

  explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
                       Utf16CharacterStream* stream);
  bool Run();
  const char* failure_message() const { return failure_message_; }
  int failure_location() const { return failure_location_; }
  WasmModuleBuilder* module_builder() { return module_builder_; }
  const StdlibSet* stdlib_uses() const { return &stdlib_uses_; }

 private:
  // clang-format off
  enum class VarKind {
    kUnused,
    kLocal,
    kGlobal,
    kSpecial,
    kFunction,
    kTable,
    kImportedFunction,
#define V(_unused0, Name, _unused1, _unused2) kMath##Name,
    STDLIB_MATH_FUNCTION_LIST(V)
#undef V
#define V(Name, _unused1) kMath##Name,
    STDLIB_MATH_VALUE_LIST(V)
#undef V
  };
  // clang-format on

  // A single import in asm.js can require multiple imports in wasm, if the
  // function is used with different signatures. {cache} keeps the wasm
  // imports for the single asm.js import of name {function_name}.
  struct FunctionImportInfo {
    Vector<const char> function_name;
    ZoneUnorderedMap<FunctionSig, uint32_t> cache;

    // Constructor.
    FunctionImportInfo(Vector<const char> name, Zone* zone)
        : function_name(name), cache(zone) {}
  };

  struct VarInfo {
    AsmType* type = AsmType::None();
    WasmFunctionBuilder* function_builder = nullptr;
    FunctionImportInfo* import = nullptr;
    uint32_t mask = 0;
    uint32_t index = 0;
    VarKind kind = VarKind::kUnused;
    bool mutable_variable = true;
    bool function_defined = false;
  };

  struct GlobalImport {
    Vector<const char> import_name;
    ValueType value_type;
    VarInfo* var_info;
  };

  enum class BlockKind { kRegular, kLoop, kOther };

  struct BlockInfo {
    BlockKind kind;
    AsmJsScanner::token_t label;
  };

  // Helper class to make {TempVariable} safe for nesting.
  class TemporaryVariableScope;

  template <typename T>
  class CachedVectors {
   public:
    explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {}

    Zone* zone() const { return reusable_vectors_.get_allocator().zone(); }

    inline void fill(ZoneVector<T>* vec) {
      if (reusable_vectors_.empty()) return;
      reusable_vectors_.back().swap(*vec);
      reusable_vectors_.pop_back();
      vec->clear();
    }

    inline void reuse(ZoneVector<T>* vec) {
      reusable_vectors_.emplace_back(std::move(*vec));
    }

   private:
    ZoneVector<ZoneVector<T>> reusable_vectors_;
  };

  template <typename T>
  class CachedVector final : public ZoneVector<T> {
   public:
    explicit CachedVector(CachedVectors<T>& cache)
        : ZoneVector<T>(cache.zone()), cache_(&cache) {
      cache.fill(this);
    }
    ~CachedVector() { cache_->reuse(this); }

   private:
    CachedVectors<T>* cache_;
  };

  Zone* zone_;
  AsmJsScanner scanner_;
  WasmModuleBuilder* module_builder_;
  WasmFunctionBuilder* current_function_builder_;
  AsmType* return_type_;
  uintptr_t stack_limit_;
  StdlibSet stdlib_uses_;
  ZoneVector<VarInfo> global_var_info_;
  ZoneVector<VarInfo> local_var_info_;

  CachedVectors<ValueType> cached_valuetype_vectors_{zone_};
  CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_};
  CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_};
  CachedVectors<int32_t> cached_int_vectors_{zone_};

  int function_temp_locals_offset_;
  int function_temp_locals_used_;
  int function_temp_locals_depth_;

  // Error Handling related
  bool failed_;
  const char* failure_message_;
  int failure_location_;

  // Module Related.
  AsmJsScanner::token_t stdlib_name_;
  AsmJsScanner::token_t foreign_name_;
  AsmJsScanner::token_t heap_name_;

  static const AsmJsScanner::token_t kTokenNone = 0;

  // Track if parsing a heap assignment.
  bool inside_heap_assignment_;
  AsmType* heap_access_type_;

  ZoneVector<BlockInfo> block_stack_;

  // Types used for stdlib function and their set up.
  AsmType* stdlib_dq2d_;
  AsmType* stdlib_dqdq2d_;
  AsmType* stdlib_i2s_;
  AsmType* stdlib_ii2s_;
  AsmType* stdlib_minmax_;
  AsmType* stdlib_abs_;
  AsmType* stdlib_ceil_like_;
  AsmType* stdlib_fround_;

  // When making calls, the return type is needed to lookup signatures.
  // For `+callsite(..)` or `fround(callsite(..))` use this value to pass
  // along the coercion.
  AsmType* call_coercion_;

  // The source position associated with the above {call_coercion}.
  size_t call_coercion_position_;

  // When making calls, the coercion can also appear in the source stream
  // syntactically "behind" the call site. For `callsite(..)|0` use this
  // value to flag that such a coercion must happen.
  AsmType* call_coercion_deferred_;

  // The source position at which requesting a deferred coercion via the
  // aforementioned {call_coercion_deferred} is allowed.
  size_t call_coercion_deferred_position_;

  // The code position of the last heap access shift by an immediate value.
  // For `heap[expr >> value:NumericLiteral]` this indicates from where to
  // delete code when the expression is used as part of a valid heap access.
  // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched.
  size_t heap_access_shift_position_;
  uint32_t heap_access_shift_value_;
  static const size_t kNoHeapAccessShift = -1;

  // Used to track the last label we've seen so it can be matched to later
  // statements it's attached to.
  AsmJsScanner::token_t pending_label_;

  // Global imports. The list of imported variables that are copied during
  // module instantiation into a corresponding global variable.
  ZoneLinkedList<GlobalImport> global_imports_;

  Zone* zone() { return zone_; }

  inline bool Peek(AsmJsScanner::token_t token) {
    return scanner_.Token() == token;
  }

  inline bool Check(AsmJsScanner::token_t token) {
    if (scanner_.Token() == token) {
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  inline bool CheckForZero() {
    if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) {
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  inline bool CheckForDouble(double* value) {
    if (scanner_.IsDouble()) {
      *value = scanner_.AsDouble();
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  inline bool CheckForUnsigned(uint32_t* value) {
    if (scanner_.IsUnsigned()) {
      *value = scanner_.AsUnsigned();
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) {
    if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) {
      *value = scanner_.AsUnsigned();
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  inline AsmJsScanner::token_t Consume() {
    AsmJsScanner::token_t ret = scanner_.Token();
    scanner_.Next();
    return ret;
  }

  void SkipSemicolon();

  VarInfo* GetVarInfo(AsmJsScanner::token_t token);
  uint32_t VarIndex(VarInfo* info);
  void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type,
                     ValueType vtype,
                     const WasmInitExpr& init = WasmInitExpr());
  void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
  void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype,
                       bool mutable_variable, VarInfo* info);

  // Allocates a temporary local variable. The given {index} is absolute within
  // the function body, consider using {TemporaryVariableScope} when nesting.
  uint32_t TempVariable(int index);

  // Preserves a copy of the scanner's current identifier string in the zone.
  Vector<const char> CopyCurrentIdentifierString();

  // Use to set up block stack layers (including synthetic ones for if-else).
  // Begin/Loop/End below are implemented with these plus code generation.
  void BareBegin(BlockKind kind = BlockKind::kOther,
                 AsmJsScanner::token_t label = 0);
  void BareEnd();
  int FindContinueLabelDepth(AsmJsScanner::token_t label);
  int FindBreakLabelDepth(AsmJsScanner::token_t label);

  // Use to set up actual wasm blocks/loops.
  void Begin(AsmJsScanner::token_t label = 0);
  void Loop(AsmJsScanner::token_t label = 0);
  void End();

  void InitializeStdlibTypes();

  FunctionSig* ConvertSignature(AsmType* return_type,
                                const ZoneVector<AsmType*>& params);

  void ValidateModule();            // 6.1 ValidateModule
  void ValidateModuleParameters();  // 6.1 ValidateModule - parameters
  void ValidateModuleVars();        // 6.1 ValidateModule - variables
  void ValidateModuleVar(bool mutable_variable);
  void ValidateModuleVarImport(VarInfo* info, bool mutable_variable);
  void ValidateModuleVarStdlib(VarInfo* info);
  void ValidateModuleVarNewStdlib(VarInfo* info);
  void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable);

  void ValidateExport();         // 6.2 ValidateExport
  void ValidateFunctionTable();  // 6.3 ValidateFunctionTable
  void ValidateFunction();       // 6.4 ValidateFunction
  void ValidateFunctionParams(ZoneVector<AsmType*>* params);
  void ValidateFunctionLocals(size_t param_count,
                              ZoneVector<ValueType>* locals);
  void ValidateStatement();              // 6.5 ValidateStatement
  void Block();                          // 6.5.1 Block
  void ExpressionStatement();            // 6.5.2 ExpressionStatement
  void EmptyStatement();                 // 6.5.3 EmptyStatement
  void IfStatement();                    // 6.5.4 IfStatement
  void ReturnStatement();                // 6.5.5 ReturnStatement
  bool IterationStatement();             // 6.5.6 IterationStatement
  void WhileStatement();                 // 6.5.6 IterationStatement - while
  void DoStatement();                    // 6.5.6 IterationStatement - do
  void ForStatement();                   // 6.5.6 IterationStatement - for
  void BreakStatement();                 // 6.5.7 BreakStatement
  void ContinueStatement();              // 6.5.8 ContinueStatement
  void LabelledStatement();              // 6.5.9 LabelledStatement
  void SwitchStatement();                // 6.5.10 SwitchStatement
  void ValidateCase();                   // 6.6. ValidateCase
  void ValidateDefault();                // 6.7 ValidateDefault
  AsmType* ValidateExpression();         // 6.8 ValidateExpression
  AsmType* Expression(AsmType* expect);  // 6.8.1 Expression
  AsmType* NumericLiteral();             // 6.8.2 NumericLiteral
  AsmType* Identifier();                 // 6.8.3 Identifier
  AsmType* CallExpression();             // 6.8.4 CallExpression
  AsmType* MemberExpression();           // 6.8.5 MemberExpression
  AsmType* AssignmentExpression();       // 6.8.6 AssignmentExpression
  AsmType* UnaryExpression();            // 6.8.7 UnaryExpression
  AsmType* MultiplicativeExpression();   // 6.8.8 MultiplicativeExpression
  AsmType* AdditiveExpression();         // 6.8.9 AdditiveExpression
  AsmType* ShiftExpression();            // 6.8.10 ShiftExpression
  AsmType* RelationalExpression();       // 6.8.11 RelationalExpression
  AsmType* EqualityExpression();         // 6.8.12 EqualityExpression
  AsmType* BitwiseANDExpression();       // 6.8.13 BitwiseANDExpression
  AsmType* BitwiseXORExpression();       // 6.8.14 BitwiseXORExpression
  AsmType* BitwiseORExpression();        // 6.8.15 BitwiseORExpression
  AsmType* ConditionalExpression();      // 6.8.16 ConditionalExpression
  AsmType* ParenthesizedExpression();    // 6.8.17 ParenthesiedExpression
  AsmType* ValidateCall();               // 6.9 ValidateCall
  bool PeekCall();                       // 6.9 ValidateCall - helper
  void ValidateHeapAccess();             // 6.10 ValidateHeapAccess
  void ValidateFloatCoercion();          // 6.11 ValidateFloatCoercion

  // Used as part of {ForStatement}. Scans forward to the next `)` in order to
  // skip over the third expression in a for-statement. This is one piece that
  // makes this parser not be a pure single-pass.
  void ScanToClosingParenthesis();

  // Used as part of {SwitchStatement}. Collects all case labels in the current
  // switch-statement, then resets the scanner position. This is one piece that
  // makes this parser not be a pure single-pass.
  void GatherCases(ZoneVector<int32_t>* cases);
};

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_ASMJS_ASM_PARSER_H_