C++程序  |  136行  |  3.95 KB

/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Feature extraction language (FEL) parser.
//
// BNF grammar for FEL:
//
// <feature model> ::= { <feature extractor> }
//
// <feature extractor> ::= <extractor spec> |
//                         <extractor spec> '.' <feature extractor> |
//                         <extractor spec> '{' { <feature extractor> } '}'
//
// <extractor spec> ::= <extractor type>
//                      [ '(' <parameter list> ')' ]
//                      [ ':' <extractor name> ]
//
// <parameter list> = ( <parameter> | <argument> ) { ',' <parameter> }
//
// <parameter> ::= <parameter name> '=' <parameter value>
//
// <extractor type> ::= NAME
// <extractor name> ::= NAME | STRING
// <argument> ::= NUMBER
// <parameter name> ::= NAME
// <parameter value> ::= NUMBER | STRING | NAME

#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_

#include <string>

#include "lang_id/common/fel/feature-descriptors.h"
#include "lang_id/common/lite_base/logging.h"

namespace libtextclassifier3 {
namespace mobile {

class FELParser {
 public:
  // Parses fml specification into feature extractor descriptor.
  // Returns true on success, false on error (e.g., syntax errors).
  bool Parse(const string &source, FeatureExtractorDescriptor *result);

 private:
  // Initializes the parser with the source text.
  // Returns true on success, false on syntax error.
  bool Initialize(const string &source);

  // Outputs an error message, with context info.
  void ReportError(const string &error_message);

  // Moves to the next input character.
  void Next();

  // Moves to the next input item.  Sets item_text_ and item_type_ accordingly.
  // Returns true on success, false on syntax error.
  bool NextItem();

  // Parses a feature descriptor.
  // Returns true on success, false on syntax error.
  bool ParseFeature(FeatureFunctionDescriptor *result);

  // Parses a parameter specification.
  // Returns true on success, false on syntax error.
  bool ParseParameter(FeatureFunctionDescriptor *result);

  // Returns true if end of source input has been reached.
  bool eos() const { return current_ >= source_.end(); }

  // Returns current character.  Other methods should access the current
  // character through this method (instead of using *current_ directly): this
  // method performs extra safety checks.
  //
  // In case of an unsafe access, returns '\0'.
  char CurrentChar() const {
    if ((current_ >= source_.begin()) && (current_ < source_.end())) {
      return *current_;
    } else {
      SAFTM_LOG(ERROR) << "Unsafe char read";
      return '\0';
    }
  }

  // Item types.
  enum ItemTypes {
    END = 0,
    NAME = -1,
    NUMBER = -2,
    STRING = -3,
  };

  // Source text.
  string source_;

  // Current input position.
  string::iterator current_;

  // Line number for current input position.
  int line_number_;

  // Start position for current item.
  string::iterator item_start_;

  // Start position for current line.
  string::iterator line_start_;

  // Line number for current item.
  int item_line_number_;

  // Item type for current item. If this is positive it is interpreted as a
  // character. If it is negative it is interpreted as an item type.
  int item_type_;

  // Text for current item.
  string item_text_;
};

}  // namespace mobile
}  // namespace nlp_saft

#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_