C++程序  |  482行  |  16.06 KB

// state-table.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// Classes for representing the mapping between state tuples and state Ids.

#ifndef FST_LIB_STATE_TABLE_H__
#define FST_LIB_STATE_TABLE_H__

#include <deque>
using std::deque;
#include <vector>
using std::vector;

#include <fst/bi-table.h>
#include <fst/expanded-fst.h>


namespace fst {

// STATE TABLES - these determine the bijective mapping between state
// tuples (e.g. in composition triples of two FST states and a
// composition filter state) and their corresponding state IDs.
// They are classes, templated on state tuples, of the form:
//
// template <class T>
// class StateTable {
//  public:
//   typedef typename T StateTuple;
//
//   // Required constructors.
//   StateTable();
//
//   // Lookup state ID by tuple. If it doesn't exist, then add it.
//   StateId FindState(const StateTuple &);
//   // Lookup state tuple by state ID.
//   const StateTuple<StateId> &Tuple(StateId) const;
//   // # of stored tuples.
//   StateId Size() const;
// };
//
// A state tuple has the form:
//
// template <class S>
// struct StateTuple {
//   typedef typename S StateId;
//
//   // Required constructors.
//   StateTuple();
//   StateTuple(const StateTuple &);
// };


// An implementation using a hash map for the tuple to state ID mapping.
// The state tuple T must have == defined. H is the hash function.
template <class T, class H>
class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
 public:
  typedef T StateTuple;
  typedef typename StateTuple::StateId StateId;
  using HashBiTable<StateId, T, H>::FindId;
  using HashBiTable<StateId, T, H>::FindEntry;
  using HashBiTable<StateId, T, H>::Size;

  HashStateTable() : HashBiTable<StateId, T, H>() {}

  // Reserves space for table_size elements.
  explicit HashStateTable(size_t table_size)
      : HashBiTable<StateId, T, H>(table_size) {}

  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};


// An implementation using a hash map for the tuple to state ID mapping.
// The state tuple T must have == defined. H is the hash function.
template <class T, class H>
class CompactHashStateTable
    : public CompactHashBiTable<typename T::StateId, T, H> {
 public:
  typedef T StateTuple;
  typedef typename StateTuple::StateId StateId;
  using CompactHashBiTable<StateId, T, H>::FindId;
  using CompactHashBiTable<StateId, T, H>::FindEntry;
  using CompactHashBiTable<StateId, T, H>::Size;

  CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}

  // Reserves space for 'table_size' elements.
  explicit CompactHashStateTable(size_t table_size)
      : CompactHashBiTable<StateId, T, H>(table_size) {}

  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};

// An implementation using a vector for the tuple to state mapping.
// It is passed a function object FP that should fingerprint tuples
// uniquely to an integer that can used as a vector index. Normally,
// VectorStateTable constructs the FP object.  The user can instead
// pass in this object; in that case, VectorStateTable takes its
// ownership.
template <class T, class FP>
class VectorStateTable
    : public VectorBiTable<typename T::StateId, T, FP> {
 public:
  typedef T StateTuple;
  typedef typename StateTuple::StateId StateId;
  using VectorBiTable<StateId, T, FP>::FindId;
  using VectorBiTable<StateId, T, FP>::FindEntry;
  using VectorBiTable<StateId, T, FP>::Size;
  using VectorBiTable<StateId, T, FP>::Fingerprint;

  // Reserves space for 'table_size' elements.
  explicit VectorStateTable(FP *fp = 0, size_t table_size = 0)
      : VectorBiTable<StateId, T, FP>(fp, table_size) {}

  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};


// An implementation using a vector and a compact hash table. The
// selecting functor S returns true for tuples to be hashed in the
// vector.  The fingerprinting functor FP returns a unique fingerprint
// for each tuple to be hashed in the vector (these need to be
// suitable for indexing in a vector).  The hash functor H is used when
// hashing tuple into the compact hash table.
template <class T, class S, class FP, class H>
class VectorHashStateTable
    : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
 public:
  typedef T StateTuple;
  typedef typename StateTuple::StateId StateId;
  using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
  using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
  using VectorHashBiTable<StateId, T, S, FP, H>::Size;
  using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
  using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
  using VectorHashBiTable<StateId, T, S, FP, H>::Hash;

  VectorHashStateTable(S *s, FP *fp, H *h,
                       size_t vector_size = 0,
                       size_t tuple_size = 0)
      : VectorHashBiTable<StateId, T, S, FP, H>(
          s, fp, h, vector_size, tuple_size) {}

  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};


// An implementation using a hash map for the tuple to state ID
// mapping. This version permits erasing of states.  The state tuple T
// must have == defined and its default constructor must produce a
// tuple that will never be seen. F is the hash function.
template <class T, class F>
class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
 public:
  typedef T StateTuple;
  typedef typename StateTuple::StateId StateId;
  using ErasableBiTable<StateId, T, F>::FindId;
  using ErasableBiTable<StateId, T, F>::FindEntry;
  using ErasableBiTable<StateId, T, F>::Size;
  using ErasableBiTable<StateId, T, F>::Erase;

  ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};

//
// COMPOSITION STATE TUPLES AND TABLES
//
// The composition state table has the form:
//
// template <class A, class F>
// class ComposeStateTable {
//  public:
//   typedef A Arc;
//   typedef F FilterState;
//   typedef typename A::StateId StateId;
//   typedef ComposeStateTuple<StateId> StateTuple;
//
//   // Required constructors. Copy constructor does not copy state.
//   ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
//   ComposeStateTable(const ComposeStateTable<A, F> &table);
//   // Lookup state ID by tuple. If it doesn't exist, then add it.
//   StateId FindState(const StateTuple &);
//   // Lookup state tuple by state ID.
//   const StateTuple<StateId> &Tuple(StateId) const;
//   // # of stored tuples.
//   StateId Size() const;
//   // Return true if error encountered
//   bool Error() const;
// };

// Represents the composition state.
template <typename S, typename F>
struct ComposeStateTuple {
  typedef S StateId;
  typedef F FilterState;

  ComposeStateTuple()
      : state_id1(kNoStateId), state_id2(kNoStateId),
        filter_state(FilterState::NoState()) {}

  ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
      : state_id1(s1), state_id2(s2), filter_state(f) {}

  StateId state_id1;         // State Id on fst1
  StateId state_id2;         // State Id on fst2
  FilterState filter_state;  // State of composition filter
};

// Equality of composition state tuples.
template <typename S, typename F>
inline bool operator==(const ComposeStateTuple<S, F>& x,
                       const ComposeStateTuple<S, F>& y) {
  if (&x == &y)
    return true;
  return x.state_id1 == y.state_id1 &&
      x.state_id2 == y.state_id2 &&
      x.filter_state == y.filter_state;
}


// Hashing of composition state tuples.
template <typename S, typename F>
class ComposeHash {
 public:
  size_t operator()(const ComposeStateTuple<S, F>& t) const {
    return t.state_id1 + t.state_id2 * kPrime0 +
        t.filter_state.Hash() * kPrime1;
  }
 private:
  static const size_t kPrime0;
  static const size_t kPrime1;
};

template <typename S, typename F>
const size_t ComposeHash<S, F>::kPrime0 = 7853;

template <typename S, typename F>
const size_t ComposeHash<S, F>::kPrime1 = 7867;


// A HashStateTable over composition tuples.
template <typename A,
          typename F,
          typename H =
          CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
                                ComposeHash<typename A::StateId, F> > >
class GenericComposeStateTable : public H {
 public:
  typedef A Arc;
  typedef typename A::StateId StateId;
  typedef F FilterState;
  typedef ComposeStateTuple<StateId, F> StateTuple;

  GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}

  // Reserves space for 'table_size' elements.
  GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2,
                           size_t table_size) : H(table_size) {}

  bool Error() const { return false; }

 private:
  void operator=(const GenericComposeStateTable<A, F> &table);  // disallow
};


//  Fingerprint for general composition tuples.
template  <typename S, typename F>
class ComposeFingerprint {
 public:
  typedef S StateId;
  typedef F FilterState;
  typedef ComposeStateTuple<S, F> StateTuple;

  // Required but suboptimal constructor.
  ComposeFingerprint() : mult1_(8192), mult2_(8192) {
    LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
  }

  // Constructor is provided the sizes of the input FSTs
  ComposeFingerprint(StateId nstates1, StateId nstates2)
      : mult1_(nstates1), mult2_(nstates1 * nstates2) { }

  size_t operator()(const StateTuple &tuple) {
    return tuple.state_id1 + tuple.state_id2 * mult1_ +
        tuple.filter_state.Hash() * mult2_;
  }

 private:
  ssize_t mult1_;
  ssize_t mult2_;
};


// Useful when the first composition state determines the tuple.
template  <typename S, typename F>
class ComposeState1Fingerprint {
 public:
  typedef S StateId;
  typedef F FilterState;
  typedef ComposeStateTuple<S, F> StateTuple;

  size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
};


// Useful when the second composition state determines the tuple.
template  <typename S, typename F>
class ComposeState2Fingerprint {
 public:
  typedef S StateId;
  typedef F FilterState;
  typedef ComposeStateTuple<S, F> StateTuple;

  size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
};


// A VectorStateTable over composition tuples.  This can be used when
// the product of number of states in FST1 and FST2 (and the
// composition filter state hash) is manageable. If the FSTs are not
// expanded Fsts, they will first have their states counted.
template  <typename A, typename F>
class ProductComposeStateTable : public
VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
                 ComposeFingerprint<typename A::StateId, F> > {
 public:
  typedef A Arc;
  typedef typename A::StateId StateId;
  typedef F FilterState;
  typedef ComposeStateTuple<StateId, F> StateTuple;
  typedef VectorStateTable<StateTuple,
                           ComposeFingerprint<StateId, F> > StateTable;

  // Reserves space for 'table_size' elements.
  ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2,
                           size_t table_size = 0)
      : StateTable(new ComposeFingerprint<StateId, F>(CountStates(fst1),
                                                      CountStates(fst2)),
                   table_size) {}

  ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
      : StateTable(new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}

  bool Error() const { return false; }

 private:
  void operator=(const ProductComposeStateTable<A, F> &table);  // disallow
};

// A VectorStateTable over composition tuples.  This can be used when
// FST1 is a string (satisfies kStringProperties) and FST2 is
// epsilon-free and deterministic. It should be used with a
// composition filter that creates at most one filter state per tuple
// under these conditions (e.g. SequenceComposeFilter or
// MatchComposeFilter).
template <typename A, typename F>
class StringDetComposeStateTable : public
VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
                 ComposeState1Fingerprint<typename A::StateId, F> > {
 public:
  typedef A Arc;
  typedef typename A::StateId StateId;
  typedef F FilterState;
  typedef ComposeStateTuple<StateId, F> StateTuple;
  typedef VectorStateTable<StateTuple,
                           ComposeState1Fingerprint<StateId, F> > StateTable;

  StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
      : error_(false) {
    uint64 props1 = kString;
    uint64 props2 = kIDeterministic | kNoIEpsilons;
    if (fst1.Properties(props1, true) != props1 ||
        fst2.Properties(props2, true) != props2) {
      FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
                 << " fst2 not input deterministic and epsilon-free";
      error_ = true;
    }
  }

  StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
      : StateTable(table), error_(table.error_) {}

  bool Error() const { return error_; }

 private:
  bool error_;

  void operator=(const StringDetComposeStateTable<A, F> &table);  // disallow
};


// A VectorStateTable over composition tuples.  This can be used when
// FST2 is a string (satisfies kStringProperties) and FST1 is
// epsilon-free and deterministic. It should be used with a
// composition filter that creates at most one filter state per tuple
// under these conditions (e.g. SequenceComposeFilter or
// MatchComposeFilter).
template <typename A, typename F>
class DetStringComposeStateTable : public
VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
                 ComposeState2Fingerprint<typename A::StateId, F> > {
 public:
  typedef A Arc;
  typedef typename A::StateId StateId;
  typedef F FilterState;
  typedef ComposeStateTuple<StateId, F> StateTuple;
  typedef VectorStateTable<StateTuple,
                           ComposeState2Fingerprint<StateId, F> > StateTable;

  DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
      :error_(false) {
    uint64 props1 = kODeterministic | kNoOEpsilons;
    uint64 props2 = kString;
    if (fst1.Properties(props1, true) != props1 ||
        fst2.Properties(props2, true) != props2) {
      FSTERROR() << "StringDetComposeStateTable: fst2 not a string or"
                 << " fst1 not output deterministic and epsilon-free";
      error_ = true;
    }
  }

  DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
      : StateTable(table), error_(table.error_) {}

  bool Error() const { return error_; }

 private:
  bool error_;

  void operator=(const DetStringComposeStateTable<A, F> &table);  // disallow
};


// An ErasableStateTable over composition tuples. The Erase(StateId) method
// can be called if the user either is sure that composition will never return
// to that tuple or doesn't care that if it does, it is assigned a new
// state ID.
template <typename A, typename F>
class ErasableComposeStateTable : public
ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
                   ComposeHash<typename A::StateId, F> > {
 public:
  typedef A Arc;
  typedef typename A::StateId StateId;
  typedef F FilterState;
  typedef ComposeStateTuple<StateId, F> StateTuple;

  ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}

  bool Error() const { return false; }

 private:
  void operator=(const ErasableComposeStateTable<A, F> &table);  // disallow
};

}  // namespace fst

#endif  // FST_LIB_STATE_TABLE_H__