C++程序  |  498行  |  15.19 KB

// const-fst.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// Simple concrete immutable FST whose states and arcs are each stored
// in single arrays.

#ifndef FST_LIB_CONST_FST_H__
#define FST_LIB_CONST_FST_H__

#include <string>
#include <vector>
using std::vector;

#include <fst/expanded-fst.h>
#include <fst/fst-decl.h>  // For optional argument declarations
#include <fst/mapped-file.h>
#include <fst/test-properties.h>
#include <fst/util.h>


namespace fst {

template <class A, class U> class ConstFst;
template <class F, class G> void Cast(const F &, G *);

// States and arcs each implemented by single arrays, templated on the
// Arc definition. The unsigned type U is used to represent indices into
// the arc array.
template <class A, class U>
class ConstFstImpl : public FstImpl<A> {
 public:
  using FstImpl<A>::SetInputSymbols;
  using FstImpl<A>::SetOutputSymbols;
  using FstImpl<A>::SetType;
  using FstImpl<A>::SetProperties;
  using FstImpl<A>::Properties;

  typedef A Arc;
  typedef typename A::Weight Weight;
  typedef typename A::StateId StateId;
  typedef U Unsigned;

  ConstFstImpl()
      : states_region_(0), arcs_region_(0), states_(0), arcs_(0), nstates_(0),
        narcs_(0), start_(kNoStateId) {
    string type = "const";
    if (sizeof(U) != sizeof(uint32)) {
      string size;
      Int64ToStr(8 * sizeof(U), &size);
      type += size;
    }
    SetType(type);
    SetProperties(kNullProperties | kStaticProperties);
  }

  explicit ConstFstImpl(const Fst<A> &fst);

  ~ConstFstImpl() {
    delete arcs_region_;
    delete states_region_;
  }

  StateId Start() const { return start_; }

  Weight Final(StateId s) const { return states_[s].final; }

  StateId NumStates() const { return nstates_; }

  size_t NumArcs(StateId s) const { return states_[s].narcs; }

  size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }

  size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }

  static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts);

  A *Arcs(StateId s) { return arcs_ + states_[s].pos; }

  // Provide information needed for generic state iterator
  void InitStateIterator(StateIteratorData<A> *data) const {
    data->base = 0;
    data->nstates = nstates_;
  }

  // Provide information needed for the generic arc iterator
  void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
    data->base = 0;
    data->arcs = arcs_ + states_[s].pos;
    data->narcs = states_[s].narcs;
    data->ref_count = 0;
  }

 private:
  friend class ConstFst<A, U>;  // Allow finding narcs_, nstates_ during Write

  // States implemented by array *states_ below, arcs by (single) *arcs_.
  struct State {
    Weight final;                // Final weight
    Unsigned pos;                // Start of state's arcs in *arcs_
    Unsigned narcs;              // Number of arcs (per state)
    Unsigned niepsilons;         // # of input epsilons
    Unsigned noepsilons;         // # of output epsilons
    State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}
  };

  // Properties always true of this Fst class
  static const uint64 kStaticProperties = kExpanded;
  // Current unaligned file format version. The unaligned version was added and
  // made the default since the aligned version does not work on pipes.
  static const int kFileVersion = 2;
  // Current aligned file format version
  static const int kAlignedFileVersion = 1;
  // Minimum file format version supported
  static const int kMinFileVersion = 1;

  MappedFile *states_region_;    // Mapped file for states
  MappedFile *arcs_region_;      // Mapped file for arcs
  State *states_;                // States represenation
  A *arcs_;                      // Arcs representation
  StateId nstates_;              // Number of states
  size_t narcs_;                 // Number of arcs (per FST)
  StateId start_;                // Initial state

  DISALLOW_COPY_AND_ASSIGN(ConstFstImpl);
};

template <class A, class U>
const uint64 ConstFstImpl<A, U>::kStaticProperties;
template <class A, class U>
const int ConstFstImpl<A, U>::kFileVersion;
template <class A, class U>
const int ConstFstImpl<A, U>::kAlignedFileVersion;
template <class A, class U>
const int ConstFstImpl<A, U>::kMinFileVersion;


template<class A, class U>
ConstFstImpl<A, U>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) {
  string type = "const";
  if (sizeof(U) != sizeof(uint32)) {
    string size;
    Int64ToStr(sizeof(U) * 8, &size);
    type += size;
  }
  SetType(type);
  SetInputSymbols(fst.InputSymbols());
  SetOutputSymbols(fst.OutputSymbols());
  start_ = fst.Start();

  // Count # of states and arcs.
  for (StateIterator< Fst<A> > siter(fst);
       !siter.Done();
       siter.Next()) {
    ++nstates_;
    StateId s = siter.Value();
    for (ArcIterator< Fst<A> > aiter(fst, s);
         !aiter.Done();
         aiter.Next())
      ++narcs_;
  }
  states_region_ = MappedFile::Allocate(nstates_ * sizeof(*states_));
  arcs_region_ = MappedFile::Allocate(narcs_ * sizeof(*arcs_));
  states_ = reinterpret_cast<State*>(states_region_->mutable_data());
  arcs_ = reinterpret_cast<A*>(arcs_region_->mutable_data());
  size_t pos = 0;
  for (StateId s = 0; s < nstates_; ++s) {
    states_[s].final = fst.Final(s);
    states_[s].pos = pos;
    states_[s].narcs = 0;
    states_[s].niepsilons = 0;
    states_[s].noepsilons = 0;
    for (ArcIterator< Fst<A> > aiter(fst, s);
         !aiter.Done();
         aiter.Next()) {
      const A &arc = aiter.Value();
      ++states_[s].narcs;
      if (arc.ilabel == 0)
        ++states_[s].niepsilons;
      if (arc.olabel == 0)
        ++states_[s].noepsilons;
      arcs_[pos++] = arc;
    }
  }
  SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties);
}


template<class A, class U>
ConstFstImpl<A, U> *ConstFstImpl<A, U>::Read(istream &strm,
                                             const FstReadOptions &opts) {
  ConstFstImpl<A, U> *impl = new ConstFstImpl<A, U>;
  FstHeader hdr;
  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
    delete impl;
    return 0;
  }
  impl->start_ = hdr.Start();
  impl->nstates_ = hdr.NumStates();
  impl->narcs_ = hdr.NumArcs();

  // Ensures compatibility
  if (hdr.Version() == kAlignedFileVersion)
    hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);

  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
    LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
    delete impl;
    return 0;
  }

  size_t b = impl->nstates_ * sizeof(typename ConstFstImpl<A, U>::State);
  impl->states_region_ = MappedFile::Map(&strm, opts, b);
  if (!strm || impl->states_region_ == NULL) {
    LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
    delete impl;
    return 0;
  }
  impl->states_ = reinterpret_cast<State*>(
      impl->states_region_->mutable_data());
  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
    LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
    delete impl;
    return 0;
  }

  b = impl->narcs_ * sizeof(A);
  impl->arcs_region_ = MappedFile::Map(&strm, opts, b);
  if (!strm || impl->arcs_region_ == NULL) {
    LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
    delete impl;
    return 0;
  }
  impl->arcs_ = reinterpret_cast<A*>(impl->arcs_region_->mutable_data());
  return impl;
}

// Simple concrete immutable FST.  This class attaches interface to
// implementation and handles reference counting, delegating most
// methods to ImplToExpandedFst. The unsigned type U is used to
// represent indices into the arc array (uint32 by default, declared
// in fst-decl.h).
template <class A, class U>
class ConstFst : public ImplToExpandedFst< ConstFstImpl<A, U> > {
 public:
  friend class StateIterator< ConstFst<A, U> >;
  friend class ArcIterator< ConstFst<A, U> >;
  template <class F, class G> void friend Cast(const F &, G *);

  typedef A Arc;
  typedef typename A::StateId StateId;
  typedef ConstFstImpl<A, U> Impl;
  typedef U Unsigned;

  ConstFst() : ImplToExpandedFst<Impl>(new Impl()) {}

  explicit ConstFst(const Fst<A> &fst)
      : ImplToExpandedFst<Impl>(new Impl(fst)) {}

  ConstFst(const ConstFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {}

  // Get a copy of this ConstFst. See Fst<>::Copy() for further doc.
  virtual ConstFst<A, U> *Copy(bool safe = false) const {
    return new ConstFst<A, U>(*this);
  }

  // Read a ConstFst from an input stream; return NULL on error
  static ConstFst<A, U> *Read(istream &strm, const FstReadOptions &opts) {
    Impl* impl = Impl::Read(strm, opts);
    return impl ? new ConstFst<A, U>(impl) : 0;
  }

  // Read a ConstFst from a file; return NULL on error
  // Empty filename reads from standard input
  static ConstFst<A, U> *Read(const string &filename) {
    Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
    return impl ? new ConstFst<A, U>(impl) : 0;
  }

  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
    return WriteFst(*this, strm, opts);
  }

  virtual bool Write(const string &filename) const {
    return Fst<A>::WriteFile(filename);
  }

  template <class F>
  static bool WriteFst(const F &fst, ostream &strm,
                       const FstWriteOptions &opts);

  virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
    GetImpl()->InitStateIterator(data);
  }

  virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
    GetImpl()->InitArcIterator(s, data);
  }

 private:
  explicit ConstFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}

  // Makes visible to friends.
  Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }

  void SetImpl(Impl *impl, bool own_impl = true) {
    ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
  }

  // Use overloading to extract the type of the argument.
  static Impl* GetImplIfConstFst(const ConstFst &const_fst) {
    return const_fst.GetImpl();
  }

  // Note that this does not give privileged treatment to subtypes of ConstFst.
  template<typename NonConstFst>
  static Impl* GetImplIfConstFst(const NonConstFst& fst) {
    return NULL;
  }

  void operator=(const ConstFst<A, U> &fst);  // disallow
};

// Writes Fst in Const format, potentially with a pass over the machine
// before writing to compute number of states and arcs.
//
template <class A, class U>
template <class F>
bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
                              const FstWriteOptions &opts) {
  int file_version = opts.align ? ConstFstImpl<A, U>::kAlignedFileVersion :
      ConstFstImpl<A, U>::kFileVersion;
  size_t num_arcs = -1, num_states = -1;
  size_t start_offset = 0;
  bool update_header = true;
  if (Impl* impl = GetImplIfConstFst(fst)) {
    num_arcs = impl->narcs_;
    num_states = impl->nstates_;
    update_header = false;
  } else if ((start_offset = strm.tellp()) == -1) {
    // precompute values needed for header when we cannot seek to rewrite it.
    num_arcs = 0;
    num_states = 0;
    for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
      num_arcs += fst.NumArcs(siter.Value());
      ++num_states;
    }
    update_header = false;
  }
  FstHeader hdr;
  hdr.SetStart(fst.Start());
  hdr.SetNumStates(num_states);
  hdr.SetNumArcs(num_arcs);
  string type = "const";
  if (sizeof(U) != sizeof(uint32)) {
    string size;
    Int64ToStr(8 * sizeof(U), &size);
    type += size;
  }
  uint64 properties = fst.Properties(kCopyProperties, true) |
      ConstFstImpl<A, U>::kStaticProperties;
  FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
                             &hdr);
  if (opts.align && !AlignOutput(strm)) {
    LOG(ERROR) << "Could not align file during write after header";
    return false;
  }
  size_t pos = 0, states = 0;
  typename ConstFstImpl<A, U>::State state;
  for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
    state.final = fst.Final(siter.Value());
    state.pos = pos;
    state.narcs = fst.NumArcs(siter.Value());
    state.niepsilons = fst.NumInputEpsilons(siter.Value());
    state.noepsilons = fst.NumOutputEpsilons(siter.Value());
    strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
    pos += state.narcs;
    ++states;
  }
  hdr.SetNumStates(states);
  hdr.SetNumArcs(pos);
  if (opts.align && !AlignOutput(strm)) {
    LOG(ERROR) << "Could not align file during write after writing states";
  }
  for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
    StateId s = siter.Value();
    for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
      const A &arc = aiter.Value();
      strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc));
    }
  }
  strm.flush();
  if (!strm) {
    LOG(ERROR) << "ConstFst Write write failed: " << opts.source;
    return false;
  }
  if (update_header) {
    return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type,
                                       properties, &hdr, start_offset);
  } else {
    if (hdr.NumStates() != num_states) {
      LOG(ERROR) << "Inconsistent number of states observed during write";
      return false;
    }
    if (hdr.NumArcs() != num_arcs) {
      LOG(ERROR) << "Inconsistent number of arcs observed during write";
      return false;
    }
  }
  return true;
}

// Specialization for ConstFst; see generic version in fst.h
// for sample usage (but use the ConstFst type!). This version
// should inline.
template <class A, class U>
class StateIterator< ConstFst<A, U> > {
 public:
  typedef typename A::StateId StateId;

  explicit StateIterator(const ConstFst<A, U> &fst)
      : nstates_(fst.GetImpl()->NumStates()), s_(0) {}

  bool Done() const { return s_ >= nstates_; }

  StateId Value() const { return s_; }

  void Next() { ++s_; }

  void Reset() { s_ = 0; }

 private:
  StateId nstates_;
  StateId s_;

  DISALLOW_COPY_AND_ASSIGN(StateIterator);
};


// Specialization for ConstFst; see generic version in fst.h
// for sample usage (but use the ConstFst type!). This version
// should inline.
template <class A, class U>
class ArcIterator< ConstFst<A, U> > {
 public:
  typedef typename A::StateId StateId;

  ArcIterator(const ConstFst<A, U> &fst, StateId s)
      : arcs_(fst.GetImpl()->Arcs(s)),
        narcs_(fst.GetImpl()->NumArcs(s)), i_(0) {}

  bool Done() const { return i_ >= narcs_; }

  const A& Value() const { return arcs_[i_]; }

  void Next() { ++i_; }

  size_t Position() const { return i_; }

  void Reset() { i_ = 0; }

  void Seek(size_t a) { i_ = a; }

  uint32 Flags() const {
    return kArcValueFlags;
  }

  void SetFlags(uint32 f, uint32 m) {}

 private:
  const A *arcs_;
  size_t narcs_;
  size_t i_;

  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
};

// A useful alias when using StdArc.
typedef ConstFst<StdArc> StdConstFst;

}  // namespace fst

#endif  // FST_LIB_CONST_FST_H__