C++程序  |  291行  |  7.79 KB

// partition.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: johans@google.com (Johan Schalkwyk)
//
// \file Functions and classes to create a partition of states
//

#ifndef FST_LIB_PARTITION_H__
#define FST_LIB_PARTITION_H__

#include <vector>
using std::vector;
#include <algorithm>


#include <fst/queue.h>



namespace fst {

template <typename T> class PartitionIterator;

// \class Partition
// \brief Defines a partitioning of states. Typically used to represent
//        equivalence classes for Fst operations like minimization.
//
template <typename T>
class Partition {
  friend class PartitionIterator<T>;

  struct Element {
   Element() : value(0), next(0), prev(0) {}
   Element(T v) : value(v), next(0), prev(0) {}

   T        value;
   Element* next;
   Element* prev;
  };

 public:
  Partition() {}

  Partition(T num_states) {
    Initialize(num_states);
  }

  ~Partition() {
    for (size_t i = 0; i < elements_.size(); ++i)
      delete elements_[i];
  }

  // Create an empty partition for num_states. At initialization time
  // all elements are not assigned to a class (i.e class_index = -1).
  // Initialize just creates num_states of elements. All element
  // operations are then done by simply disconnecting the element from
  // it current class and placing it at the head of the next class.
  void Initialize(size_t num_states) {
    for (size_t i = 0; i < elements_.size(); ++i)
      delete elements_[i];
    elements_.clear();
    classes_.clear();
    class_index_.clear();

    elements_.resize(num_states);
    class_index_.resize(num_states, -1);
    class_size_.reserve(num_states);
    for (size_t i = 0; i < num_states; ++i)
      elements_[i] = new Element(i);
    num_states_ = num_states;
  }

  // Add a class, resize classes_ and class_size_ resource by 1.
  size_t AddClass() {
    size_t num_classes = classes_.size();
    classes_.resize(num_classes + 1, 0);
    class_size_.resize(num_classes + 1, 0);
    class_split_.resize(num_classes + 1, 0);
    split_size_.resize(num_classes + 1, 0);
    return num_classes;
  }

  void AllocateClasses(T num_classes) {
    size_t n = classes_.size() + num_classes;
    classes_.resize(n, 0);
    class_size_.resize(n, 0);
    class_split_.resize(n, 0);
    split_size_.resize(n, 0);
  }

  // Add element_id to class_id. The Add method is used to initialize
  // partition. Once elements have been added to a class, you need to
  // use the Move() method move an element from once class to another.
  void Add(T element_id, T class_id) {
    Element* element = elements_[element_id];

    if (classes_[class_id])
      classes_[class_id]->prev = element;
    element->next = classes_[class_id];
    element->prev = 0;
    classes_[class_id] = element;

    class_index_[element_id] = class_id;
    class_size_[class_id]++;
  }

  // Move and element_id to class_id. Disconnects (removes) element
  // from it current class and
  void Move(T element_id, T class_id) {
    T old_class_id = class_index_[element_id];

    Element* element = elements_[element_id];
    if (element->next) element->next->prev = element->prev;
    if (element->prev) element->prev->next = element->next;
    else               classes_[old_class_id] = element->next;

    Add(element_id, class_id);
    class_size_[old_class_id]--;
  }

  // split class on the element_id
  void SplitOn(T element_id) {
    T class_id = class_index_[element_id];
    if (class_size_[class_id] == 1) return;

    // first time class is split
    if (split_size_[class_id] == 0)
      visited_classes_.push_back(class_id);

    // increment size of split (set of element at head of chain)
    split_size_[class_id]++;

    // update split point
    if (class_split_[class_id] == 0)
      class_split_[class_id] = classes_[class_id];
    if (class_split_[class_id] == elements_[element_id])
      class_split_[class_id] = elements_[element_id]->next;

    // move to head of chain in same class
    Move(element_id, class_id);
  }

  // Finalize class_id, split if required, and update class_splits,
  // class indices of the newly created class. Returns the new_class id
  // or -1 if no new class was created.
  T SplitRefine(T class_id) {
    // only split if necessary
    if (class_size_[class_id] == split_size_[class_id]) {
      class_split_[class_id] = 0;
      split_size_[class_id] = 0;
      return -1;
    } else {

      T new_class = AddClass();
      size_t remainder = class_size_[class_id] - split_size_[class_id];
      if (remainder < split_size_[class_id]) {  // add smaller
        Element* split_el   = class_split_[class_id];
        classes_[new_class] = split_el;
        class_size_[class_id] = split_size_[class_id];
        class_size_[new_class] = remainder;
        split_el->prev->next = 0;
        split_el->prev = 0;
      } else {
        Element* split_el   = class_split_[class_id];
        classes_[new_class] = classes_[class_id];
        class_size_[class_id] = remainder;
        class_size_[new_class] = split_size_[class_id];
        split_el->prev->next = 0;
        split_el->prev = 0;
        classes_[class_id] = split_el;
      }

      // update class index for element in new class
      for (Element* el = classes_[new_class]; el; el = el->next)
        class_index_[el->value] = new_class;

      class_split_[class_id] = 0;
      split_size_[class_id] = 0;

      return new_class;
    }
  }

  // Once all states have been processed for a particular class C, we
  // can finalize the split. FinalizeSplit() will update each block in the
  // partition, create new once and update the queue of active classes
  // that require further refinement.
  template <class Queue>
  void FinalizeSplit(Queue* L) {
    for (size_t i = 0; i < visited_classes_.size(); ++i) {
      T new_class = SplitRefine(visited_classes_[i]);
      if (new_class != -1 && L)
        L->Enqueue(new_class);
    }
    visited_classes_.clear();
  }


  const T class_id(T element_id) const {
    return class_index_[element_id];
  }

  const vector<T>& class_sizes() const {
    return class_size_;
  }

  const size_t class_size(T class_id)  const {
    return class_size_[class_id];
  }

  const T num_classes() const {
    return classes_.size();
  }


 private:
  int num_states_;

  // container of all elements (owner of ptrs)
  vector<Element*> elements_;

  // linked list of elements belonging to class
  vector<Element*> classes_;

  // pointer to split point for each class
  vector<Element*> class_split_;

  // class index of element
  vector<T> class_index_;

  // class sizes
  vector<T> class_size_;

  // size of split for each class
  vector<T> split_size_;

  // set of visited classes to be used in split refine
  vector<T> visited_classes_;
};


// iterate over members of a class in a partition
template <typename T>
class PartitionIterator {
  typedef typename Partition<T>::Element Element;
 public:
  PartitionIterator(const Partition<T>& partition, T class_id)
      : p_(partition),
        element_(p_.classes_[class_id]),
        class_id_(class_id) {}

  bool Done() {
    return (element_ == 0);
  }

  const T Value() {
    return (element_->value);
  }

  void Next() {
    element_ = element_->next;
  }

  void Reset() {
    element_ = p_.classes_[class_id_];
  }

 private:
  const Partition<T>& p_;

  const Element* element_;

  T class_id_;
};
}  // namespace fst

#endif  // FST_LIB_PARTITION_H__