#!/usr/bin/env python
# Copyright 2014 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from Queue import Empty
from multiprocessing import Event, Process, Queue
import traceback


class NormalResult():
  def __init__(self, result):
    self.result = result
    self.exception = False
    self.break_now = False


class ExceptionResult():
  def __init__(self):
    self.exception = True
    self.break_now = False


class BreakResult():
  def __init__(self):
    self.exception = False
    self.break_now = True


class MaybeResult():
  """Wraps values yielded by Pool.imap_unordered: either a heartbeat
  (heartbeat is True, value is None) or a real result (value holds it)."""

  def __init__(self, heartbeat, value):
    self.heartbeat = heartbeat
    self.value = value

  @staticmethod
  def create_heartbeat():
    return MaybeResult(True, None)

  @staticmethod
  def create_result(value):
    return MaybeResult(False, value)
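

# How consumers of Pool.imap_unordered (below) typically interpret
# MaybeResult values - a minimal sketch, assuming a hypothetical "handle"
# callback:
#
#   for maybe in pool.imap_unordered(fn, gen):
#     if maybe.heartbeat:
#       continue  # the runner is still waiting for the next result
#     handle(maybe.value)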


def Worker(fn, work_queue, done_queue, done,
           process_context_fn=None, process_context_args=None):
  """Worker to be run in a child process.
  The worker stops on two conditions. 1. When the poison pill "STOP" is
  reached or 2. when the event "done" is set."""
  try:
    kwargs = {}
    if process_context_fn and process_context_args is not None:
      kwargs.update(process_context=process_context_fn(*process_context_args))
    for args in iter(work_queue.get, "STOP"):
      if done.is_set():
        break
      try:
        done_queue.put(NormalResult(fn(*args, **kwargs)))
      except Exception as e:
        traceback.print_exc()
        print(">>> EXCEPTION: %s" % e)
        done_queue.put(ExceptionResult())
  except KeyboardInterrupt:
    done_queue.put(BreakResult())


class Pool():
  """Distributes tasks to a number of worker processes.
  New tasks can be added dynamically even after the workers have been started.
  Requirement: Tasks can only be added from the parent process, e.g. while
  consuming the results generator."""

  # Factor used to calculate the maximum number of items in the work/done
  # queue. Necessary to avoid overflowing the queue's pipe if a keyboard
  # interrupt happens.
  BUFFER_FACTOR = 4

  def __init__(self, num_workers, heartbeat_timeout=30):
    self.num_workers = num_workers
    self.processes = []
    self.terminated = False

    # Invariant: count >= #work_queue + #done_queue. It is greater while a
    # worker has taken an item from the work_queue but has not yet submitted
    # the result to the done_queue. It is equal when no worker is working,
    # e.g. when all workers have finished and no results are being processed.
    # Count is only accessed by the parent process. Only the parent process is
    # allowed to remove items from the done_queue and to add items to the
    # work_queue.
    self.count = 0
    self.work_queue = Queue()
    self.done_queue = Queue()
    self.done = Event()
    self.heartbeat_timeout = heartbeat_timeout

  def imap_unordered(self, fn, gen,
                     process_context_fn=None, process_context_args=None):
    """Maps function "fn" to items in generator "gen" on the worker processes
    in an arbitrary order. The items are expected to be lists of arguments to
    the function. Returns a results iterator. A result value of type
    MaybeResult either indicates a heartbeat of the runner, i.e. indicating
    that the runner is still waiting for the result to be computed, or it wraps
    the real result.

    Args:
      process_context_fn: Function executed once by each worker. Expected to
          return a process-context object. If present, this object is passed
          as additional argument to each call to fn.
      process_context_args: List of arguments for the invocation of
          process_context_fn. All arguments will be pickled and sent beyond the
          process boundary.
    """
    try:
      internal_error = False
      gen = iter(gen)
      self.advance = self._advance_more

      for w in xrange(self.num_workers):
        p = Process(target=Worker, args=(fn,
                                         self.work_queue,
                                         self.done_queue,
                                         self.done,
                                         process_context_fn,
                                         process_context_args))
        self.processes.append(p)
        p.start()

      self.advance(gen)
      while self.count > 0:
        while True:
          try:
            result = self.done_queue.get(timeout=self.heartbeat_timeout)
            break
          except Empty:
            # Indicate a heartbeat. The iterator will continue fetching the
            # next result.
            yield MaybeResult.create_heartbeat()
        self.count -= 1
        if result.exception:
          # TODO(machenbach): Handle a few known types of internal errors
          # gracefully, e.g. missing test files.
          internal_error = True
          continue
        elif result.break_now:
          # A keyboard interrupt happened in one of the worker processes.
          raise KeyboardInterrupt
        else:
          yield MaybeResult.create_result(result.result)
        self.advance(gen)
    finally:
      self.terminate()
    if internal_error:
      raise Exception("Internal error in a worker process.")

  def _advance_more(self, gen):
    while self.count < self.num_workers * self.BUFFER_FACTOR:
      try:
        self.work_queue.put(next(gen))
        self.count += 1
      except StopIteration:
        self.advance = self._advance_empty
        break

  def _advance_empty(self, gen):
    pass

  def add(self, args):
    """Adds an item to the work queue. Can be called dynamically while
    processing the results from imap_unordered."""
    self.work_queue.put(args)
    self.count += 1

  def terminate(self):
    if self.terminated:
      return
    self.terminated = True

    # For an exceptional tear-down, set the "done" event to stop the workers
    # before they empty the queue buffer.
    self.done.set()

    for _ in self.processes:
      # During a normal tear-down the workers block on get(). Feed one poison
      # pill per worker to make them stop.
      self.work_queue.put("STOP")

    for p in self.processes:
      p.join()

    # Drain the queues to prevent failures when queues are garbage collected.
    try:
      while True: self.work_queue.get(False)
    except Empty:
      pass
    try:
      while True: self.done_queue.get(False)
    except Empty:
      pass
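

def _example_square(x):
  # Hypothetical task function used only by the sketch below; real callers
  # pass their own picklable, module-level function. Each work item is a
  # tuple of arguments.
  return x * x


if __name__ == "__main__":
  # A minimal usage sketch, assuming the example function above: run
  # _example_square over a few argument tuples on two worker processes and
  # print results as they arrive in arbitrary order. Heartbeats (emitted when
  # a result takes longer than heartbeat_timeout seconds) are skipped. More
  # work could be enqueued from inside the loop via pool.add([args]).
  pool = Pool(2)
  work_items = [(i,) for i in range(10)]
  for maybe in pool.imap_unordered(_example_square, work_items):
    if maybe.heartbeat:
      continue
    print("result: %d" % maybe.value)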