Java程序  |  264行  |  8.17 KB

/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.streamhtmlparser.util;

import com.google.common.base.Preconditions;

import java.util.Arrays;

/**
 * Implements a circular (ring) buffer of characters with specialized
 * application logic in order to determine the context of some
 * Javascript content that is being parsed.
 *
 * This is a specialized class - of no use to external code -
 * which aims to be 100% compatible with the corresponding logic
 * in the C-version of the HtmlParser, specifically
 * <code>jsparser.c</code>. In particular:
 * <ul>
 *   <li> The API is odd, using negative indexes to access content in
 *        the buffer. Changing the API would mean changing the test
 *        cases and have more difficulty determining whether we are
 *        remaining compatible with the C-version. It is left as an
 *        exercise for once the code is very stable and proven.
 *   <li> Repeated whitespace is folded into just one character to
 *        use the space available efficiently.
 *   <li> The buffer size is fixed. There is currently no need to
 *        make it variable so we avoid the need for constructors.
 * </ul>
 */
public class JavascriptTokenBuffer {

  /**
   * Size of the ring buffer used to lookup the last token in the javascript
   * stream. The size is somewhat arbitrary but must be larger than
   * the biggest token we want to lookup plus three: Two delimiters plus
   * an empty ring buffer slot.
   */
  private static final int BUFFER_SIZE = 18;

  /** Storage implementing the circular buffer. */
  private final char[] buffer;

  /** Index of the first item in our circular buffer. */
  private int startIndex;

  /** Index of the last item in our circular buffer. */
  private int endIndex;

  /**
   * Constructs an empty javascript token buffer. The size is fixed,
   * see {@link #BUFFER_SIZE}.
   */
  public JavascriptTokenBuffer() {
    buffer = new char[BUFFER_SIZE];
    startIndex = 0;
    endIndex = 0;
  }

  /**
   * Constructs a javascript token buffer that is identical to
   * the one given. In particular, it has the same size and contents.
   *
   * @param aJavascriptTokenBuffer the {@code JavascriptTokenBuffer} to copy
   */
  public JavascriptTokenBuffer(JavascriptTokenBuffer aJavascriptTokenBuffer) {
    buffer = Arrays.copyOf(aJavascriptTokenBuffer.buffer,
                           aJavascriptTokenBuffer.buffer.length);
    startIndex = aJavascriptTokenBuffer.startIndex;
    endIndex = aJavascriptTokenBuffer.endIndex;
  }

  /**
   * A simple wrapper over <code>appendChar</code>, it appends a string
   * to the buffer. Sequences of whitespace and newlines
   * are folded into one character to save space. Null strings are
   * not allowed.
   *
   * @param input the {@code String} to append, cannot be {@code null}
   */
  // TODO: Move to testing since not used in code.
  public void appendString(String input) {
    if (input == null) {
      throw new NullPointerException("input == null is not allowed");
    }
    for (int i = 0; i < input.length(); i++) {
      appendChar(input.charAt(i));
    }
  }

  /**
   * Appends a character to the buffer. We fold sequences of whitespace and
   * newlines into one to save space.
   *
   * @param input the {@code char} to append
   */
  public void appendChar(char input) {
    if (HtmlUtils.isJavascriptWhitespace(input) &&
        HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
      return;
    }
    buffer[endIndex] = input;
    endIndex = (endIndex + 1) % buffer.length;
    if (endIndex == startIndex) {
      startIndex = (endIndex + 1) % buffer.length;
    }
  }

  /**
   * Returns the last character in the buffer and removes it from the buffer
   * or the NUL character '\0' if the buffer is empty.
   *
   * @return last character in the buffer or '\0' if the buffer is empty
   */
  public char popChar() {
    if (startIndex == endIndex) {
      return '\0';
    }
    endIndex--;
    if (endIndex < 0) {
      endIndex += buffer.length;
    }
    return buffer[endIndex];
  }

  /**
   * Returns the character at a given index in the buffer or nul ('\0')
   * if the index is outside the range of the buffer. Such could happen
   * if the buffer is not filled enough or the index is larger than the
   * size of the buffer.
   *
   * <p>Position must be negative where -1 is the index of the last
   * character in the buffer.
   *
   * @param position The index into the buffer
   *
   * @return character at the requested index
   */
  public char getChar(int position) {
    assert(position < 0);   // Developer error if it triggers.

    int absolutePosition = getAbsolutePosition(position);
    if (absolutePosition < 0) {
      return '\0';
    }

    return buffer[absolutePosition];
  }

  /**
   * Sets the given {@code input} at the given {@code position} of the buffer.
   * Returns {@code true} if we succeeded or {@code false} if we
   * failed (i.e. the write was beyond the buffer boundary).
   *
   * <p>Index positions are negative where -1 is the index of the
   * last character in the buffer.
   *
   * @param position The index at which to set the character
   * @param input The character to set in the buffer
   * @return {@code true} if we succeeded, {@code false} otherwise
   */
  public boolean setChar(int position, char input) {
    assert(position < 0);   // Developer error if it triggers.

    int absolutePosition = getAbsolutePosition(position);
    if (absolutePosition < 0) {
      return false;
    }

    buffer[absolutePosition] = input;
    return true;
  }


  /**
   * Returns the last javascript identifier/keyword in the buffer.
   *
   * @return the last identifier or {@code null} if none was found
   */
  public String getLastIdentifier() {
    int end = -1;

    if (HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
      end--;
    }
    int position;
    for (position = end; HtmlUtils.isJavascriptIdentifier(getChar(position));
         position--) {
    }
    if ((position + 1) >= end) {
      return null;
    }
    return slice(position + 1, end);
  }

  /**
   * Returns a slice of the buffer delimited by the given indices.
   *
   * The start and end indexes represent the start and end of the
   * slice to copy. If the start argument extends beyond the beginning
   * of the buffer, the slice will only contain characters
   * starting from the beginning of the buffer.
   *
   * @param start The index of the first character the copy
   * @param end the index of the last character to copy
   *
   * @return {@code String} between the given indices
   */
  public String slice(int start, int end) {
    // Developer error if any of the asserts below fail.
    Preconditions.checkArgument(start <= end);
    Preconditions.checkArgument(start < 0);
    Preconditions.checkArgument(end < 0);
    
    StringBuffer output = new StringBuffer();
    for (int position = start; position <= end; position++) {
      char c = getChar(position);
      if (c != '\0') {
        output.append(c);
      }
    }
    return new String(output);
  }

  /**
   * Returns the position relative to the start of the buffer or -1
   * if the position is past the size of the buffer.
   *
   * @param position the index to be translated
   * @return the position relative to the start of the buffer
   */
  private int getAbsolutePosition(int position) {
    assert (position < 0);   // Developer error if it triggers.
    if (position <= -buffer.length) {
      return -1;
    }
    int len = endIndex - startIndex;
    if (len < 0) {
      len += buffer.length;
    }
    if (position < -len) {
      return -1;
    }
    int absolutePosition = (position + endIndex) % buffer.length;
    if (absolutePosition < 0) {
      absolutePosition += buffer.length;
    }
    return absolutePosition;
  }
}