/*
* Copyright (C) 2008 Apple Inc. All Rights Reserved.
* Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "HTMLTokenizer.h"
#include "HTMLEntityParser.h"
#include "HTMLToken.h"
#include "HTMLTreeBuilder.h"
#include "HTMLNames.h"
#include "NotImplemented.h"
#include <wtf/ASCIICType.h>
#include <wtf/CurrentTime.h>
#include <wtf/UnusedParam.h>
#include <wtf/text/AtomicString.h>
#include <wtf/text/CString.h>
#include <wtf/unicode/Unicode.h>
using namespace WTF;
namespace WebCore {
using namespace HTMLNames;
// Sentinel character value (0) that the tokenizer states compare the current
// input character against in order to detect the end of the input.
const UChar HTMLTokenizer::InputStreamPreprocessor::endOfFileMarker = 0;
namespace {
// Maps an ASCII uppercase letter onto its lowercase counterpart. The caller
// must pass an uppercase ASCII letter; this is asserted, not checked.
inline UChar toLowerCase(UChar cc)
{
    ASSERT(isASCIIUpper(cc));
    // The lowercase letters sit exactly 0x20 code points above the uppercase ones.
    return cc + ('a' - 'A');
}
// Returns true for the characters this tokenizer treats as whitespace:
// space, line feed (0x0A), tab (0x09) and form feed (0x0C).
inline bool isTokenizerWhitespace(UChar cc)
{
    switch (cc) {
    case ' ':
    case '\x0A':
    case '\x09':
    case '\x0C':
        return true;
    default:
        return false;
    }
}
// Walks the NUL-terminated |expectedCharacters| and, for each character in
// order, calls source.advanceAndASSERTIgnoringCase() with it.
inline void advanceStringAndASSERTIgnoringCase(SegmentedString& source, const char* expectedCharacters)
{
    for (const char* cursor = expectedCharacters; *cursor; ++cursor)
        source.advanceAndASSERTIgnoringCase(*cursor);
}
// Walks the NUL-terminated |expectedCharacters| and, for each character in
// order, calls source.advanceAndASSERT() with it.
inline void advanceStringAndASSERT(SegmentedString& source, const char* expectedCharacters)
{
    for (const char* cursor = expectedCharacters; *cursor; ++cursor)
        source.advanceAndASSERT(*cursor);
}
// Returns true when |vector| and |string| have the same length and hold the
// same sequence of UChars (compared bytewise).
inline bool vectorEqualsString(const Vector<UChar, 32>& vector, const String& string)
{
    if (vector.size() != string.length())
        return false;
    // FIXME: Is there a higher-level function we should be calling here?
    const size_t byteCount = vector.size() * sizeof(UChar);
    return !memcmp(string.characters(), vector.data(), byteCount);
}
// Returns true when |state| is one of the "end tag open" / "end tag name"
// states of the RCDATA, RAWTEXT, script data or escaped script data families.
inline bool isEndTagBufferingState(HTMLTokenizer::State state)
{
    return state == HTMLTokenizer::RCDATAEndTagOpenState
        || state == HTMLTokenizer::RCDATAEndTagNameState
        || state == HTMLTokenizer::RAWTEXTEndTagOpenState
        || state == HTMLTokenizer::RAWTEXTEndTagNameState
        || state == HTMLTokenizer::ScriptDataEndTagOpenState
        || state == HTMLTokenizer::ScriptDataEndTagNameState
        || state == HTMLTokenizer::ScriptDataEscapedEndTagOpenState
        || state == HTMLTokenizer::ScriptDataEscapedEndTagNameState;
}
}
// Creates a tokenizer and puts it into its initial state via reset().
// |usePreHTML5ParserQuirks| is stored and later consulted by the tag and
// attribute states, where a '<' ends the current tag when the quirk is on.
HTMLTokenizer::HTMLTokenizer(bool usePreHTML5ParserQuirks)
    : m_inputStreamPreprocessor(this)
    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
{
    reset();
}
// The destructor body is intentionally empty.
HTMLTokenizer::~HTMLTokenizer()
{
}
// Puts the tokenizer back into the data state with no token in progress,
// line number zero, and every mode flag cleared.
void HTMLTokenizer::reset()
{
    // Position in the state machine and in the input.
    m_state = DataState;
    m_lineNumber = 0;

    // No token is being built until nextToken() is handed one.
    m_token = 0;

    // Mode flags.
    m_shouldAllowCDATA = false;
    m_forceNullCharacterReplacement = false;
    m_skipLeadingNewLineForListing = false;

    // Only meaningful while a character reference inside an attribute value
    // is being parsed.
    m_additionalAllowedCharacter = '\0';
}
// Tries to consume a character reference from |source| and buffers the
// result as character data.
//
// Returns false when consumeHTMLEntity() reports that there were not enough
// characters to decide; nothing is buffered in that case. Otherwise returns
// true after buffering either the decoded characters or, when decoding did
// not succeed, a literal '&'.
inline bool HTMLTokenizer::processEntity(SegmentedString& source)
{
    Vector<UChar, 16> decodedEntity;
    bool notEnoughCharacters = false;
    const bool decoded = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
    if (notEnoughCharacters)
        return false;

    if (decoded) {
        for (size_t i = 0; i < decodedEntity.size(); ++i)
            bufferCharacter(decodedEntity[i]);
        return true;
    }

    // A failed decode must not have produced any output.
    ASSERT(decodedEntity.isEmpty());
    bufferCharacter('&');
    return true;
}
#if COMPILER(MSVC)
// We need to disable the "unreachable code" warning because we want to assert
// that some code points aren't reached in the state machine.
#pragma warning(disable: 4702)
#endif
// BEGIN_STATE opens a state handler inside the switch over m_state. It
// expands to both a case label (entered through the switch) and a goto label
// of the same name (entered through the state-transition macros that
// "goto stateName").
#define BEGIN_STATE(stateName) case stateName: stateName:
// END_STATE closes a state handler. Every path through a handler is expected
// to leave via return or goto, so reaching the end trips the assertion.
#define END_STATE() ASSERT_NOT_REACHED(); break;
// We use this macro when the HTML5 spec says "reconsume the current input
// character in the <mumble> state."
// It records the new state in m_state and jumps straight to that state's
// label without touching the input, so the handler sees the same cc.
#define RECONSUME_IN(stateName) \
do { \
m_state = stateName; \
goto stateName; \
} while (false)
// We use this macro when the HTML5 spec says "consume the next input
// character ... and switch to the <mumble> state."
// m_state is updated before the input is advanced. If the preprocessor's
// advance() reports failure, the enclosing function returns
// haveBufferedCharacterToken() with m_state already set to stateName.
// Otherwise cc is reloaded from the preprocessor and control jumps to the
// state's label. Requires |source| and |cc| to be in scope at the use site.
#define ADVANCE_TO(stateName) \
do { \
m_state = stateName; \
if (!m_inputStreamPreprocessor.advance(source, m_lineNumber)) \
return haveBufferedCharacterToken(); \
cc = m_inputStreamPreprocessor.nextInputCharacter(); \
goto stateName; \
} while (false)
// Sometimes there's more complicated logic in the spec that separates when
// we consume the next input character and when we switch to a particular
// state. We handle those cases by advancing the source directly and using
// this macro to switch to the indicated state.
// Unlike ADVANCE_TO, this macro does not advance the source: it only peeks
// at the current character. If the source is empty or peek() reports
// failure, the enclosing function returns haveBufferedCharacterToken() with
// m_state already set to stateName.
#define SWITCH_TO(stateName) \
do { \
m_state = stateName; \
if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \
return haveBufferedCharacterToken(); \
cc = m_inputStreamPreprocessor.nextInputCharacter(); \
goto stateName; \
} while (false)
// When the current token is a start tag, records its name in
// m_appropriateEndTagName. Tokens of any other type leave the saved name
// untouched. The token must already have a type.
inline void HTMLTokenizer::saveEndTagNameIfNeeded()
{
    ASSERT(m_token->type() != HTMLToken::Uninitialized);
    if (m_token->type() != HTMLToken::StartTag)
        return;
    m_appropriateEndTagName = m_token->name();
}
// Implements the HTML5 spec's "Emit the current <mumble> token. Switch to
// the <mumble> state." We say "resume" rather than "switch" because this
// function returns to the caller of the tokenizer: the tokenizer will be in
// |state| when it is resumed (i.e., called again).
//
// Consumes the current input character, records |state|, saves the end tag
// name if the token is a start tag, and always returns true.
bool HTMLTokenizer::emitAndResumeIn(SegmentedString& source, State state)
{
    source.advance(m_lineNumber);
    // Everything past the advance is shared with the non-advancing variant.
    return emitAndReconsumeIn(source, state);
}
// Same as emitAndResumeIn, but leaves the current input character in the
// source so that it is seen again on the next call. Always returns true.
bool HTMLTokenizer::emitAndReconsumeIn(SegmentedString&, State state)
{
    saveEndTagNameIfNeeded();
    m_state = state;
    return true;
}
// Emits the EndOfFile token, unless buffered characters are still waiting
// to be emitted. In that case nothing is changed here, so that the buffered
// character token goes out first. Always returns true.
bool HTMLTokenizer::emitEndOfFile(SegmentedString& source)
{
    if (!haveBufferedCharacterToken()) {
        m_state = DataState;
        source.advance(m_lineNumber);
        m_token->clear();
        m_token->makeEndOfFile();
    }
    return true;
}
// Consumes the current input character and, if no character token is in
// progress, turns the buffered end tag name into the current end tag token.
//
// Returns true when the current token is a Character token; the buffered
// name is then left in m_bufferedEndTagName. Returns false after the end
// tag has been started and the buffer cleared.
bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
{
    const bool charactersPending = m_token->type() == HTMLToken::Character;
    ASSERT(charactersPending || m_token->type() == HTMLToken::Uninitialized);

    source.advance(m_lineNumber);
    if (!charactersPending) {
        m_token->beginEndTag(m_bufferedEndTagName);
        m_bufferedEndTagName.clear();
    }
    return charactersPending;
}
// Used by the end-tag-name states once the buffered name is known to be the
// appropriate end tag. Records stateName in m_state, then calls
// flushBufferedEndTag(), which consumes the current character. If that
// returns true (a character token is still pending) the enclosing function
// returns true right away. Otherwise the macro behaves like SWITCH_TO: it
// peeks at the current character, returning haveBufferedCharacterToken() if
// none is available, and jumps to the state's label.
#define FLUSH_AND_ADVANCE_TO(stateName) \
do { \
m_state = stateName; \
if (flushBufferedEndTag(source)) \
return true; \
if (source.isEmpty() \
|| !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \
return haveBufferedCharacterToken(); \
cc = m_inputStreamPreprocessor.nextInputCharacter(); \
goto stateName; \
} while (false)
// Flushes the buffered end tag, records |state| as the state to resume in,
// and always returns true. The result of flushBufferedEndTag() is
// deliberately ignored: when a character token is pending, the end tag name
// stays buffered while the characters are emitted.
bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, State state)
{
    flushBufferedEndTag(source);
    m_state = state;
    return true;
}
bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
{
// If we have a token in progress, then we're supposed to be called back
// with the same token so we can finish it.
ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
m_token = &token;
if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {
// FIXME: This should call flushBufferedEndTag().
// We started an end tag during our last iteration.
m_token->beginEndTag(m_bufferedEndTagName);
m_bufferedEndTagName.clear();
if (m_state == DataState) {
// We're back in the data state, so we must be done with the tag.
return true;
}
}
if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber))
return haveBufferedCharacterToken();
UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
// Note that this logic is different than the generic \r\n collapsing
// handled in the input stream preprocessor. This logic is here as an
// "authoring convenience" so folks can write:
//
// <pre>
// lorem ipsum
// lorem ipsum
// </pre>
//
// without getting an extra newline at the start of their <pre> element.
if (m_skipLeadingNewLineForListing) {
m_skipLeadingNewLineForListing = false;
if (cc == '\n') {
if (m_state == DataState)
ADVANCE_TO(DataState);
if (m_state == RCDATAState)
ADVANCE_TO(RCDATAState);
// When parsing text/plain documents, we run the tokenizer in the
// PLAINTEXTState and ignore m_skipLeadingNewLineForListing.
ASSERT(m_state == PLAINTEXTState);
}
}
// Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
switch (m_state) {
BEGIN_STATE(DataState) {
if (cc == '&')
ADVANCE_TO(CharacterReferenceInDataState);
else if (cc == '<') {
if (m_token->type() == HTMLToken::Character) {
// We have a bunch of character tokens queued up that we
// are emitting lazily here.
return true;
}
ADVANCE_TO(TagOpenState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker)
return emitEndOfFile(source);
else {
bufferCharacter(cc);
ADVANCE_TO(DataState);
}
}
END_STATE()
BEGIN_STATE(CharacterReferenceInDataState) {
if (!processEntity(source))
return haveBufferedCharacterToken();
SWITCH_TO(DataState);
}
END_STATE()
BEGIN_STATE(RCDATAState) {
if (cc == '&')
ADVANCE_TO(CharacterReferenceInRCDATAState);
else if (cc == '<')
ADVANCE_TO(RCDATALessThanSignState);
else if (cc == InputStreamPreprocessor::endOfFileMarker)
return emitEndOfFile(source);
else {
bufferCharacter(cc);
ADVANCE_TO(RCDATAState);
}
}
END_STATE()
BEGIN_STATE(CharacterReferenceInRCDATAState) {
if (!processEntity(source))
return haveBufferedCharacterToken();
SWITCH_TO(RCDATAState);
}
END_STATE()
BEGIN_STATE(RAWTEXTState) {
if (cc == '<')
ADVANCE_TO(RAWTEXTLessThanSignState);
else if (cc == InputStreamPreprocessor::endOfFileMarker)
return emitEndOfFile(source);
else {
bufferCharacter(cc);
ADVANCE_TO(RAWTEXTState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataState) {
if (cc == '<')
ADVANCE_TO(ScriptDataLessThanSignState);
else if (cc == InputStreamPreprocessor::endOfFileMarker)
return emitEndOfFile(source);
else {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataState);
}
}
END_STATE()
BEGIN_STATE(PLAINTEXTState) {
if (cc == InputStreamPreprocessor::endOfFileMarker)
return emitEndOfFile(source);
else
bufferCharacter(cc);
ADVANCE_TO(PLAINTEXTState);
}
END_STATE()
BEGIN_STATE(TagOpenState) {
if (cc == '!')
ADVANCE_TO(MarkupDeclarationOpenState);
else if (cc == '/')
ADVANCE_TO(EndTagOpenState);
else if (isASCIIUpper(cc)) {
m_token->beginStartTag(toLowerCase(cc));
ADVANCE_TO(TagNameState);
} else if (isASCIILower(cc)) {
m_token->beginStartTag(cc);
ADVANCE_TO(TagNameState);
} else if (cc == '?') {
parseError();
// The spec consumes the current character before switching
// to the bogus comment state, but it's easier to implement
// if we reconsume the current character.
RECONSUME_IN(BogusCommentState);
} else {
parseError();
bufferCharacter('<');
RECONSUME_IN(DataState);
}
}
END_STATE()
BEGIN_STATE(EndTagOpenState) {
if (isASCIIUpper(cc)) {
m_token->beginEndTag(toLowerCase(cc));
ADVANCE_TO(TagNameState);
} else if (isASCIILower(cc)) {
m_token->beginEndTag(cc);
ADVANCE_TO(TagNameState);
} else if (cc == '>') {
parseError();
ADVANCE_TO(DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
bufferCharacter('<');
bufferCharacter('/');
RECONSUME_IN(DataState);
} else {
parseError();
RECONSUME_IN(BogusCommentState);
}
}
END_STATE()
BEGIN_STATE(TagNameState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeAttributeNameState);
else if (cc == '/')
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (m_usePreHTML5ParserQuirks && cc == '<')
return emitAndReconsumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->appendToName(toLowerCase(cc));
ADVANCE_TO(TagNameState);
} if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
m_token->appendToName(cc);
ADVANCE_TO(TagNameState);
}
}
END_STATE()
BEGIN_STATE(RCDATALessThanSignState) {
if (cc == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
ADVANCE_TO(RCDATAEndTagOpenState);
} else {
bufferCharacter('<');
RECONSUME_IN(RCDATAState);
}
}
END_STATE()
BEGIN_STATE(RCDATAEndTagOpenState) {
if (isASCIIUpper(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(toLowerCase(cc));
ADVANCE_TO(RCDATAEndTagNameState);
} else if (isASCIILower(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(cc);
ADVANCE_TO(RCDATAEndTagNameState);
} else {
bufferCharacter('<');
bufferCharacter('/');
RECONSUME_IN(RCDATAState);
}
}
END_STATE()
BEGIN_STATE(RCDATAEndTagNameState) {
if (isASCIIUpper(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(toLowerCase(cc));
ADVANCE_TO(RCDATAEndTagNameState);
} else if (isASCIILower(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(cc);
ADVANCE_TO(RCDATAEndTagNameState);
} else {
if (isTokenizerWhitespace(cc)) {
if (isAppropriateEndTag())
FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
} else if (cc == '/') {
if (isAppropriateEndTag())
FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
} else if (cc == '>') {
if (isAppropriateEndTag())
return flushEmitAndResumeIn(source, DataState);
}
bufferCharacter('<');
bufferCharacter('/');
m_token->appendToCharacter(m_temporaryBuffer);
m_bufferedEndTagName.clear();
RECONSUME_IN(RCDATAState);
}
}
END_STATE()
BEGIN_STATE(RAWTEXTLessThanSignState) {
if (cc == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
ADVANCE_TO(RAWTEXTEndTagOpenState);
} else {
bufferCharacter('<');
RECONSUME_IN(RAWTEXTState);
}
}
END_STATE()
BEGIN_STATE(RAWTEXTEndTagOpenState) {
if (isASCIIUpper(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(toLowerCase(cc));
ADVANCE_TO(RAWTEXTEndTagNameState);
} else if (isASCIILower(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(cc);
ADVANCE_TO(RAWTEXTEndTagNameState);
} else {
bufferCharacter('<');
bufferCharacter('/');
RECONSUME_IN(RAWTEXTState);
}
}
END_STATE()
BEGIN_STATE(RAWTEXTEndTagNameState) {
if (isASCIIUpper(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(toLowerCase(cc));
ADVANCE_TO(RAWTEXTEndTagNameState);
} else if (isASCIILower(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(cc);
ADVANCE_TO(RAWTEXTEndTagNameState);
} else {
if (isTokenizerWhitespace(cc)) {
if (isAppropriateEndTag())
FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
} else if (cc == '/') {
if (isAppropriateEndTag())
FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
} else if (cc == '>') {
if (isAppropriateEndTag())
return flushEmitAndResumeIn(source, DataState);
}
bufferCharacter('<');
bufferCharacter('/');
m_token->appendToCharacter(m_temporaryBuffer);
m_bufferedEndTagName.clear();
RECONSUME_IN(RAWTEXTState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataLessThanSignState) {
if (cc == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
ADVANCE_TO(ScriptDataEndTagOpenState);
} else if (cc == '!') {
bufferCharacter('<');
bufferCharacter('!');
ADVANCE_TO(ScriptDataEscapeStartState);
} else {
bufferCharacter('<');
RECONSUME_IN(ScriptDataState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataEndTagOpenState) {
if (isASCIIUpper(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(toLowerCase(cc));
ADVANCE_TO(ScriptDataEndTagNameState);
} else if (isASCIILower(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(cc);
ADVANCE_TO(ScriptDataEndTagNameState);
} else {
bufferCharacter('<');
bufferCharacter('/');
RECONSUME_IN(ScriptDataState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataEndTagNameState) {
if (isASCIIUpper(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(toLowerCase(cc));
ADVANCE_TO(ScriptDataEndTagNameState);
} else if (isASCIILower(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(cc);
ADVANCE_TO(ScriptDataEndTagNameState);
} else {
if (isTokenizerWhitespace(cc)) {
if (isAppropriateEndTag())
FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
} else if (cc == '/') {
if (isAppropriateEndTag())
FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
} else if (cc == '>') {
if (isAppropriateEndTag())
return flushEmitAndResumeIn(source, DataState);
}
bufferCharacter('<');
bufferCharacter('/');
m_token->appendToCharacter(m_temporaryBuffer);
m_bufferedEndTagName.clear();
RECONSUME_IN(ScriptDataState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataEscapeStartState) {
if (cc == '-') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataEscapeStartDashState);
} else
RECONSUME_IN(ScriptDataState);
}
END_STATE()
BEGIN_STATE(ScriptDataEscapeStartDashState) {
if (cc == '-') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataEscapedDashDashState);
} else
RECONSUME_IN(ScriptDataState);
}
END_STATE()
BEGIN_STATE(ScriptDataEscapedState) {
if (cc == '-') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataEscapedDashState);
} else if (cc == '<')
ADVANCE_TO(ScriptDataEscapedLessThanSignState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataEscapedState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataEscapedDashState) {
if (cc == '-') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataEscapedDashDashState);
} else if (cc == '<')
ADVANCE_TO(ScriptDataEscapedLessThanSignState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataEscapedState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataEscapedDashDashState) {
if (cc == '-') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataEscapedDashDashState);
} else if (cc == '<')
ADVANCE_TO(ScriptDataEscapedLessThanSignState);
else if (cc == '>') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataState);
} if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataEscapedState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
if (cc == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
} else if (isASCIIUpper(cc)) {
bufferCharacter('<');
bufferCharacter(cc);
m_temporaryBuffer.clear();
m_temporaryBuffer.append(toLowerCase(cc));
ADVANCE_TO(ScriptDataDoubleEscapeStartState);
} else if (isASCIILower(cc)) {
bufferCharacter('<');
bufferCharacter(cc);
m_temporaryBuffer.clear();
m_temporaryBuffer.append(cc);
ADVANCE_TO(ScriptDataDoubleEscapeStartState);
} else {
bufferCharacter('<');
RECONSUME_IN(ScriptDataEscapedState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
if (isASCIIUpper(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(toLowerCase(cc));
ADVANCE_TO(ScriptDataEscapedEndTagNameState);
} else if (isASCIILower(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(cc);
ADVANCE_TO(ScriptDataEscapedEndTagNameState);
} else {
bufferCharacter('<');
bufferCharacter('/');
RECONSUME_IN(ScriptDataEscapedState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
if (isASCIIUpper(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(toLowerCase(cc));
ADVANCE_TO(ScriptDataEscapedEndTagNameState);
} else if (isASCIILower(cc)) {
m_temporaryBuffer.append(cc);
addToPossibleEndTag(cc);
ADVANCE_TO(ScriptDataEscapedEndTagNameState);
} else {
if (isTokenizerWhitespace(cc)) {
if (isAppropriateEndTag())
FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
} else if (cc == '/') {
if (isAppropriateEndTag())
FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
} else if (cc == '>') {
if (isAppropriateEndTag())
return flushEmitAndResumeIn(source, DataState);
}
bufferCharacter('<');
bufferCharacter('/');
m_token->appendToCharacter(m_temporaryBuffer);
m_bufferedEndTagName.clear();
RECONSUME_IN(ScriptDataEscapedState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
bufferCharacter(cc);
if (temporaryBufferIs(scriptTag.localName()))
ADVANCE_TO(ScriptDataDoubleEscapedState);
else
ADVANCE_TO(ScriptDataEscapedState);
} else if (isASCIIUpper(cc)) {
bufferCharacter(cc);
m_temporaryBuffer.append(toLowerCase(cc));
ADVANCE_TO(ScriptDataDoubleEscapeStartState);
} else if (isASCIILower(cc)) {
bufferCharacter(cc);
m_temporaryBuffer.append(cc);
ADVANCE_TO(ScriptDataDoubleEscapeStartState);
} else
RECONSUME_IN(ScriptDataEscapedState);
}
END_STATE()
BEGIN_STATE(ScriptDataDoubleEscapedState) {
if (cc == '-') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataDoubleEscapedDashState);
} else if (cc == '<') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataDoubleEscapedState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
if (cc == '-') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
} else if (cc == '<') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataDoubleEscapedState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
if (cc == '-') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
} else if (cc == '<') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
} else if (cc == '>') {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
bufferCharacter(cc);
ADVANCE_TO(ScriptDataDoubleEscapedState);
}
}
END_STATE()
BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
if (cc == '/') {
bufferCharacter(cc);
m_temporaryBuffer.clear();
ADVANCE_TO(ScriptDataDoubleEscapeEndState);
} else
RECONSUME_IN(ScriptDataDoubleEscapedState);
}
END_STATE()
BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
bufferCharacter(cc);
if (temporaryBufferIs(scriptTag.localName()))
ADVANCE_TO(ScriptDataEscapedState);
else
ADVANCE_TO(ScriptDataDoubleEscapedState);
} else if (isASCIIUpper(cc)) {
bufferCharacter(cc);
m_temporaryBuffer.append(toLowerCase(cc));
ADVANCE_TO(ScriptDataDoubleEscapeEndState);
} else if (isASCIILower(cc)) {
bufferCharacter(cc);
m_temporaryBuffer.append(cc);
ADVANCE_TO(ScriptDataDoubleEscapeEndState);
} else
RECONSUME_IN(ScriptDataDoubleEscapedState);
}
END_STATE()
BEGIN_STATE(BeforeAttributeNameState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeAttributeNameState);
else if (cc == '/')
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (m_usePreHTML5ParserQuirks && cc == '<')
return emitAndReconsumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
parseError();
m_token->addNewAttribute();
m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(cc);
ADVANCE_TO(AttributeNameState);
}
}
END_STATE()
BEGIN_STATE(AttributeNameState) {
if (isTokenizerWhitespace(cc)) {
m_token->endAttributeName(source.numberOfCharactersConsumed());
ADVANCE_TO(AfterAttributeNameState);
} else if (cc == '/') {
m_token->endAttributeName(source.numberOfCharactersConsumed());
ADVANCE_TO(SelfClosingStartTagState);
} else if (cc == '=') {
m_token->endAttributeName(source.numberOfCharactersConsumed());
ADVANCE_TO(BeforeAttributeValueState);
} else if (cc == '>') {
m_token->endAttributeName(source.numberOfCharactersConsumed());
return emitAndResumeIn(source, DataState);
} else if (m_usePreHTML5ParserQuirks && cc == '<') {
m_token->endAttributeName(source.numberOfCharactersConsumed());
return emitAndReconsumeIn(source, DataState);
} else if (isASCIIUpper(cc)) {
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->endAttributeName(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
parseError();
m_token->appendToAttributeName(cc);
ADVANCE_TO(AttributeNameState);
}
}
END_STATE()
BEGIN_STATE(AfterAttributeNameState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(AfterAttributeNameState);
else if (cc == '/')
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '=')
ADVANCE_TO(BeforeAttributeValueState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (m_usePreHTML5ParserQuirks && cc == '<')
return emitAndReconsumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<')
parseError();
m_token->addNewAttribute();
m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(cc);
ADVANCE_TO(AttributeNameState);
}
}
END_STATE()
BEGIN_STATE(BeforeAttributeValueState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeAttributeValueState);
else if (cc == '"') {
m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
ADVANCE_TO(AttributeValueDoubleQuotedState);
} else if (cc == '&') {
m_token->beginAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(AttributeValueUnquotedState);
} else if (cc == '\'') {
m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
ADVANCE_TO(AttributeValueSingleQuotedState);
} else if (cc == '>') {
parseError();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
if (cc == '<' || cc == '=' || cc == '`')
parseError();
m_token->beginAttributeValue(source.numberOfCharactersConsumed());
m_token->appendToAttributeValue(cc);
ADVANCE_TO(AttributeValueUnquotedState);
}
}
END_STATE()
BEGIN_STATE(AttributeValueDoubleQuotedState) {
if (cc == '"') {
m_token->endAttributeValue(source.numberOfCharactersConsumed());
ADVANCE_TO(AfterAttributeValueQuotedState);
} else if (cc == '&') {
m_additionalAllowedCharacter = '"';
ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->endAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
m_token->appendToAttributeValue(cc);
ADVANCE_TO(AttributeValueDoubleQuotedState);
}
}
END_STATE()
BEGIN_STATE(AttributeValueSingleQuotedState) {
if (cc == '\'') {
m_token->endAttributeValue(source.numberOfCharactersConsumed());
ADVANCE_TO(AfterAttributeValueQuotedState);
} else if (cc == '&') {
m_additionalAllowedCharacter = '\'';
ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->endAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
m_token->appendToAttributeValue(cc);
ADVANCE_TO(AttributeValueSingleQuotedState);
}
}
END_STATE()
BEGIN_STATE(AttributeValueUnquotedState) {
if (isTokenizerWhitespace(cc)) {
m_token->endAttributeValue(source.numberOfCharactersConsumed());
ADVANCE_TO(BeforeAttributeNameState);
} else if (cc == '&') {
m_additionalAllowedCharacter = '>';
ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == '>') {
m_token->endAttributeValue(source.numberOfCharactersConsumed());
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->endAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
parseError();
m_token->appendToAttributeValue(cc);
ADVANCE_TO(AttributeValueUnquotedState);
}
}
END_STATE()
BEGIN_STATE(CharacterReferenceInAttributeValueState) {
bool notEnoughCharacters = false;
Vector<UChar, 16> decodedEntity;
bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
if (notEnoughCharacters)
return haveBufferedCharacterToken();
if (!success) {
ASSERT(decodedEntity.isEmpty());
m_token->appendToAttributeValue('&');
} else {
Vector<UChar>::const_iterator iter = decodedEntity.begin();
for (; iter != decodedEntity.end(); ++iter)
m_token->appendToAttributeValue(*iter);
}
// We're supposed to switch back to the attribute value state that
// we were in when we were switched into this state. Rather than
// keeping track of this explicitly, we observe that the previous
// state can be determined by m_additionalAllowedCharacter.
if (m_additionalAllowedCharacter == '"')
SWITCH_TO(AttributeValueDoubleQuotedState);
else if (m_additionalAllowedCharacter == '\'')
SWITCH_TO(AttributeValueSingleQuotedState);
else if (m_additionalAllowedCharacter == '>')
SWITCH_TO(AttributeValueUnquotedState);
else
ASSERT_NOT_REACHED();
}
END_STATE()
BEGIN_STATE(AfterAttributeValueQuotedState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeAttributeNameState);
else if (cc == '/')
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (m_usePreHTML5ParserQuirks && cc == '<')
return emitAndReconsumeIn(source, DataState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
parseError();
RECONSUME_IN(BeforeAttributeNameState);
}
}
END_STATE()
BEGIN_STATE(SelfClosingStartTagState) {
if (cc == '>') {
m_token->setSelfClosing();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
} else {
parseError();
RECONSUME_IN(BeforeAttributeNameState);
}
}
END_STATE()
BEGIN_STATE(BogusCommentState) {
m_token->beginComment();
RECONSUME_IN(ContinueBogusCommentState);
}
END_STATE()
BEGIN_STATE(ContinueBogusCommentState) {
if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (cc == InputStreamPreprocessor::endOfFileMarker)
return emitAndReconsumeIn(source, DataState);
else {
m_token->appendToComment(cc);
ADVANCE_TO(ContinueBogusCommentState);
}
}
END_STATE()
BEGIN_STATE(MarkupDeclarationOpenState) {
DEFINE_STATIC_LOCAL(String, dashDashString, ("--"));
DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype"));
DEFINE_STATIC_LOCAL(String, cdataString, ("[CDATA["));
if (cc == '-') {
SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
if (result == SegmentedString::DidMatch) {
source.advanceAndASSERT('-');
source.advanceAndASSERT('-');
m_token->beginComment();
SWITCH_TO(CommentStartState);
} else if (result == SegmentedString::NotEnoughCharacters)
return haveBufferedCharacterToken();
} else if (cc == 'D' || cc == 'd') {
SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
if (result == SegmentedString::DidMatch) {
advanceStringAndASSERTIgnoringCase(source, "doctype");
SWITCH_TO(DOCTYPEState);
} else if (result == SegmentedString::NotEnoughCharacters)
return haveBufferedCharacterToken();
} else if (cc == '[' && shouldAllowCDATA()) {
SegmentedString::LookAheadResult result = source.lookAhead(cdataString);
if (result == SegmentedString::DidMatch) {
advanceStringAndASSERT(source, "[CDATA[");
SWITCH_TO(CDATASectionState);
} else if (result == SegmentedString::NotEnoughCharacters)
return haveBufferedCharacterToken();
}
parseError();
RECONSUME_IN(BogusCommentState);
}
END_STATE()
BEGIN_STATE(CommentStartState) {
if (cc == '-')
ADVANCE_TO(CommentStartDashState);
else if (cc == '>') {
parseError();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToComment(cc);
ADVANCE_TO(CommentState);
}
}
END_STATE()
BEGIN_STATE(CommentStartDashState) {
if (cc == '-')
ADVANCE_TO(CommentEndState);
else if (cc == '>') {
parseError();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToComment('-');
m_token->appendToComment(cc);
ADVANCE_TO(CommentState);
}
}
END_STATE()
BEGIN_STATE(CommentState) {
if (cc == '-')
ADVANCE_TO(CommentEndDashState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToComment(cc);
ADVANCE_TO(CommentState);
}
}
END_STATE()
BEGIN_STATE(CommentEndDashState) {
if (cc == '-')
ADVANCE_TO(CommentEndState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToComment('-');
m_token->appendToComment(cc);
ADVANCE_TO(CommentState);
}
}
END_STATE()
BEGIN_STATE(CommentEndState) {
if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (cc == '!') {
parseError();
ADVANCE_TO(CommentEndBangState);
} else if (cc == '-') {
parseError();
m_token->appendToComment('-');
ADVANCE_TO(CommentEndState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, DataState);
} else {
parseError();
m_token->appendToComment('-');
m_token->appendToComment('-');
m_token->appendToComment(cc);
ADVANCE_TO(CommentState);
}
}
END_STATE()
BEGIN_STATE(CommentEndBangState) {
if (cc == '-') {
m_token->appendToComment('-');
m_token->appendToComment('-');
m_token->appendToComment('!');
ADVANCE_TO(CommentEndDashState);
} else if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToComment('-');
m_token->appendToComment('-');
m_token->appendToComment('!');
m_token->appendToComment(cc);
ADVANCE_TO(CommentState);
}
}
END_STATE()
BEGIN_STATE(DOCTYPEState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeDOCTYPENameState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->beginDOCTYPE();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
parseError();
RECONSUME_IN(BeforeDOCTYPENameState);
}
}
END_STATE()
BEGIN_STATE(BeforeDOCTYPENameState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeDOCTYPENameState);
else if (isASCIIUpper(cc)) {
m_token->beginDOCTYPE(toLowerCase(cc));
ADVANCE_TO(DOCTYPENameState);
} else if (cc == '>') {
parseError();
m_token->beginDOCTYPE();
m_token->setForceQuirks();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->beginDOCTYPE();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->beginDOCTYPE(cc);
ADVANCE_TO(DOCTYPENameState);
}
}
END_STATE()
BEGIN_STATE(DOCTYPENameState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(AfterDOCTYPENameState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->appendToName(toLowerCase(cc));
ADVANCE_TO(DOCTYPENameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToName(cc);
ADVANCE_TO(DOCTYPENameState);
}
}
END_STATE()
BEGIN_STATE(AfterDOCTYPENameState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(AfterDOCTYPENameState);
if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
DEFINE_STATIC_LOCAL(String, publicString, ("public"));
DEFINE_STATIC_LOCAL(String, systemString, ("system"));
if (cc == 'P' || cc == 'p') {
SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
if (result == SegmentedString::DidMatch) {
advanceStringAndASSERTIgnoringCase(source, "public");
SWITCH_TO(AfterDOCTYPEPublicKeywordState);
} else if (result == SegmentedString::NotEnoughCharacters)
return haveBufferedCharacterToken();
} else if (cc == 'S' || cc == 's') {
SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
if (result == SegmentedString::DidMatch) {
advanceStringAndASSERTIgnoringCase(source, "system");
SWITCH_TO(AfterDOCTYPESystemKeywordState);
} else if (result == SegmentedString::NotEnoughCharacters)
return haveBufferedCharacterToken();
}
parseError();
m_token->setForceQuirks();
ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
else if (cc == '"') {
parseError();
m_token->setPublicIdentifierToEmptyString();
ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
} else if (cc == '\'') {
parseError();
m_token->setPublicIdentifierToEmptyString();
ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
} else if (cc == '>') {
parseError();
m_token->setForceQuirks();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
parseError();
m_token->setForceQuirks();
ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
else if (cc == '"') {
m_token->setPublicIdentifierToEmptyString();
ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
} else if (cc == '\'') {
m_token->setPublicIdentifierToEmptyString();
ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
} else if (cc == '>') {
parseError();
m_token->setForceQuirks();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
parseError();
m_token->setForceQuirks();
ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
if (cc == '"')
ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
else if (cc == '>') {
parseError();
m_token->setForceQuirks();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToPublicIdentifier(cc);
ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
}
}
END_STATE()
BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
if (cc == '\'')
ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
else if (cc == '>') {
parseError();
m_token->setForceQuirks();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToPublicIdentifier(cc);
ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
}
}
END_STATE()
BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (cc == '"') {
parseError();
m_token->setSystemIdentifierToEmptyString();
ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
} else if (cc == '\'') {
parseError();
m_token->setSystemIdentifierToEmptyString();
ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
parseError();
m_token->setForceQuirks();
ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (cc == '"') {
m_token->setSystemIdentifierToEmptyString();
ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
} else if (cc == '\'') {
m_token->setSystemIdentifierToEmptyString();
ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
parseError();
m_token->setForceQuirks();
ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
else if (cc == '"') {
parseError();
m_token->setSystemIdentifierToEmptyString();
ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
} else if (cc == '\'') {
parseError();
m_token->setSystemIdentifierToEmptyString();
ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
} else if (cc == '>') {
parseError();
m_token->setForceQuirks();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
parseError();
m_token->setForceQuirks();
ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
if (cc == '"') {
m_token->setSystemIdentifierToEmptyString();
ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
} else if (cc == '\'') {
m_token->setSystemIdentifierToEmptyString();
ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
} else if (cc == '>') {
parseError();
m_token->setForceQuirks();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
parseError();
m_token->setForceQuirks();
ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
if (cc == '"')
ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
else if (cc == '>') {
parseError();
m_token->setForceQuirks();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToSystemIdentifier(cc);
ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
}
}
END_STATE()
BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
if (cc == '\'')
ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
else if (cc == '>') {
parseError();
m_token->setForceQuirks();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
m_token->appendToSystemIdentifier(cc);
ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
}
}
END_STATE()
BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
m_token->setForceQuirks();
return emitAndReconsumeIn(source, DataState);
} else {
parseError();
ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
BEGIN_STATE(BogusDOCTYPEState) {
if (cc == '>')
return emitAndResumeIn(source, DataState);
else if (cc == InputStreamPreprocessor::endOfFileMarker)
return emitAndReconsumeIn(source, DataState);
ADVANCE_TO(BogusDOCTYPEState);
}
END_STATE()
BEGIN_STATE(CDATASectionState) {
if (cc == ']')
ADVANCE_TO(CDATASectionRightSquareBracketState);
else if (cc == InputStreamPreprocessor::endOfFileMarker)
RECONSUME_IN(DataState);
else {
bufferCharacter(cc);
ADVANCE_TO(CDATASectionState);
}
}
END_STATE()
BEGIN_STATE(CDATASectionRightSquareBracketState) {
if (cc == ']')
ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
else {
bufferCharacter(']');
RECONSUME_IN(CDATASectionState);
}
}
BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
if (cc == '>')
ADVANCE_TO(DataState);
else {
bufferCharacter(']');
bufferCharacter(']');
RECONSUME_IN(CDATASectionState);
}
}
END_STATE()
}
ASSERT_NOT_REACHED();
return false;
}
// Picks the tokenizer state associated with |tagName| and installs it via
// setState(). If |tagName| matches none of the tags handled here, the current
// state is left untouched.
//
// |frame| is only forwarded to HTMLTreeBuilder::pluginsEnabled() (for
// <noembed>) and HTMLTreeBuilder::scriptEnabled() (for <noscript>); those
// queries are made only when the tag name actually matches.
void HTMLTokenizer::updateStateFor(const AtomicString& tagName, Frame* frame)
{
    if (tagName == textareaTag || tagName == titleTag) {
        setState(RCDATAState);
        return;
    }

    if (tagName == plaintextTag) {
        setState(PLAINTEXTState);
        return;
    }

    if (tagName == scriptTag) {
        setState(ScriptDataState);
        return;
    }

    // Tags that select RAWTEXTState unconditionally.
    bool selectsRawText = tagName == styleTag
        || tagName == iframeTag
        || tagName == xmpTag
        || tagName == noframesTag;

    // <noembed> and <noscript> select RAWTEXTState only when the matching
    // feature is reported as enabled for |frame|.
    if (!selectsRawText && tagName == noembedTag)
        selectsRawText = HTMLTreeBuilder::pluginsEnabled(frame);
    else if (!selectsRawText && tagName == noscriptTag)
        selectsRawText = HTMLTreeBuilder::scriptEnabled(frame);

    if (selectsRawText)
        setState(RAWTEXTState);
}
// Reports whether m_temporaryBuffer matches |expectedString|, as decided by
// vectorEqualsString().
inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
{
    const bool bufferMatches = vectorEqualsString(m_temporaryBuffer, expectedString);
    return bufferMatches;
}
// Appends |cc| to m_bufferedEndTagName.
//
// Debug builds assert that the current state (m_state) satisfies
// isEndTagBufferingState(); calling this from any other state is a
// programming error.
inline void HTMLTokenizer::addToPossibleEndTag(UChar cc)
{
    ASSERT(isEndTagBufferingState(this->m_state));

    this->m_bufferedEndTagName.append(cc);
}
// Returns true when the end tag name buffered so far (m_bufferedEndTagName)
// is equal to m_appropriateEndTagName.
inline bool HTMLTokenizer::isAppropriateEndTag()
{
    const bool namesMatch = (m_bufferedEndTagName == m_appropriateEndTagName);
    return namesMatch;
}
// Adds |character| to the token under construction.
//
// The token is first passed through ensureIsCharacterToken(), then the
// character is appended to it. The end-of-file marker must never be buffered;
// debug builds assert on it.
inline void HTMLTokenizer::bufferCharacter(UChar character)
{
    ASSERT(InputStreamPreprocessor::endOfFileMarker != character);

    // Order matters: the token has to be a character token before we append.
    this->m_token->ensureIsCharacterToken();
    this->m_token->appendToCharacter(character);
}
// Called by the tokenizer wherever it encounters a parse error. No
// error reporting is implemented yet: the body only invokes
// notImplemented(), and tokenization carries on in the caller.
inline void HTMLTokenizer::parseError()
{
notImplemented();
}
// Returns true when the token currently being built (m_token) has type
// HTMLToken::Character.
inline bool HTMLTokenizer::haveBufferedCharacterToken()
{
    const bool isCharacterToken = (m_token->type() == HTMLToken::Character);
    return isCharacterToken;
}
}