/* Copyright (C) 1997 Martin Jones (mjones@kde.org) (C) 1997 Torben Weis (weis@kde.org) (C) 1998 Waldo Bastian (bastian@kde.org) (C) 1999 Lars Knoll (knoll@kde.org) Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef HTMLParser_h #define HTMLParser_h #include "QualifiedName.h" #include <wtf/Forward.h> #include <wtf/RefPtr.h> #include "HTMLParserErrorCodes.h" namespace WebCore { class DoctypeToken; class Document; class DocumentFragment; class HTMLDocument; class HTMLFormElement; class HTMLHeadElement; class HTMLMapElement; class Node; struct HTMLStackElem; struct Token; /** * The parser for HTML. It receives a stream of tokens from the HTMLTokenizer, and * builds up the Document structure from it. */ class HTMLParser : Noncopyable { public: HTMLParser(HTMLDocument*, bool reportErrors); HTMLParser(DocumentFragment*); virtual ~HTMLParser(); /** * parses one token delivered by the tokenizer */ PassRefPtr<Node> parseToken(Token*); // Parses a doctype token. void parseDoctypeToken(DoctypeToken*); /** * tokenizer says it's not going to be sending us any more tokens */ void finished(); /** * resets the parser */ void reset(); bool skipMode() const { return !m_skipModeTag.isNull(); } bool isHandlingResidualStyleAcrossBlocks() const { return m_handlingResidualStyleAcrossBlocks; } private: void setCurrent(Node*); void derefCurrent(); void setSkipMode(const QualifiedName& qName) { m_skipModeTag = qName.localName(); } PassRefPtr<Node> getNode(Token*); bool bodyCreateErrorCheck(Token*, RefPtr<Node>&); bool canvasCreateErrorCheck(Token*, RefPtr<Node>&); bool commentCreateErrorCheck(Token*, RefPtr<Node>&); bool ddCreateErrorCheck(Token*, RefPtr<Node>&); bool dtCreateErrorCheck(Token*, RefPtr<Node>&); bool formCreateErrorCheck(Token*, RefPtr<Node>&); bool framesetCreateErrorCheck(Token*, RefPtr<Node>&); bool headCreateErrorCheck(Token*, RefPtr<Node>&); bool iframeCreateErrorCheck(Token*, RefPtr<Node>&); bool isindexCreateErrorCheck(Token*, RefPtr<Node>&); bool mapCreateErrorCheck(Token*, RefPtr<Node>&); bool nestedCreateErrorCheck(Token*, RefPtr<Node>&); bool nestedPCloserCreateErrorCheck(Token*, RefPtr<Node>&); bool nestedStyleCreateErrorCheck(Token*, RefPtr<Node>&); bool noembedCreateErrorCheck(Token*, RefPtr<Node>&); bool noframesCreateErrorCheck(Token*, RefPtr<Node>&); bool nolayerCreateErrorCheck(Token*, RefPtr<Node>&); bool noscriptCreateErrorCheck(Token*, RefPtr<Node>&); bool pCloserCreateErrorCheck(Token*, RefPtr<Node>&); bool pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&); bool selectCreateErrorCheck(Token*, RefPtr<Node>&); bool tableCellCreateErrorCheck(Token*, RefPtr<Node>&); bool tableSectionCreateErrorCheck(Token*, RefPtr<Node>&); bool textCreateErrorCheck(Token*, RefPtr<Node>&); void processCloseTag(Token*); bool insertNode(Node*, bool flat = false); bool handleError(Node*, bool flat, const AtomicString& localName, int tagPriority); void pushBlock(const AtomicString& tagName, int level); void popBlock(const AtomicString& tagName, bool reportErrors = false); void popBlock(const QualifiedName& qName, bool reportErrors = false) { return popBlock(qName.localName(), reportErrors); } // Convenience function for readability. void popOneBlock(); void moveOneBlockToStack(HTMLStackElem*& head); inline HTMLStackElem* popOneBlockCommon(); void popInlineBlocks(); void freeBlock(); void createHead(); static bool isResidualStyleTag(const AtomicString& tagName); static bool isAffectedByResidualStyle(const AtomicString& tagName); void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem*); void reopenResidualStyleTags(HTMLStackElem*, Node* malformedTableParent); bool allowNestedRedundantTag(const AtomicString& tagName); static bool isHeaderTag(const AtomicString& tagName); void popNestedHeaderTag(); bool isInline(Node*) const; void startBody(); // inserts the isindex element PassRefPtr<Node> handleIsindex(Token*); void checkIfHasPElementInScope(); bool hasPElementInScope() { if (m_hasPElementInScope == Unknown) checkIfHasPElementInScope(); return m_hasPElementInScope == InScope; } void reportError(HTMLParserErrorCode errorCode, const AtomicString* tagName1 = 0, const AtomicString* tagName2 = 0, bool closeTags = false) { if (!m_reportErrors) return; reportErrorToConsole(errorCode, tagName1, tagName2, closeTags); } void reportErrorToConsole(HTMLParserErrorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags); Document* document; // The currently active element (the one new elements will be added to). Can be a document fragment, a document or an element. Node* current; // We can't ref a document, but we don't want to constantly check if a node is a document just to decide whether to deref. bool didRefCurrent; HTMLStackElem* blockStack; // The number of tags with priority minBlockLevelTagPriority or higher // currently in m_blockStack. The parser enforces a cap on this value by // adding such new elements as siblings instead of children once it is reached. size_t m_blocksInStack; enum ElementInScopeState { NotInScope, InScope, Unknown }; ElementInScopeState m_hasPElementInScope; RefPtr<HTMLFormElement> m_currentFormElement; // currently active form RefPtr<HTMLMapElement> m_currentMapElement; // current map HTMLHeadElement* head; // head element; needed for HTML which defines <base> after </head> RefPtr<Node> m_isindexElement; // a possible <isindex> element in the head bool inBody; bool haveContent; bool haveFrameSet; AtomicString m_skipModeTag; // tells the parser to discard all tags until it reaches the one specified bool m_isParsingFragment; bool m_reportErrors; bool m_handlingResidualStyleAcrossBlocks; int inStrayTableContent; }; } #endif // HTMLParser_h