# -*- coding: utf-8 -*- # markdown is released under the BSD license # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) # Copyright 2004 Manfred Stienstra (the original version) # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of the <organization> nor the # names of its contributors may be used to endorse or promote products # derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. from __future__ import unicode_literals import re import sys """ Python 3 Stuff ============================================================================= """ PY3 = sys.version_info[0] == 3 if PY3: string_type = str text_type = str int2str = chr else: string_type = basestring text_type = unicode int2str = unichr """ Constants you might want to modify ----------------------------------------------------------------------------- """ BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" "|script|noscript|form|fieldset|iframe|math" "|hr|hr/|style|li|dt|dd|thead|tbody" "|tr|th|td|section|footer|header|group|figure" "|figcaption|aside|article|canvas|output" "|progress|video)$", re.IGNORECASE) # Placeholders STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})') AMP_SUBSTITUTE = STX+"amp"+ETX """ Constants you probably do not need to change ----------------------------------------------------------------------------- """ RTL_BIDI_RANGES = ( ('\u0590', '\u07FF'), # Hebrew (0590-05FF), Arabic (0600-06FF), # Syriac (0700-074F), Arabic supplement (0750-077F), # Thaana (0780-07BF), Nko (07C0-07FF). ('\u2D30', '\u2D7F'), # Tifinagh ) # Extensions should use "markdown.util.etree" instead of "etree" (or do `from # markdown.util import etree`). Do not import it by yourself. try: # Is the C implemenation of ElementTree available? import xml.etree.cElementTree as etree from xml.etree.ElementTree import Comment # Serializers (including ours) test with non-c Comment etree.test_comment = Comment if etree.VERSION < "1.0.5": raise RuntimeError("cElementTree version 1.0.5 or higher is required.") except (ImportError, RuntimeError): # Use the Python implementation of ElementTree? import xml.etree.ElementTree as etree if etree.VERSION < "1.1": raise RuntimeError("ElementTree version 1.1 or higher is required") """ AUXILIARY GLOBAL FUNCTIONS ============================================================================= """ def isBlockLevel(tag): """Check if the tag is a block level HTML tag.""" if isinstance(tag, string_type): return BLOCK_LEVEL_ELEMENTS.match(tag) # Some ElementTree tags are not strings, so return False. return False """ MISC AUXILIARY CLASSES ============================================================================= """ class AtomicString(text_type): """A string which should not be further processed.""" pass class Processor(object): def __init__(self, markdown_instance=None): if markdown_instance: self.markdown = markdown_instance class HtmlStash(object): """ This class is used for stashing HTML objects that we extract in the beginning and replace with place-holders. """ def __init__ (self): """ Create a HtmlStash. """ self.html_counter = 0 # for counting inline html segments self.rawHtmlBlocks=[] def store(self, html, safe=False): """ Saves an HTML segment for later reinsertion. Returns a placeholder string that needs to be inserted into the document. Keyword arguments: * html: an html segment * safe: label an html segment as safe for safemode Returns : a placeholder string """ self.rawHtmlBlocks.append((html, safe)) placeholder = self.get_placeholder(self.html_counter) self.html_counter += 1 return placeholder def reset(self): self.html_counter = 0 self.rawHtmlBlocks = [] def get_placeholder(self, key): return "%swzxhzdk:%d%s" % (STX, key, ETX)