"""Tests for the deprecated ``htmllib`` module's ``HTMLParser`` callbacks."""

import formatter
import unittest

from test import test_support

# htmllib is loaded through the test_support helper with deprecated=True
# rather than a plain import (presumably so the deprecation warning does not
# pollute the test run -- confirm against test_support.import_module).
htmllib = test_support.import_module('htmllib', deprecated=True)


class AnchorCollector(htmllib.HTMLParser):
    """HTMLParser subclass that records the arguments of every anchor start.

    Each call to ``anchor_bgn`` appends its positional arguments, as a tuple,
    to an internal list that ``get_anchor_info`` exposes.
    """

    def __init__(self, *args, **kw):
        # The record list is created before delegating to the base class
        # initializer, matching the original statement order.
        self.__anchors = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def get_anchor_info(self):
        """Return the list of argument tuples recorded so far."""
        return self.__anchors

    def anchor_bgn(self, *args):
        """Record the arguments the parser passed for an opening anchor.

        The test below expects one 3-tuple per anchor, with the ``href`` and
        ``name`` attribute values in the first two positions.
        """
        self.__anchors.append(args)


class DeclCollector(htmllib.HTMLParser):
    """HTMLParser subclass that records every unknown declaration it sees."""

    def __init__(self, *args, **kw):
        # Same ordering as AnchorCollector: storage first, then base init.
        self.__decls = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def get_decl_info(self):
        """Return the list of declaration payloads recorded so far."""
        return self.__decls

    def unknown_decl(self, data):
        """Record the payload of a declaration the parser did not recognise."""
        self.__decls.append(data)


class HTMLParserTestCase(unittest.TestCase):
    """Checks on the anchor and unknown-declaration callbacks."""

    # NOTE: the test methods are documented with comments instead of
    # docstrings so that unittest's verbose output keeps showing method names.

    def test_anchor_collection(self):
        # See SF bug #467059.
        # Three anchors: href + name, href only, name only.  Missing
        # attributes are expected to be reported as empty strings.
        markup = (
            """<a href='http://foo.org/' name='splat'> </a> <a href='http://www.python.org/'> </a> <a name='frob'> </a> """
        )
        expected = [
            ('http://foo.org/', 'splat', ''),
            ('http://www.python.org/', '', ''),
            ('', 'frob', ''),
        ]

        collector = AnchorCollector(formatter.NullFormatter(), verbose=1)
        collector.feed(markup)
        collector.close()

        self.assertEqual(collector.get_anchor_info(), expected)

    def test_decl_collection(self):
        # See SF patch #545300
        # "<![ ... ]>" marked sections must reach unknown_decl() with the
        # text between "<![" and "]>" as the payload.
        markup = (
            """<html> <body> hallo <![if !supportEmptyParas]> <![endif]> </body> </html> """
        )
        expected = ["if !supportEmptyParas", "endif"]

        collector = DeclCollector(formatter.NullFormatter(), verbose=1)
        collector.feed(markup)
        collector.close()

        self.assertEqual(collector.get_decl_info(), expected)


def test_main():
    """Run this module's test case through the test_support runner."""
    test_support.run_unittest(HTMLParserTestCase)


if __name__ == "__main__":
    test_main()