# Plain text  |  67 lines  |  1.18 KB

#!/usr/bin/python

import os
import re
import sys

# Matches Markdown inline links/images of the form [text](target) and captures
# the target.  The text may not contain square brackets and the target may not
# contain parentheses, so such links are simply not matched.
# Raw strings are used so the backslashes reach the regex engine untouched
# instead of being (invalid) string escape sequences.
link_re = re.compile(
	r'\[' r'[^\[\]]+' r'\]'    # [link text]
	r'\(' r'([^\(\)]+)' r'\)'  # (target) -- the captured group
)

# The script needs a root directory plus at least one document to scan.
if len(sys.argv) < 3:
	# Parenthesised single-argument form prints identically on Python 2 and 3.
	print('Usage: <root_dir> <doc_files>...')
	sys.exit(1)

# First argument: directory that links are resolved against.
root = sys.argv[1]
# Remaining arguments: the documents to scan for links.
docs = sys.argv[2:]

# Every link found, stored as a (document, target) pair.
links = []

for doc in docs:
	with open(doc) as f:
		data = f.read()
	links.extend((doc, target) for target in link_re.findall(data))

# URI scheme as defined by RFC 3986 (a letter followed by letters, digits,
# '+', '.' or '-') terminated by ':'.  At least two characters are required so
# a single-letter prefix such as a drive letter ("C:") is not taken for one.
_scheme_re = re.compile(r'[A-Za-z][A-Za-z0-9+.-]+:')

def filter_link(entry):
	"""Tell whether a link should be checked against the file system.

	entry is a (doc, link) pair.  Returns False for links that carry a URI
	scheme (http:, https:, mailto:, ftp:, ...) and for pure in-page anchors
	starting with '#'; returns True for everything else.

	Only a real scheme prefix is treated as external, so a local file whose
	name merely begins with "http" (e.g. "http_api.md") is still checked.
	"""
	# Explicit unpacking replaces the tuple parameter, which is a syntax
	# error on Python 3 (PEP 3113); callers still pass a single pair.
	_doc, link = entry
	return not (link.startswith('#') or _scheme_re.match(link))

# Keep only the entries accepted by filter_link.  A list comprehension is used
# because filter() returns a lazy iterator on Python 3, which would break the
# later len(links) call.
links = [entry for entry in links if filter_link(entry)]

# Optional Markdown link title: whitespace followed by a single- or
# double-quoted string at the very end of the captured target.
_title_re = re.compile(r'''\s+(?:"[^"]*"|'[^']*')\s*$''')

def fix_link(entry):
	"""Reduce a link target to the bare path that should exist on disk.

	entry is a (doc, link) pair.  Returns a new (doc, link) pair in which the
	link has had surrounding whitespace, an optional trailing quoted title
	('path "Title"'), the '#fragment' and the '?query' removed.  The result
	may be an empty string (e.g. for a link consisting only of a query).
	"""
	# Explicit unpacking replaces the tuple parameter, which is a syntax
	# error on Python 3 (PEP 3113); callers still pass a single pair.
	doc, link = entry
	# Drop the title first: it may itself contain '#' or '?'.
	link = _title_re.sub('', link.strip())
	link = link.split('#')[0]
	link = link.split('?')[0]
	return (doc, link)

# Normalise every entry with fix_link.  A list comprehension is used because
# map() returns a lazy iterator on Python 3, which would break the later
# len(links) call.
links = [fix_link(entry) for entry in links]

# (doc, link) pairs whose target could not be found.
errors = []

def check_link(entry, root_dir=None):
	"""Return True when the link target exists on disk, False otherwise.

	entry is a (doc, link) pair.  A link starting with '/' is resolved
	against the root directory; any other link is resolved against the root
	directory joined with the directory part of doc.

	root_dir is the directory to resolve against; when omitted, the
	module-level ``root`` is used.
	"""
	# Explicit unpacking replaces the tuple parameter, which is a syntax
	# error on Python 3 (PEP 3113); callers still pass a single pair.
	doc, link = entry
	if root_dir is None:
		root_dir = root
	# startswith() instead of link[0]: an empty link (e.g. one that held only
	# a query string) must not raise IndexError.
	if link.startswith('/'):
		# Drop the leading slash, otherwise os.path.join would discard root_dir.
		full_link = os.path.join(root_dir, link[1:])
	else:
		full_link = os.path.join(root_dir, os.path.dirname(doc), link)
	return os.path.exists(full_link)

# Record every entry whose target does not exist.
for entry in links:
	if not check_link(entry):
		errors.append(entry)

# Nothing missing: report the number of links checked and exit with status 0.
if not errors:
	print('%d links checked: OK' % (len(links),))
	sys.exit(0)

# Otherwise list each broken link and exit with status 2.
# The parenthesised single-argument print form behaves the same on
# Python 2 and Python 3.
for (doc, link) in errors:
	print('File %s linked from %s not found' % (link, doc))

sys.exit(2)