comparison make/doclinkcheck.py @ 25:fe01baf8e2a0

Fix a bug (the attempt to filter out index.html from the list of files nothing links to was looking at the wrong variable), and add comments.
author Rob Landley <rob@landley.net>
date Mon, 13 Aug 2007 16:25:58 -0500
parents 8dc7c0651c06
children
comparison
equal deleted inserted replaced
24:67ccf3866142 25:fe01baf8e2a0
#!/usr/bin/python

# Cross-check the index.html pages under Documentation/ against the files
# that actually exist there: report links whose target is missing, and
# files or directories that no index.html links to.

import os,sys

# Get a list of files under the Documentation directory,
# filtering out instances of index.html

dirlist = []
for root, subdirs, files in os.walk("Documentation"):
    # Directories are recorded with a trailing slash, files without one.
    for name in subdirs:
        dirlist.append("%s/%s/" % (root, name))
    for name in files:
        if name != "index.html":
            dirlist.append("%s/%s" % (root, name))
dirlist.sort()

# Function to parse a relative link and append it to a list.
taglist = []
def handletag(path, tag, data):
    """Record the href target of an <a> tag in taglist.

    path -- directory of the page the tag was found in
    tag  -- the text between "<" and ">", e.g. 'a href="foo.html"'
    data -- the text that followed the tag (currently unused)

    Each target is stored as "path/target" with one pair of surrounding
    single or double quotes removed.  Tags other than <a>, and empty tag
    text, are ignored.
    """
    words = tag.split()
    # An empty tag ("<>") splits into no words at all; there is nothing
    # to look at, and indexing words[0] would raise IndexError.
    if not words or words[0] != "a":
        return
    # NOTE(review): every href is recorded, so absolute URLs and "#anchor"
    # links end up in the 404 list too -- confirm whether that is intended.
    for word in words:
        if word.startswith("href="):
            target = word[5:]
            # Only strip quotes when there is a matching pair; the length
            # check keeps a bare "href=" (empty target) from raising.
            if len(target) >= 2 and target[0] == target[-1] and target[0] in "\"'":
                target = target[1:-1]
            taglist.append("%s/%s" % (path, target))

def scantags(path, data):
    """Split the html text in data into tags and call handletag() on each.

    path -- directory of the page, passed through to handletag()
    data -- complete text of the page
    """
    # Everything before the first "<" is plain text, so skip chunk 0.
    for chunk in data.split("<")[1:]:
        parts = chunk.split(">")
        # A "<" with no closing ">" (a literal less-than sign, or a
        # truncated file) is not a tag, so skip it.
        if len(parts) < 2:
            continue
        handletag(path, parts[0], parts[1])

# Find all the index.html files under Documentation, read each one,
# iterate through the html tags and call handletag() for each.

for root, subdirs, files in os.walk("Documentation"):
    if "index.html" in files:
        # Close the file promptly instead of leaking the handle.
        with open("%s/index.html" % root) as page:
            scantags(root, page.read())

# Display the links with no files, and the files nothing linked to.
# Sets make each membership test cheap; the printed order is unchanged
# (link order for 404s, sorted order for unlinked documents).
# print() with a single argument prints the same under Python 2 and 3.
known = set(dirlist)
linked = set(taglist)
print("404 errors:")
for link in taglist:
    if link not in known:
        print(link)
print("Unlinked documents:")
for doc in dirlist:
    if doc not in linked:
        print(doc)