annotate make/doclinkcheck.py @ 25:fe01baf8e2a0

Fix a bug (the attempt to filter out index.html from the list of files nothing links to was looking at the wrong variable), and add comments.
author Rob Landley <rob@landley.net>
date Mon, 13 Aug 2007 16:25:58 -0500
parents 8dc7c0651c06
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
21
8dc7c0651c06 Link checker for Documentation, shows 404 errors and files not linked to.
Rob Landley <rob@landley.net>
parents:
diff changeset
1 #!/usr/bin/python
8dc7c0651c06 Link checker for Documentation, shows 404 errors and files not linked to.
Rob Landley <rob@landley.net>
parents:
diff changeset
2
8dc7c0651c06 Link checker for Documentation, shows 404 errors and files not linked to.
Rob Landley <rob@landley.net>
parents:
diff changeset
3 import os,sys
8dc7c0651c06 Link checker for Documentation, shows 404 errors and files not linked to.
Rob Landley <rob@landley.net>
parents:
diff changeset
4
# Get a list of files under the Documentation directory,
# filtering out instances of index.html


def list_documents(root="Documentation"):
    """Return a sorted list of every directory and file found under *root*.

    Directories are recorded with a trailing "/" so that they can be told
    apart from files; files are recorded as "<directory>/<name>".  Files
    named index.html are left out of the result.

    os.walk() reports nothing for a directory it cannot read, so a missing
    *root* produces an empty list rather than an error.
    """
    found = []
    for dirpath, dirnames, filenames in os.walk(root):
        for name in dirnames:
            found.append("%s/%s/" % (dirpath, name))
        for name in filenames:
            if name != "index.html":
                found.append("%s/%s" % (dirpath, name))
    found.sort()
    return found


dirlist = list_documents()

# Function to parse a relative link and append it to a list.
taglist = []


def handletag(path, tag, data):
    """Record the target of an <a href=...> tag in the module-level taglist.

    path -- directory of the page the tag was read from
    tag  -- the text found between "<" and ">" (tag name plus attributes)
    data -- the text that followed the tag; accepted but not used

    Each usable href is appended to taglist as "path/href".  Tags other
    than <a>, empty tags and empty hrefs are ignored.  Targets that cannot
    name a local file (fragment-only anchors, URLs with a scheme, mailto:
    links) are skipped, and a trailing "#fragment" is dropped.

    Attributes are split on whitespace, so an href whose value contains a
    space is not recognised as a whole.
    """
    words = tag.split()
    # "<>" splits to nothing; closing tags ("/a") and other elements
    # carry no link.
    if not words or words[0].lower() != "a":
        return
    for word in words[1:]:
        if not word.lower().startswith("href="):
            continue
        target = word[5:]
        # Strip one pair of matching quotes, single or double.
        if len(target) >= 2 and target[0] == target[-1] and target[0] in "\"'":
            target = target[1:-1]
        # A fragment points inside a document, not at a different file.
        target = target.split("#", 1)[0]
        if not target:
            continue
        # External links cannot be checked against the local tree.
        if "://" in target or target.lower().startswith("mailto:"):
            continue
        taglist.append("%s/%s" % (path, target))

# Find all the index.html files under Documentation, read each one,
# iterate through the html tags and call handletag() for each.


def iter_tags(root="Documentation"):
    """Yield (directory, tag, text) for each tag of every index.html under *root*.

    directory -- the directory containing the index.html being read
    tag       -- the text between a "<" and the next ">"
    text      -- whatever follows that ">" up to the next "<"

    The page is cut at every "<" with plain string splitting, so the
    contents of HTML comments are treated like any other markup.
    """
    for dirpath, dirnames, filenames in os.walk(root):
        if "index.html" not in filenames:
            continue
        # Read the whole page up front so the file is closed before any
        # tag is handed to the caller.
        with open("%s/index.html" % dirpath) as page:
            html = page.read()
        # Everything before the first "<" is plain text, so skip piece 0.
        for piece in html.split("<")[1:]:
            # partition() copes with a "<" that has no ">" after it:
            # the whole piece becomes the tag and the text is empty.
            tag, _, text = piece.partition(">")
            yield dirpath, tag, text


for dirpath, tag, text in iter_tags():
    handletag(dirpath, tag, text)

# Display the links with no files, and the files nothing linked to.

# Sets make each membership test constant-time; the lists themselves are
# still what gets iterated, so the output keeps its order (and any
# duplicate links are still reported once per occurrence).
known_paths = set(dirlist)
linked_paths = set(taglist)

# print(...) with a single argument behaves the same on Python 2 and 3.
print("404 errors:")
for link in taglist:
    if link not in known_paths:
        print(link)
print("Unlinked documents:")
for name in dirlist:
    if name not in linked_paths:
        print(name)