changeset 21:8dc7c0651c06

Link checker for Documentation: reports 404 errors (broken links) and files that nothing links to.
author Rob Landley <rob@landley.net>
date Thu, 09 Aug 2007 22:16:32 -0500
parents 8e9357f5cb1b
children b5481d0c89c1
files make/doclinkcheck.py
diffstat 1 files changed, 35 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/doclinkcheck.py	Thu Aug 09 22:16:32 2007 -0500
@@ -0,0 +1,35 @@
+#!/usr/bin/python
+
+import os,sys
+
+# All paths under Documentation: directories end in "/", index.html files are omitted.
+dirlist = []
+for root, subdirs, files in os.walk("Documentation"):
+  dirlist.extend("%s/%s/" % (root, name) for name in subdirs)
+  dirlist.extend("%s/%s" % (root, name) for name in files if name != "index.html")
+dirlist.sort()
+
+taglist = []  # one "path/href" entry per <a href=...> link found by handletag()
+def handletag(path, tag, data):
+  """Append path/href to taglist for each href= word of an <a> tag; data is unused."""
+  words = tag.split()
+  if not words or words[0]!="a": return  # empty tags ("<>") used to raise IndexError
+  for target in [w[5:] for w in words if w.startswith("href=")]:
+    # Slices instead of [0]/[-1] so a bare "href=" no longer raises IndexError.
+    if target[:1]=='"' and target[-1:]=='"': target=target[1:-1]
+    taglist.append("%s/%s" % (path, target))
+
+for root, subdirs, files in os.walk("Documentation"):
+  if "index.html" not in files: continue
+  with open("%s/index.html" % root) as page: html = page.read()  # closes the file
+  for chunk in html.split("<")[1:]:
+    # partition() copes with a "<" that has no ">" (i[1] used to raise IndexError).
+    tag, _, text = chunk.partition(">")
+    handletag(root, tag, text)
+
+# Report links with no matching path on disk, then paths that no link points to.
+known, linked = set(dirlist), set(taglist)  # sets give constant-time membership tests
+print("404 errors:")
+for i in filter(lambda a: a not in known, taglist): print(i)
+print("Unlinked documents:")
+for i in filter(lambda a: a not in linked, dirlist): print(i)