view make/doclinkcheck.py @ 21:8dc7c0651c06

Link checker for Documentation, shows 404 errors and files not linked to.
author Rob Landley <rob@landley.net>
date Thu, 09 Aug 2007 22:16:32 -0500
parents
children fe01baf8e2a0
line wrap: on
line source

#!/usr/bin/python

import os,sys

def list_documents(root):
  """Return a sorted list of every linkable path found under root.

  Each subdirectory is listed as "parent/name/" (with a trailing slash) and
  each regular file as "parent/name".  Files named "index.html" are left
  out; presumably a directory's index page is reached through the
  directory's own trailing-slash entry rather than by file name.

  A missing root yields an empty list, because os.walk() ignores
  directories it cannot list.
  """
  found = []
  for dirpath, dirnames, filenames in os.walk(root):
    for name in dirnames:
      found.append("%s/%s/" % (dirpath, name))
    for name in filenames:
      # Compare the file name itself.  The original compared the whole
      # os.walk() tuple against "index.html", which is never equal, so the
      # index pages were not actually excluded.
      if name != "index.html":
        found.append("%s/%s" % (dirpath, name))
  found.sort()
  return found

# Every directory and document under Documentation that a link may target.
dirlist = list_documents("Documentation")

# Link targets collected from every <a href=...> tag seen so far.
taglist = []
def handletag(path, tag, data):
  """Record the href target of an anchor tag in the global taglist.

  path -- directory of the page being scanned; it is prefixed to the target.
  tag  -- the text between "<" and ">", e.g. 'a href="foo.html"'.
  data -- the text that followed the tag; currently unused.

  Tags other than "a" (including empty ones such as "<>") are ignored.
  Attributes are separated on whitespace, so an href value containing
  spaces is not recognised as a whole.
  """
  words = tag.split()
  # An empty or whitespace-only tag has no name to inspect.
  if not words or words[0] != "a": return
  for word in words:
    if not word.startswith("href="): continue
    target = word[5:]
    # Strip one pair of surrounding quotes, double or single.  Slicing
    # instead of indexing keeps a bare "href=" from raising IndexError.
    if target[:1] in ('"', "'") and target[-1:] == target[:1]:
      target = target[1:-1]
    taglist.append("%s/%s" % (path, target))

# Scan every index.html under Documentation and hand each tag to handletag().
for walkentry in os.walk("Documentation"):
  if "index.html" not in walkentry[2]: continue
  indexfile = open("%s/index.html" % walkentry[0])
  # Close the file even if reading fails, rather than leaking the handle.
  try:
    html = indexfile.read()
  finally:
    indexfile.close()
  # Text before the first "<" cannot contain a tag, so it is dropped.
  for chunk in html.split("<")[1:]:
    parts = chunk.split(">")
    # A "<" with no closing ">" is not a complete tag; indexing parts[1]
    # for it used to raise IndexError.
    if len(parts) < 2: continue
    handletag(walkentry[0], parts[0], parts[1])
    # NOTE: the original carried an unfinished, commented-out check for
    # heading tags ("h" followed by a digit) at this point.

# Report links that point at nothing, then documents nothing links to.
# Sets make each membership test constant-time; iterating the original
# lists keeps the output order (and any duplicates) unchanged.
# Single-argument print(...) produces the same output on Python 2 and 3.
known_paths = set(dirlist)
linked_paths = set(taglist)
print("404 errors:")
for target in taglist:
  if target not in known_paths: print(target)
print("Unlinked documents:")
for document in dirlist:
  if document not in linked_paths: print(document)