Tue, 06 Nov 2007

Python - Recursive Directory Crawl Using Generators

I was looking for an os.walk example to crawl through a file system and found the locate function below on ActiveState's Python Cookbook site. I incorporated it into a simple routine that dumps the output to an XML file that can then be transformed using XSLT to sort and tally the results.

#!/usr/bin/env python

import os
import fnmatch
import time
from xml.dom import minidom

def locate(pattern, root=os.curdir):
    """Lazily yield the paths of files under *root* whose names match *pattern*.

    *pattern* is an fnmatch-style wildcard applied to each file name (not to
    the full path).  *root* is made absolute before walking, so every path
    yielded is absolute.  Directories are walked recursively via os.walk.
    """
    start = os.path.abspath(root)
    for folder, _subdirs, names in os.walk(start):
        matching = fnmatch.filter(names, pattern)
        for name in matching:
            yield os.path.join(folder, name)
 
def main(root="\\\\SERVER\\Share", pattern="*.*", output="myfiles.xml"):
    """Crawl *root* and write an XML inventory of the matching files.

    Every file found by ``locate(pattern, root)`` becomes a ``<filename>``
    element under a single ``<files>`` root, with these attributes:

    * ``id``            -- position of the file in the crawl (0-based)
    * ``path``          -- path as yielded by ``locate``
    * ``ext``           -- lower-cased extension, including the leading dot
    * ``size``          -- size in bytes
    * ``last_modified`` -- modification time formatted by ``time.ctime``

    Files that cannot be stat'ed are reported on stdout and left out of the
    document; their ``id`` is not reused, so ids may have gaps.

    Parameters:
        root:    directory to start the crawl from.
        pattern: fnmatch-style pattern applied to each file name.
        output:  path of the XML file to write (ISO-8859-1 encoded).

    Returns None.
    """
    doc = minidom.Document()
    files = doc.createElement("files")
    doc.appendChild(files)
    files.appendChild(doc.createComment("Size attribute is reported in bytes."))

    for i, path in enumerate(locate(pattern, root)):
        # Stat once so size and mtime come from the same snapshot, and keep
        # the try body down to the single call that can raise OSError.
        try:
            info = os.stat(path)
        except OSError as e:
            print("%s => %s" % (path, e.strerror))
            continue

        item = doc.createElement("filename")
        item.setAttribute("id", str(i))
        item.setAttribute("path", path)
        item.setAttribute("ext", os.path.splitext(path)[1].lower())
        item.setAttribute("size", str(info.st_size))
        item.setAttribute("last_modified", time.ctime(info.st_mtime))
        files.appendChild(item)

    # Serialize to bytes in the declared encoding and write them in binary
    # mode, so the bytes on disk always agree with the XML declaration
    # regardless of the platform's default text encoding.  Serializing before
    # opening the file also avoids leaving a truncated file behind on failure.
    xml_bytes = doc.toprettyxml(indent="  ", newl="\n", encoding="iso-8859-1")
    with open(output, "wb") as fp:
        fp.write(xml_bytes)

# Run the crawl only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

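The locate function takes two parameters: the first is a file-name pattern to match (shell-style wildcards, as understood by the fnmatch module), and the second is the directory to start the crawl from, which defaults to the current directory. Note that the "*.*" pattern used above only matches names that contain a dot, so files without an extension are skipped; use "*" to match every file.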



posted: 23:25 | 0 comments | tags: , ,


© 2008 PlatosCave.net