# -*- coding: UTF-8 -*-
"""Convert an outline bundle's ``contents.xml`` into a static ``index.html``.

Usage: run the script with the path of the bundle directory as its only
argument. A sibling directory named after the bundle (the part of its name
before the first dot) is created, every attachment that carries an ``href``
is copied into it, and the outline items are written to ``index.html``.

The code avoids the Python 2 only ``print >> file`` statement and writes
unicode text to an explicitly UTF-8 encoded file, so it is intended to run
unchanged on both Python 2.7 and Python 3.

NOTE(review): the literal HTML markup was missing from the reviewed source
(the format strings had lost their tags and placeholders). The markup below
is a minimal reconstruction -- confirm it against the intended output.
"""
import io
import os
import shutil
import sys
from xml.dom.minidom import parse
from xml.sax.saxutils import escape

ELEMENT_NODE = 1
TEXT_NODE = 3

# Defaults used by digItem()/outputElement() when the caller does not pass an
# explicit stream / attachment map; convert() fills them in.
outputFile = None
attachments = {}

# Attachment extensions that are embedded as images instead of linked.
_IMAGE_EXTENSIONS = ('png', 'jpg')


def _escape(value):
    """Escape *value* so it is safe as HTML text or a quoted attribute."""
    return escape(value, {'"': '&quot;'})


def _walk(tehParent, kinds):
    """Return the descendants reached by following the tag names in *kinds*.

    ``_walk(node, ('a', 'b'))`` returns every ``b`` child of every ``a``
    child of *node*, in document order.
    """
    nodes = [tehParent]
    for kind in kinds:
        nodes = [child for node in nodes for child in findSubNodes(node, kind)]
    return nodes


def fileHeader(outputFile):
    """Write the opening part of the HTML page to *outputFile*."""
    outputFile.write(
        u'<!DOCTYPE html>\n'
        u'<html>\n'
        u'<head>\n'
        u'<meta charset="utf-8">\n'
        u'</head>\n'
        u'<body>\n'
    )


def fileFooter(outputFile):
    """Write the closing part of the HTML page to *outputFile*."""
    outputFile.write(u'</body>\n</html>\n')


def digItem(tehItem, level, inheritedStyle='', out=None, attachmentMap=None):
    """Write one outline item and, recursively, all of its child items.

    Args:
        tehItem: the ``item`` DOM element to render.
        level: nesting depth; used for indentation and the ``levelN`` class.
        inheritedStyle: style names handed down by the parent item (the
            parent's second ``style`` entry).
        out: text stream to write to; defaults to the module-level
            ``outputFile``.
        attachmentMap: mapping of attachment id to file name; defaults to
            the module-level ``attachments``.
    """
    if out is None:
        out = outputFile
    if attachmentMap is None:
        attachmentMap = attachments

    itemStyles = findStyles(tehItem)
    # Slot 0 styles this item, slot 1 is passed on to its children.
    while len(itemStyles) < 2:
        itemStyles.append('')

    # Join with a space so the item's own and inherited names stay separate
    # class names instead of being fused into one token.
    applicableStyles = ' '.join(
        style for style in (itemStyles[0], inheritedStyle) if style
    )
    classes = (u'level%d %s' % (level, applicableStyles)).strip()
    indent = u' ' + u' ' * level

    out.write(u'\n')
    out.write(indent + u'<div class="%s">' % _escape(classes))

    # Inline pieces are each preceded by one space, mirroring the spacing the
    # original print-with-trailing-comma calls produced.
    for runNode in _walk(tehItem, ('values', 'text', 'p', 'run')):
        runStyles = findStyles(runNode)
        if runStyles:
            out.write(u' <span class="%s">' % _escape(runStyles[0]))
        for litNode in findSubNodes(runNode, 'lit'):
            for outputNode in litNode.childNodes:
                outputElement(outputNode, out, attachmentMap)
        if runStyles:
            out.write(u' </span>\n')
    out.write(u'\n')

    for itemNode in _walk(tehItem, ('children', 'item')):
        digItem(itemNode, level + 1, itemStyles[1], out, attachmentMap)

    out.write(indent + u'</div>\n')


def findSubNodes(tehParent, kind):
    """Return the direct child elements of *tehParent* whose tag is *kind*.

    Only element nodes are considered, so text, comment and
    processing-instruction children (which have no ``tagName``) are skipped.
    """
    return [
        subNode
        for subNode in tehParent.childNodes
        if subNode.nodeType == ELEMENT_NODE and subNode.tagName == kind
    ]


def outputElement(tehElement, out=None, attachmentMap=None):
    """Write one child node of a ``lit`` element as HTML.

    Text nodes are written as their XML serialisation (already
    entity-escaped). ``cell`` elements become a link (``href`` present), an
    image (attachment with a png/jpg extension) or a link to the attachment
    file. Any other node is ignored.

    Args:
        tehElement: the DOM node to render.
        out: text stream to write to; defaults to the module-level
            ``outputFile``.
        attachmentMap: mapping of attachment id to file name; defaults to
            the module-level ``attachments``.
    """
    if out is None:
        out = outputFile
    if attachmentMap is None:
        attachmentMap = attachments

    if tehElement.nodeType == TEXT_NODE:
        out.write(u' ' + tehElement.toxml())
        return
    if tehElement.nodeType != ELEMENT_NODE or tehElement.tagName != 'cell':
        return

    name = _escape(tehElement.getAttribute('name'))
    href = tehElement.getAttribute('href')
    if href:
        out.write(u' <a href="%s">%s</a>' % (_escape(href), name))
        return

    fileName = attachmentMap.get(tehElement.getAttribute('refid'))
    if fileName is None:
        # Attachments without an href are never recorded; fall back to the
        # cell's label rather than failing on the missing id.
        out.write(u' ' + name)
        return

    extension = fileName.split('.')[-1].lower()
    if extension in _IMAGE_EXTENSIONS:
        out.write(u' <img src="%s">' % _escape(fileName))
    else:
        out.write(u' <a href="%s">%s</a>\n' % (_escape(fileName), name))


def findStyles(tehElement):
    """Return one string of style names per ``style`` child of *tehElement*.

    Each entry is the space-separated ``name`` attributes of that style's
    ``inherited-style`` children; a ``style`` without any yields ``''``.
    """
    itemStyles = []
    for styleNode in findSubNodes(tehElement, 'style'):
        names = [
            inheritedStyleNode.getAttribute('name')
            for inheritedStyleNode in findSubNodes(styleNode, 'inherited-style')
        ]
        itemStyles.append(' '.join(name for name in names if name))
    return itemStyles


def convert(inputPath):
    """Render the bundle at *inputPath* into a sibling output directory.

    Args:
        inputPath: path of the bundle directory containing ``contents.xml``.
    """
    global outputFile, attachments

    # normpath drops a trailing slash, which would otherwise produce an empty
    # title and place the output in the parent directory.
    bundlePath = os.path.normpath(inputPath)
    inputTitle = os.path.basename(bundlePath).split('.')[0]
    outputPath = os.path.join(os.path.dirname(bundlePath), inputTitle)

    # Parse before touching the output so a broken document does not leave a
    # half-written page behind.
    tehTree = parse(os.path.join(bundlePath, 'contents.xml'))
    docNode = tehTree.documentElement

    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    attachments = {}
    for attachmentNode in _walk(docNode, ('attachments', 'attachment')):
        href = attachmentNode.getAttribute('href')
        if href:
            attachments[attachmentNode.getAttribute('id')] = href
            shutil.copyfile(
                os.path.join(bundlePath, href),
                os.path.join(outputPath, href),
            )

    rootNodes = findSubNodes(docNode, 'root')
    indexPath = os.path.join(outputPath, 'index.html')
    with io.open(indexPath, 'w', encoding='utf-8') as out:
        outputFile = out
        fileHeader(out)
        if rootNodes:
            # When several root elements exist the last one wins.
            for itemNode in findSubNodes(rootNodes[-1], 'item'):
                digItem(itemNode, 1, out=out, attachmentMap=attachments)
        fileFooter(out)


if __name__ == '__main__' and len(sys.argv) >= 2:
    convert(sys.argv[1])