jacktrainor Newbie

Joined: 21 May 2007 Posts: 1
|
Posted: Mon May 21, 2007 10:10 pm Post subject: Simple XML handling in Python script |
|
|
| Code: | """
PythonXmlSample
Simple, self-contained code for reading XML into a tree of nodes
and using that tree to drive text in an OOWriter document.
This is not a comprehensive XML solution, but good for basic
tasks.
To use, copy module and add your code into final function.
Jack Trainor 2007-05-21
"""
import uno
import xml.sax
import xml.sax.handler
import xml.sax.saxutils
import StringIO
# UNO boolean values built with uno.Bool; UnoFalse is the flag passed to the
# cursor/text calls in PythonXmlSample.
UnoTrue = uno.Bool(1)
UnoFalse = uno.Bool(0)
# UNO constants looked up once, at import time, by their fully qualified names.
PARAGRAPH_BREAK = uno.getConstantByName( "com.sun.star.text.ControlCharacter.PARAGRAPH_BREAK" )
PAGE_BEFORE = uno.getConstantByName( "com.sun.star.style.BreakType.PAGE_BEFORE" )
""" Node is a node in an XML tree with its tag, text and attributes """
class Node:
    """One element of a parsed XML tree.

    Attributes:
        tag: element name.
        text: accumulated character data, already passed through
            Node.clean (printable ASCII only, XML-escaped).
        attributes: mapping of attribute names to values.
        parent: owning Node, or None for a root; usually set by addChild.
        nodes: list of child Nodes in document order.
    """

    def __init__(self, tag, text="", attributes=None, parent=None):
        """Create a node.

        tag        -- element name
        text       -- initial text; it is cleaned before being stored
        attributes -- mapping of attribute values; None means "no attributes"
        parent     -- parent Node, if already known
        """
        # A literal {} default would be a single dict shared by every Node
        # created without attributes, so build a fresh one per instance.
        if attributes is None:
            attributes = {}
        self.tag = tag
        self.attributes = attributes
        self.setText(text)
        self.parent = parent  # usually set by Node#addChild
        self.nodes = []

    def filterIllegalAscii(text):
        """Return text with only printable ASCII, tab, CR and LF kept."""
        # Note: discards control chars and chars > 127 -- doesn't represent them
        return "".join([c for c in text if (" " <= c <= "~") or c in "\t\r\n"])
    filterIllegalAscii = staticmethod(filterIllegalAscii)

    def escape(text):
        """Return text with &, < and > replaced by their XML entities."""
        return xml.sax.saxutils.escape(text)
    escape = staticmethod(escape)

    def clean(text):
        """Return text filtered to legal ASCII and then XML-escaped."""
        return Node.escape(Node.filterIllegalAscii(text))
    clean = staticmethod(clean)

    def cleanAttr(text):
        """Return text cleaned for use inside a double-quoted attribute."""
        # Clean first, then encode the quotes.  Encoding the quotes first
        # would let escape() rewrite the "&" of "&quot;" as "&amp;quot;".
        return Node.clean(text).replace('"', "&quot;")
    cleanAttr = staticmethod(cleanAttr)

    def setText(self, text):
        """Replace the node's text with the cleaned form of text."""
        self.text = ""
        self.appendText(text)

    def appendText(self, text):
        """Append the cleaned form of text to the node's text."""
        self.text += Node.clean(text)

    def addChild(self, node):
        """Append node to this node's children and make this node its parent.

        Passing None is a no-op.
        """
        if node is not None:
            self.nodes.append(node)
            node.parent = self
""" NodeHandler interfaces with Python SAX to build tree of Nodes """
class NodeHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that assembles the parsed elements into Nodes.

    Attributes:
        stack: Nodes for the elements currently open, innermost last.
        root: Node for the first element seen, or None before parsing.
    """

    def __init__(self):
        """Start with an empty element stack and no root."""
        # Let the base class initialise its own state before adding ours.
        xml.sax.handler.ContentHandler.__init__(self)
        self.stack = []
        self.root = None

    def getCurNode(self):
        """Return the innermost open Node, or None if no element is open."""
        if self.stack:
            return self.stack[-1]
        return None

    def pushNode(self, node):
        """Make node the innermost open Node."""
        self.stack.append(node)

    def popNode(self):
        """Remove and return the innermost open Node.

        Asserts that an element is open.
        """
        node = self.getCurNode()
        assert node is not None
        if node is not None:
            self.stack.pop()
        return node

    def startElement(self, name, attributes=None):
        """Create a Node for the element, link it to its parent and open it."""
        # The parser is allowed to reuse the attributes object it hands in,
        # so keep a private copy rather than a reference to it.
        if attributes is not None:
            attributes = attributes.copy()
        node = Node(name, '', attributes)
        if self.root is None:
            self.root = node
        curNode = self.getCurNode()
        if curNode is not None:
            curNode.addChild(node)
        self.pushNode(node)

    def characters(self, data):
        """Append character data to the innermost open Node."""
        node = self.getCurNode()
        assert node is not None
        node.appendText(data)

    def endElement(self, name=""):
        """Close the innermost open Node and strip whitespace from its text."""
        node = self.popNode()
        node.text = node.text.strip()
        if name:
            # The element being closed must be the one most recently opened.
            assert node.tag == name
def parseSource( source ):
    """Parse XML from source and return the root Node of the resulting tree.

    source is handed unchanged to the SAX parser's parse() method.
    """
    treeBuilder = NodeHandler()
    reader = xml.sax.make_parser()
    reader.setContentHandler( treeBuilder )
    reader.parse( source )
    return treeBuilder.root
def parseString(s):
    """Parse the XML held in string s and return the root Node."""
    # Wrap the string in a file-like object so parseSource can read it.
    stream = StringIO.StringIO(s)
    return parseSource(stream)
def parseFile(path):
    """Parse the XML file at path and return the root Node."""
    root = parseSource(path)
    return root
def PythonXmlSample( ):
    """Append the items of an XML file to the end of the current document.

    Each child of the XML root contributes its "title" attribute as a
    "Heading 1" paragraph and its text as a "Text body" paragraph, after
    which a page break is requested.  Always returns None.
    """
    doc = XSCRIPTCONTEXT.getDocument()
    body = doc.Text
    caret = body.createTextCursor()
    caret.gotoEnd(UnoFalse)

    # Add your code here...
    # The code below reads a file with a hard-coded path for an XML file
    # containing titles and texts, and inserts them into current OO document.
    xmlPath = r"C:\Docs\Outlines\Book.xml"  # hard-coded path for convenience
    tree = parseFile(xmlPath)  # parses the XML file at xmlPath into one root node

    # XML format in this example:
    #   <root>
    #     <item title="This Is the Item's Title">
    #       This is the item's text.
    #     </item>
    #     [ ... more items ... ]
    #   </root>
    for item in tree.nodes:
        # Title goes in as a Heading 1 paragraph.
        caret.setPropertyValue("ParaStyleName", "Heading 1")
        heading = item.attributes.get("title", "...")
        body.insertString(caret, heading, UnoFalse)
        body.insertControlCharacter(caret, PARAGRAPH_BREAK, UnoFalse)

        # Item text goes in as Text body, followed by a page break.
        caret.setPropertyValue("ParaStyleName", "Text body")
        body.insertString(caret, "\n\n", UnoFalse)
        body.insertString(caret, item.text, UnoFalse)
        body.insertControlCharacter(caret, PARAGRAPH_BREAK, UnoFalse)
        caret.BreakType = PAGE_BEFORE
    return None
|
|