Forum at OOoForum.orgThe Forum
 [Home]   [FAQ]   [Search]   [Memberlist]   [Usergroups]   [Register]
 [Profile]   [Log in to check your private messages]   [Log in]

Simple XML handling in Python script

Post new topic   Reply to topic Forum Index -> Code Snippets
View previous topic :: View next topic  
Author Message

Joined: 21 May 2007
Posts: 1

PostPosted: Mon May 21, 2007 10:10 pm    Post subject: Simple XML handling in Python script Reply with quote

    Simple, self-contained code for reading XML into a tree of nodes
    and using that tree to drive text in an OOWriter document.
    This is not a comprehensive XML solution, but it is good enough for
    basic use.
    To use, copy this module and add your own code to the final function.
    Jack Trainor    2007-05-21

import uno
import xml.sax
import xml.sax.handler
import xml.sax.saxutils
import StringIO

# UNO boolean values passed to the text API calls further down.
UnoTrue = uno.Bool(1)
UnoFalse = uno.Bool(0)

# The constant names had been lost (empty strings), which cannot resolve to
# anything.  ControlCharacter is a UNO constants group, so it is looked up by
# its fully qualified name; BreakType is a UNO enum, so its member is built
# with uno.Enum.
PARAGRAPH_BREAK  = uno.getConstantByName( "com.sun.star.text.ControlCharacter.PARAGRAPH_BREAK" )
PAGE_BEFORE = uno.Enum( "com.sun.star.style.BreakType", "PAGE_BEFORE" )

""" Node is a node in an XML tree with its tag, text and attributes """
class Node:
    """One element of a parsed XML tree.

    Attributes:
        tag        -- element name.
        text       -- accumulated character data, filtered to printable
                      ASCII and XML-escaped (see clean).
        attributes -- mapping of attribute names to values.
        parent     -- owning Node, or None for the root; usually set by
                      addChild.
        nodes      -- list of child Nodes in document order.
    """
    def __init__(self, tag, text="", attributes=None, parent=None):
        # A fresh dict per instance: a shared {} default would leak
        # attributes between unrelated nodes.
        if attributes is None:
            attributes = {}
        self.tag = tag
        self.attributes = attributes
        self.parent = parent  # usually set by Node#addChild
        self.nodes = []
        self.setText(text)

    def filterIllegalAscii(text):
        """Return text with control chars and chars > 127 removed.

        Note: the discarded characters are dropped, not represented.
        Tab, carriage return and newline are kept.
        """
        return "".join([c for c in text if ((c >= " " and c <= "~") or c in "\t\r\n")])
    filterIllegalAscii = staticmethod(filterIllegalAscii)

    def escape(text):
        """Return text with &, < and > replaced by XML entities."""
        return xml.sax.saxutils.escape(text)
    escape = staticmethod(escape)

    def clean(text):
        """Return text filtered to legal ASCII and then XML-escaped."""
        text = Node.filterIllegalAscii(text)
        text = Node.escape(text)
        return text
    clean = staticmethod(clean)

    def cleanAttr(text):
        """Return text cleaned for use inside a double-quoted attribute.

        The quote is replaced during escaping, after & has been handled,
        so the resulting &quot; entity is not itself escaped again.
        """
        text = Node.filterIllegalAscii(text)
        return xml.sax.saxutils.escape(text, {'"': "&quot;"})
    cleanAttr = staticmethod(cleanAttr)

    def setText(self, text):
        """Replace the node's text with the cleaned form of text."""
        self.text = ""
        self.appendText(text)

    def appendText(self, text):
        """Append the cleaned form of text to the node's text."""
        self.text += Node.clean(text)

    def addChild(self, node):
        """Attach node as the last child of this node; None is ignored."""
        if node is not None:
            node.parent = self
            self.nodes.append(node)
""" NodeHandler interfaces with Python SAX to build tree of Nodes  """
class NodeHandler( xml.sax.handler.ContentHandler ):
    """SAX content handler that builds a tree of Nodes.

    A stack tracks the chain of currently open elements; the first element
    seen becomes root.
    """
    def __init__( self ):
        xml.sax.handler.ContentHandler.__init__( self )
        self.stack = []   # open elements, innermost last
        self.root = None  # first element encountered

    def getCurNode( self ):
        """Return the innermost open node, or None when the stack is empty."""
        if self.stack:
            return self.stack[ -1 ]
        return None

    def pushNode( self, node ):
        """Make node the innermost open node."""
        self.stack.append( node )

    def popNode( self ):
        """Remove and return the innermost open node (None if there is none)."""
        node = self.getCurNode()
        assert node is not None
        if node is not None:
            self.stack.pop()
        return node

    def startElement( self, name, attributes=None ):
        """Create a Node for the element, link it to its parent and open it."""
        node = Node( name, '', attributes )
        if self.root is None:
            self.root = node
        curNode = self.getCurNode()
        if curNode is not None:
            curNode.addChild( node )
        self.pushNode( node )

    def characters( self, data ):
        """Append character data to the innermost open node.

        The parser may deliver one run of text in several calls, so the
        data is appended rather than assigned.
        """
        node = self.getCurNode()
        assert node is not None
        if node is not None:
            node.appendText( data )

    def endElement( self, name="" ):
        """Close the innermost open node and strip whitespace around its text."""
        node = self.popNode()
        node.text = node.text.strip()
        if name:
            assert node.tag == name

def parseSource( source ):
    """Parse XML from source and return the root of the resulting tree.

    source is handed straight to the SAX parser; the tree is built by a
    NodeHandler, whose root is returned.
    """
    treeBuilder = NodeHandler()
    # xml.sax.parse creates a default parser, installs the handler and
    # runs the parse in one call.
    xml.sax.parse( source, treeBuilder )
    return treeBuilder.root

def parseString(s):
    """Parse the XML text held in s and return the root node."""
    # Wrap the string in a file-like object so parseSource can read it.
    stream = StringIO.StringIO(s)
    return parseSource(stream)

def parseFile(path):
    """Parse the XML file at path and return the root node."""
    rootNode = parseSource(path)
    return rootNode

def PythonXmlSample( ):
    """Insert the items of an XML file into the current document.

    Takes the document from XSCRIPTCONTEXT, moves a text cursor to its
    end, parses the XML file at a hard-coded path, and for every child of
    the root node writes the "title" attribute as a "Heading 1" paragraph
    followed by the node's text as a "Text body" paragraph, then sets the
    cursor's break type to PAGE_BEFORE.

    Returns None.
    """
    document = XSCRIPTCONTEXT.getDocument()
    text = document.Text
    cursor = text.createTextCursor()
    cursor.gotoEnd(UnoFalse )
    # Add your code here...
    # The code  below reads a file with a hard-coded path for an XML file
    # containing titles and texts, and inserts them into current OO document.
    path = r"C:\Docs\Outlines\Book.xml"  # hard-coded path for convenience
    root = parseFile(path)  # parses xml file at path into one root node

    # XML format in this example (these lines were bare text in the body,
    # which is a syntax error, so they are kept as comments):
    #     <item title="This Is the Item's Title">
    #     This is the item's text.
    #     [ ... more items ... ]
    for node in root.nodes:
        # print title in Heading 1 style ("..." when the item has no title)
        cursor.setPropertyValue("ParaStyleName", "Heading 1")
        text.insertString(cursor, node.attributes.get("title","..."), UnoFalse)
        text.insertControlCharacter(cursor, PARAGRAPH_BREAK, UnoFalse)

        # print text in Text body style followed by a page break
        cursor.setPropertyValue("ParaStyleName", "Text body")
        text.insertString(cursor, "\n\n", UnoFalse)
        text.insertString(cursor, node.text, UnoFalse)
        text.insertControlCharacter(cursor, PARAGRAPH_BREAK, UnoFalse)
        cursor.BreakType = PAGE_BEFORE
    return None
Back to top
View user's profile Send private message
Display posts from previous:   
Post new topic   Reply to topic Forum Index -> Code Snippets All times are GMT - 8 Hours
Page 1 of 1

Jump to:  
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum

Powered by phpBB © 2001, 2005 phpBB Group