# xmpp.simplexml

##   simplexml.py based on Mattew Allum's xmlstream.py
##
##   Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov
##
##   This program is free software; you can redistribute it and/or modify
##   it under the terms of the GNU General Public License as published by
##   the Free Software Foundation; either version 2, or (at your option)
##   any later version.
##
##   This program is distributed in the hope that it will be useful,
##   but WITHOUT ANY WARRANTY; without even the implied warranty of
##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##   GNU General Public License for more details.

# $Id$

"""Simplexml module provides xmpppy library with all needed tools to handle XML nodes and XML streams.
I'm personally using it in many other separate projects. It is designed to be as standalone as possible."""

import xml.parsers.expat
import weakref

def XMLescape(txt):
    """Return `txt` with the XML special characters escaped.

    The symbols ``&``, ``<``, ``>`` and ``"`` are replaced by their
    respective XML entities. FORM FEED (U+000C) and ESC (U+001B) are
    removed because they are not valid XML characters.
    """
    # "&" has to be handled first: otherwise the ampersands introduced by
    # the other entities would themselves be escaped a second time.
    return (txt.replace("&", "&amp;")
               .replace("<", "&lt;")
               .replace(">", "&gt;")
               .replace('"', "&quot;")
               .replace(u'\x0C', "")
               .replace(u'\x1B', ""))

# Encoding used to decode byte strings that have to become unicode text.
ENCODING = 'utf-8'


def ustr(what):
    """Convert object `what` to a unicode string.

    A value that already is a text string is returned unchanged. Otherwise
    the object's own ``__str__`` method is used when it is accessible, and
    ``str(what)`` when it is not. A byte-string result is decoded using
    ENCODING.
    """
    try:
        text_type = unicode
    except NameError:
        # No `unicode` builtin in this interpreter: `str` is the text type.
        text_type = str
    if isinstance(what, text_type):
        return what
    try:
        r = what.__str__()
    except AttributeError:
        r = str(what)
    if not isinstance(r, text_type):
        return text_type(r, ENCODING)
    return r

class Node(object):
    """Describe a single XML node: name, namespace, attributes, text, children.

    A node is built from a "namespace name" tag, a dictionary of attributes
    and a payload made of text strings and other nodes. Building a node from
    an XML text string is delegated to the NodeBuilder class. After creation
    the node can be changed freely and serialised back into a string, either
    exactly (default) or in "fancy" mode, where whitespace is added to indent
    the output and make it more readable by a human.

    Text and children live in two parallel lists: ``data[i]`` is the text
    written in front of ``kids[i]``, and a ``None`` entry in ``kids`` means
    "no child at this position".

    FORCE_NODE_RECREATION defaults to a false value, which enables fast
    replication from another Node. The drawback of the fast way is that the
    replica shares its child nodes with the original, so changing one node
    may influence the other.
    """
    FORCE_NODE_RECREATION = 0

    # Types treated as "a single text string" when given as payload. Falls
    # back to `str` where the Python 2 `basestring` builtin does not exist.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, tag=None, attrs={}, payload=[], parent=None, nsp=None, node_built=False, node=None):
        """Create a node.

        tag        -- node name, optionally prepended by its namespace and
                      separated from it by a space. When `node_built` is
                      true the tag is instead read as "prefix:name" and the
                      prefix is resolved with lookup_nsp().
        attrs      -- dictionary of attributes; "xmlns" and "xmlns:*" entries
                      are also recorded as namespace declarations.
        payload    -- a text string, or a list of text strings and child
                      nodes carried by this node.
        parent     -- the node this one is a child of.
        nsp        -- mapping of prefix -> namespace used to pre-fill the
                      namespace lookup cache.
        node       -- either a text string containing exactly one node or
                      another Node instance to start from. The node is first
                      created as a replica of `node` and then modified to
                      comply with the other arguments.
        """
        # NOTE: the `attrs` and `payload` defaults are shared objects; they
        # are only read in this method, never modified.
        if node:
            if self.FORCE_NODE_RECREATION and isinstance(node, Node):
                node = str(node)
            if not isinstance(node, Node):
                # The builder receives this very instance as its initial
                # node and initialises it while parsing the text.
                NodeBuilder(node, self)
                node_built = True
            else:
                self.name = node.name
                self.namespace = node.namespace
                self.parent = node.parent
                # Shallow copies: the containers are new, the child nodes
                # themselves are shared with the original.
                self.attrs = dict(node.attrs)
                self.data = list(node.data)
                self.kids = list(node.kids)
                self.nsd = dict(node.nsd)
        else:
            self.name, self.namespace = 'tag', ''
            self.attrs, self.data, self.kids = {}, [], []
            self.parent, self.nsd = None, {}
        if parent:
            self.parent = parent
        self.nsp_cache = {}
        if nsp:
            for k, v in nsp.items():
                self.nsp_cache[k] = v
        for attr, val in attrs.items():
            if attr == 'xmlns':
                self.nsd[u''] = val
            elif attr.startswith('xmlns:'):
                self.nsd[attr[6:]] = val
            self.attrs[attr] = val
        if tag:
            if node_built:
                # "prefix:name" -> (prefix, name); no colon -> ('', name)
                pfx, self.name = ([''] + tag.split(':'))[-2:]
                self.namespace = self.lookup_nsp(pfx)
            elif ' ' in tag:
                self.namespace, self.name = tag.split()
            else:
                self.name = tag
        if isinstance(payload, self._string_types):
            payload = [payload]
        for i in payload:
            if isinstance(i, Node):
                self.addChild(node=i)
            else:
                self.data.append(ustr(i))

    def lookup_nsp(self, pfx=''):
        """Resolve namespace prefix `pfx` to a namespace.

        The node's own declarations are consulted first, then its lookup
        cache, then the parent chain (the parent's answer is cached). A
        placeholder namespace is returned when nothing is found and the node
        has no parent.
        """
        ns = self.nsd.get(pfx, None)
        if ns is None:
            ns = self.nsp_cache.get(pfx, None)
        if ns is None:
            if self.parent:
                ns = self.parent.lookup_nsp(pfx)
                self.nsp_cache[pfx] = ns
            else:
                return 'http://www.gajim.org/xmlns/undeclared'
        return ns

    def __str__(self, fancy=0):
        """Dump the node into its textual representation.

        If the "fancy" argument is set to a true value the output is indented
        for readability; its value is the nesting level of the node.
        """
        def text_at(index):
            # Escaped text stored at `index` ('' when there is none);
            # fancy mode strips the surrounding whitespace.
            if index >= len(self.data):
                return ''
            chunk = self.data[index]
            if fancy:
                chunk = chunk.strip()
            return XMLescape(chunk)

        s = (fancy - 1) * 2 * ' ' + "<" + self.name
        if self.namespace:
            # The namespace is only written where it differs from the
            # parent's one and is not already present as an attribute.
            if not self.parent or self.parent.namespace != self.namespace:
                if 'xmlns' not in self.attrs:
                    s = s + ' xmlns="%s"' % self.namespace
        for key in self.attrs.keys():
            val = ustr(self.attrs[key])
            s = s + ' %s="%s"' % (key, XMLescape(val))
        s = s + ">"
        cnt = 0
        if self.kids:
            if fancy:
                s = s + "\n"
            for a in self.kids:
                s = s + text_at(cnt)
                if isinstance(a, Node):
                    s = s + a.__str__(fancy and fancy + 1)
                elif a:
                    s = s + a.__str__()
                cnt = cnt + 1
        # Text stored after the last child (or the only text of the node).
        s = s + text_at(cnt)
        if not self.kids and s.endswith('>'):
            # Nothing was written after the opening tag: self-closing form.
            s = s[:-1] + ' />'
            if fancy:
                s = s + "\n"
        else:
            if fancy and not self.data:
                s = s + (fancy - 1) * 2 * ' '
            s = s + "</" + self.name + ">"
            if fancy:
                s = s + "\n"
        return s

    def getCDATA(self):
        """Serialise the node, dropping all tags and leaving CDATA intact.

        This effectively kills all formatting, leaving only the text that
        was contained in the XML.
        """
        s = ""
        cnt = 0
        for a in self.kids:
            # `data` may be shorter than `kids` (e.g. after clearData()).
            if cnt < len(self.data):
                s = s + self.data[cnt]
            if isinstance(a, Node):
                s = s + a.getCDATA()
            elif a:
                # Plain text stored among the children (see setPayload).
                s = s + ustr(a)
            cnt = cnt + 1
        if cnt < len(self.data):
            s = s + self.data[cnt]
        return s

    def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
        """Add a child node and return it.

        If the "node" argument is provided it is adopted as the child.
        Otherwise a new node is created from the other arguments' values.
        Raises AttributeError when `attrs` carries an "xmlns" key; use the
        `namespace` argument instead.
        """
        if 'xmlns' in attrs:
            raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
        # The child refers to its parent through a weak proxy.
        if node:
            newnode = node
            node.parent = weakref.proxy(self)
        else:
            newnode = Node(tag=name, parent=weakref.proxy(self), attrs=attrs, payload=payload)
        if namespace:
            newnode.setNamespace(namespace)
        self.kids.append(newnode)
        self.data.append(u'')
        return newnode

    def addData(self, data):
        """Add some CDATA to the node."""
        self.data.append(ustr(data))
        self.kids.append(None)

    def clearData(self):
        """Remove all CDATA from the node."""
        self.data = []

    def delAttr(self, key):
        """Delete the attribute "key". Raises KeyError if it is not set."""
        del self.attrs[key]

    def delChild(self, node, attrs={}):
        """Delete a child and return it.

        If "node" is a Node instance that child is deleted. Otherwise the
        first child that has the specified name and (optionally) attributes
        is deleted. The child's slot in the children list is replaced by
        None. Raises ValueError when there is no such child.
        """
        if not isinstance(node, Node):
            wanted = node
            node = self.getTag(wanted, attrs)
            if node is None:
                # Without this check the lookup below would search for None
                # and could hit the placeholder left by an earlier deletion.
                raise ValueError("no child %r to delete" % (wanted,))
        self.kids[self.kids.index(node)] = None
        return node

    def getAttrs(self):
        """Return all the node's attributes as a dictionary."""
        return self.attrs

    def getAttr(self, key):
        """Return the value of the specified attribute, or None if unset."""
        try:
            return self.attrs[key]
        except (KeyError, TypeError):
            return None

    def getChildren(self):
        """Return the list of the node's children (may contain None)."""
        return self.kids

    def getData(self):
        """Return all the node's CDATA as one (concatenated) string."""
        return ''.join(self.data)

    def getName(self):
        """Return the name of the node."""
        return self.name

    def getNamespace(self):
        """Return the namespace of the node."""
        return self.namespace

    def getParent(self):
        """Return the parent of the node (if present)."""
        return self.parent

    def getPayload(self):
        """Return the payload of the node: child nodes and CDATA entries.

        F.e. for "<node>text1<nodea/><nodeb/> text2</node>" the returned
        list is ['text1', <nodea instance>, <nodeb instance>, ' text2'].
        Empty text entries and None children are left out.
        """
        ret = []
        for i in range(max(len(self.data), len(self.kids))):
            if i < len(self.data) and self.data[i]:
                ret.append(self.data[i])
            if i < len(self.kids) and self.kids[i]:
                ret.append(self.kids[i])
        return ret

    def getTag(self, name, attrs={}, namespace=None):
        """Filter the child nodes using the specified arguments.

        Returns the first child found or None if there is none.
        """
        return self.getTags(name, attrs, namespace, one=1)

    def getTagAttr(self, tag, attr):
        """Return an attribute value of the child with the specified name.

        Returns None if there is no such child or no such attribute.
        """
        try:
            return self.getTag(tag).attrs[attr]
        except (AttributeError, KeyError, TypeError):
            return None

    def getTagData(self, tag):
        """Return the concatenated CDATA of the child with the given name.

        Returns None if there is no such child.
        """
        try:
            return self.getTag(tag).getData()
        except (AttributeError, TypeError):
            return None

    def getTags(self, name, attrs={}, namespace=None, one=0):
        """Filter the child nodes using the specified arguments.

        Returns the list of nodes found. When `one` is true, returns the
        first matching node instead, or None if nothing matches.
        """
        nodes = []
        for node in self.kids:
            if not node:
                continue
            if namespace and namespace != node.getNamespace():
                continue
            if node.getName() == name:
                for key in attrs.keys():
                    if key not in node.attrs or node.attrs[key] != attrs[key]:
                        break
                else:
                    nodes.append(node)
            if one and nodes:
                return nodes[0]
        if not one:
            return nodes
        return None

    def iterTags(self, name, attrs={}, namespace=None):
        """Iterate over the children that match the specified arguments."""
        for node in self.kids:
            if not node:
                continue
            if namespace is not None and namespace != node.getNamespace():
                continue
            if node.getName() == name:
                for key in attrs.keys():
                    if key not in node.attrs or node.attrs[key] != attrs[key]:
                        break
                else:
                    yield node

    def setAttr(self, key, val):
        """Set the attribute "key" to the value "val"."""
        self.attrs[key] = val

    def setData(self, data):
        """Set the node's CDATA to the given string. Resets previous CDATA!"""
        self.data = [ustr(data)]

    def setName(self, val):
        """Change the node name."""
        self.name = val

    def setNamespace(self, namespace):
        """Change the node namespace."""
        self.namespace = namespace

    def setParent(self, node):
        """Set the node's parent to "node" (held through a weak proxy).

        WARNING: does not check if a parent is already present and does not
        remove the node from the list of children of the previous parent.
        """
        self.parent = weakref.proxy(node) if node else None

    def setPayload(self, payload, add=0):
        """Set the node's children according to the list specified.

        WARNING: replaces the children list (or extends it when `add` is
        true) with `payload` as given. If you wish just to add a child or
        CDATA, use the addChild or addData methods.
        """
        if isinstance(payload, self._string_types):
            payload = [payload]
        if add:
            self.kids += payload
        else:
            self.kids = payload

    def setTag(self, name, attrs={}, namespace=None):
        """Same as getTag, but create the child if it is not found.

        Returns the existing or the newly created child node.
        """
        node = self.getTags(name, attrs, namespace=namespace, one=1)
        if node:
            return node
        return self.addChild(name, attrs, namespace=namespace)

    def setTagAttr(self, tag, attr, val):
        """Set attribute "attr" of the child named "tag" to value "val".

        The child is created if it is not already present.
        """
        try:
            self.getTag(tag).attrs[attr] = val
        except AttributeError:
            # getTag() returned None: there is no such child yet.
            self.addChild(tag, attrs={attr: val})

    def setTagData(self, tag, val, attrs={}):
        """Set the CDATA of the child named "tag" to the string "val".

        The child, selected by name and (optionally) attributes "attrs", is
        created if it is not already present.
        """
        text = ustr(val)
        try:
            self.getTag(tag, attrs).setData(text)
        except AttributeError:
            # getTag() returned None: there is no such child yet.
            self.addChild(tag, attrs, payload=[text])

    def has_attr(self, key):
        """Check if the node has the attribute "key"."""
        return key in self.attrs

    def __getitem__(self, item):
        """Return the value of the node's attribute "item"."""
        return self.getAttr(item)

    def __setitem__(self, item, val):
        """Set the value of the node's attribute "item"."""
        return self.setAttr(item, val)

    def __delitem__(self, item):
        """Delete the node's attribute "item"."""
        return self.delAttr(item)

    def __getattr__(self, attr):
        """Create the T / NT helper objects on first access.

        Reduces the memory usage caused by the T/NT classes: they are only
        instantiated when needed. Any other missing attribute raises
        AttributeError.
        """
        if attr == 'T':
            self.T = T(self)
            return self.T
        if attr == 'NT':
            self.NT = NT(self)
            return self.NT
        raise AttributeError(attr)

class T:
    """Auxiliary class giving quick access to a node's child nodes.

    Attribute access is mapped onto the wrapped node: reading ``t.name``
    returns the child called "name", assigning sets that child (creating it
    when missing) and ``del t.name`` deletes it.
    """

    def __init__(self, node):
        # Written straight into __dict__ because __setattr__ is overridden.
        self.__dict__['node'] = node

    def __getattr__(self, attr):
        """Return the child called `attr` (as node.getTag does)."""
        if attr == 'node':
            # Only reached when the instance holds no node at all; failing
            # here avoids unbounded recursion through `self.node` below.
            raise AttributeError(attr)
        return self.node.getTag(attr)

    def __setattr__(self, attr, val):
        """Make the child called `attr` a replica of, or carry, `val`.

        A Node value re-initialises the (possibly newly created) child as a
        replica of it; any other value becomes the child's CDATA.
        """
        if isinstance(val, Node):
            Node.__init__(self.node.setTag(attr), node=val)
        else:
            return self.node.setTagData(attr, val)

    def __delattr__(self, attr):
        """Delete the child called `attr` (as node.delChild does)."""
        return self.node.delChild(attr)

class NT(T):
    """Auxiliary class used to quickly create a node's child nodes.

    Reading ``nt.name`` adds a new child called "name" to the wrapped node
    and returns it; assigning adds a child built from the assigned value.
    """

    def __getattr__(self, attr):
        """Add a new child called `attr` and return it."""
        if attr == 'node':
            # Only reached when the instance holds no node at all; failing
            # here avoids unbounded recursion through `self.node` below.
            raise AttributeError(attr)
        return self.node.addChild(attr)

    def __setattr__(self, attr, val):
        """Add `val` as a child: a Node is adopted, anything else is payload."""
        if isinstance(val, Node):
            self.node.addChild(attr, node=val)
        else:
            return self.node.addChild(attr, payload=[val])

# Debug "level" passed to NodeBuilder.DEBUG for every builder event.
DBG_NODEBUILDER = 'nodebuilder'


class NodeBuilder:
    """Build a Node tree from the XML data fed to it.

    This class is used for two purposes:
      1. Creating an XML Node from a textual representation, f.e. when
         reading a config file. See the XML2Node function.
      2. Handling an incoming XML stream. This is done by changing the
         _dispatch_depth attribute and redefining the dispatch method.
    You do not need to use this class directly unless you are designing
    your own XML handler.
    """

    def __init__(self, data=None, initial_node=None):
        """Set up the expat parser and, optionally, parse `data` at once.

        By default the builder starts without any node. If "initial_node"
        is provided it is used as the starting point: the first element at
        dispatch depth re-initialises that instance instead of creating a
        new Node ("node upgrade"). "data", if provided, is fed to the parser
        as a complete document immediately after initialisation.
        """
        self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start')
        self._parser = xml.parsers.expat.ParserCreate()
        self._parser.StartElementHandler = self.starttag
        self._parser.EndElementHandler = self.endtag
        self._parser.CharacterDataHandler = self.handle_cdata
        self._parser.StartNamespaceDeclHandler = self.handle_namespace_start
        self._parser.buffer_text = True
        self.Parse = self._parser.Parse

        self.__depth = 0
        self.__last_depth = 0
        self.__max_depth = 0
        # Element depth at which finished nodes are handed to dispatch().
        self._dispatch_depth = 1
        self._document_attrs = None
        self._document_nsp = None
        self._mini_dom = initial_node
        # Starts true, so character data seen before any tag is ignored.
        self.last_is_data = 1
        self._ptr = None
        self.data_buffer = None
        self.streamError = ''
        if data:
            self._parser.Parse(data, 1)

    def check_data_buffer(self):
        """Flush buffered character data into the current node's data list."""
        if self.data_buffer:
            self._ptr.data.append(''.join(self.data_buffer))
            del self.data_buffer[:]
            self.data_buffer = None

    def destroy(self):
        """Allow the class instance to be garbage-collected.

        Flushes pending character data and detaches the parser callbacks,
        which are bound methods of this instance.
        """
        self.check_data_buffer()
        self._parser.StartElementHandler = None
        self._parser.EndElementHandler = None
        self._parser.CharacterDataHandler = None
        self._parser.StartNamespaceDeclHandler = None

    def starttag(self, tag, attrs):
        """XML Parser callback. Used internally."""
        self.check_data_buffer()
        self._inc_depth()
        self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, repr(attrs)), 'down')
        if self.__depth == self._dispatch_depth:
            if not self._mini_dom:
                self._mini_dom = Node(tag=tag, attrs=attrs, nsp=self._document_nsp, node_built=True)
            else:
                # Re-initialise the node supplied by the caller in place.
                Node.__init__(self._mini_dom, tag=tag, attrs=attrs, nsp=self._document_nsp, node_built=True)
            self._ptr = self._mini_dom
        elif self.__depth > self._dispatch_depth:
            self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs, node_built=True))
            self._ptr = self._ptr.kids[-1]
        if self.__depth == 1:
            # Document root: split its attributes into namespace
            # declarations and ordinary attributes.
            self._document_attrs = {}
            self._document_nsp = {}
            nsp, name = ([''] + tag.split(':'))[-2:]
            for attr, val in attrs.items():
                if attr == 'xmlns':
                    self._document_nsp[u''] = val
                elif attr.startswith('xmlns:'):
                    self._document_nsp[attr[6:]] = val
                else:
                    self._document_attrs[attr] = val
            ns = self._document_nsp.get(nsp, 'http://www.gajim.org/xmlns/undeclared-root')
            try:
                self.stream_header_received(ns, name, attrs)
            except ValueError as e:
                self._document_attrs = None
                raise ValueError(str(e))
        if not self.last_is_data and self._ptr.parent:
            # No text between the previous tag and this one: add an empty
            # text entry to the parent's data list.
            self._ptr.parent.data.append('')
        self.last_is_data = 0

    def endtag(self, tag):
        """XML Parser callback. Used internally."""
        self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s" % (self.__depth, tag), 'up')
        self.check_data_buffer()
        if self.__depth == self._dispatch_depth:
            if self._mini_dom.getName() == 'error':
                children = self._mini_dom.getChildren()
                # An empty <error/> element carries no condition child.
                if children:
                    self.streamError = children[0].getName()
            self.dispatch(self._mini_dom)
        elif self.__depth > self._dispatch_depth:
            self._ptr = self._ptr.parent
        else:
            self.DEBUG(DBG_NODEBUILDER, "Got higher than dispatch level. Stream terminated?", 'stop')
        self._dec_depth()
        self.last_is_data = 0
        if self.__depth == 0:
            self.stream_footer_received()

    def handle_cdata(self, data):
        """XML Parser callback. Used internally."""
        self.DEBUG(DBG_NODEBUILDER, data, 'data')
        if self.last_is_data:
            # Continuation of a text run: extend the buffer if one is open.
            if self.data_buffer:
                self.data_buffer.append(data)
        elif self._ptr:
            # First text chunk after a tag: open a new buffer.
            self.data_buffer = [data]
            self.last_is_data = 1

    def handle_namespace_start(self, prefix, uri):
        """XML Parser callback. Used internally."""
        self.check_data_buffer()

    def DEBUG(self, level, text, comment=None):
        """Get all NodeBuilder walking events. Can be used for debugging if redefined."""

    def getDom(self):
        """Return the Node built so far (None if nothing was built)."""
        self.check_data_buffer()
        return self._mini_dom

    def dispatch(self, stanza):
        """Get called with each node completed at dispatch depth.

        Called when the NodeBuilder reaches the dispatch depth on its way
        up, with the built node as argument. Can be redefined to convert
        incoming XML stanzas to program events.
        """

    def stream_header_received(self, ns, tag, attrs):
        """Method called when the stream has just been opened."""
        self.check_data_buffer()

    def stream_footer_received(self):
        """Method called when the stream has just been closed."""
        self.check_data_buffer()

    def has_received_endtag(self, level=0):
        """Return True if at least one end tag was seen (at level)."""
        return self.__depth <= level and self.__max_depth > level

    def _inc_depth(self):
        """Go one element deeper, tracking the previous and maximum depth."""
        self.__last_depth = self.__depth
        self.__depth += 1
        self.__max_depth = max(self.__depth, self.__max_depth)

    def _dec_depth(self):
        """Go one element up, remembering the previous depth."""
        self.__last_depth = self.__depth
        self.__depth -= 1

def XML2Node(xml):
    """Convert the supplied textual string into an XML node.

    Handy f.e. for reading a configuration file. The text is parsed as one
    complete document, so xml.parsers.expat.ExpatError is raised if the
    provided string is not well-formed XML. Returns None for empty input.
    """
    return NodeBuilder(xml).getDom()

def BadXML2Node(xml):
    """Convert the supplied textual string into an XML node, tolerating cut-off input.

    Survives if the xml data is cut half way round, i.e.
    "<html>some text <br>some more text": the node built from the part that
    could be parsed is returned. Still raises xml.parsers.expat.ExpatError
    on misplaced tags, f.e. "<b>some text <br>some more text</b>" will not
    work. Returns None for empty input.
    """
    builder = NodeBuilder()
    if xml:
        # Feed the text as a non-final chunk: a final parse makes expat
        # reject a document whose closing tags are missing.
        builder.Parse(xml, 0)
    return builder.getDom()

# Source Code for Module xmpp.simplexml