1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 """Simplexml module provides xmpppy library with all needed tools to handle XML nodes and XML streams.
18 I'm personally using it in many other separate projects. It is designed to be as standalone as possible."""
19
20 import xml.parsers.expat
21 import weakref
22
def XMLescape(txt):
    """Return `txt` with the XML special characters replaced by entities.

    The characters & < > and the double quote are replaced by their
    predefined XML entities.  Form feed (U+000C) and escape (U+001B)
    characters are removed from the result.
    """
    # "&" has to be replaced first: the other replacements introduce new
    # ampersands that must not be escaped a second time.
    escaped = txt.replace("&", "&amp;")
    escaped = escaped.replace("<", "&lt;")
    escaped = escaped.replace(">", "&gt;")
    escaped = escaped.replace('"', "&quot;")
    return escaped.replace(u'\x0C', "").replace(u'\x1B', "")
27
28 ENCODING='utf-8'
def ustr(what):
    """Return `what` as a unicode string.

    A unicode object is handed back untouched.  Anything else is rendered
    through its own __str__ method (or str() when it has none); a
    non-unicode result is then decoded using the module-level ENCODING.
    """
    if isinstance(what, unicode):
        return what
    try:
        text = what.__str__()
    except AttributeError:
        text = str(what)
    if isinstance(text, unicode):
        return text
    return unicode(text, ENCODING)
36
38 """ Node class describes syntax of separate XML Node. It have a constructor that permits node creation
39 from set of "namespace name", attributes and payload of text strings and other nodes.
40 It does not natively support building node from text string and uses NodeBuilder class for that purpose.
41 After creation node can be mangled in many ways so it can be completely changed.
42 Also node can be serialised into string in one of two modes: default (where the textual representation
43 of node describes it exactly) and "fancy" - with whitespace added to make indentation and thus make
44 result more readable by human.
45
46 Node class have attribute FORCE_NODE_RECREATION that is defaults to False thus enabling fast node
47 replication from the some other node. The drawback of the fast way is that new node shares some
48 info with the "original" node that is changing the one node may influence the other. Though it is
49 rarely needed (in xmpppy it is never needed at all since I'm usually never using original node after
50 replication (and using replication only to move upwards on the classes tree).
51 """
52 FORCE_NODE_RECREATION=0
def __init__(self, tag=None, attrs={}, payload=[], parent=None, nsp=None, node_built=False, node=None):
    """ Build a node from explicit arguments and/or from an existing "node".

    "tag" is the node name; unless "node_built" is set it may be prefixed
    with the namespace, separated by a space.  With "node_built" set the
    tag is split on ':' and the prefix is resolved through lookup_nsp().
    "attrs" is a dictionary of attributes, "payload" a string or a list of
    strings and Node instances, "parent" the node this one belongs to and
    "nsp" a prefix->namespace mapping copied into the lookup cache.
    "node" is either another Node instance (its fields are copied) or
    anything else, which is handed to NodeBuilder together with this
    instance.  The remaining arguments are applied after "node"."""
    if node:
        if self.FORCE_NODE_RECREATION and isinstance(node, Node):
            # Serialise first so the replica is rebuilt by the parser
            # instead of being copied field by field.
            node = str(node)
        if isinstance(node, Node):
            self.name, self.namespace, self.parent = node.name, node.namespace, node.parent
            # Containers are copied one level deep; the elements themselves
            # stay shared with the source node.
            self.attrs = dict(node.attrs)
            self.data = list(node.data)
            self.kids = list(node.kids)
            self.nsd = dict(node.nsd)
        else:
            node = NodeBuilder(node, self)
            node_built = True
    else:
        self.name, self.namespace, self.parent = 'tag', '', None
        self.attrs, self.data, self.kids, self.nsd = {}, [], [], {}
    if parent:
        self.parent = parent
    self.nsp_cache = dict(nsp) if nsp else {}
    for attr, val in attrs.items():
        # Namespace declarations are additionally recorded in self.nsd.
        if attr == 'xmlns':
            self.nsd[u''] = val
        elif attr.startswith('xmlns:'):
            self.nsd[attr[6:]] = val
        self.attrs[attr] = val
    if tag:
        if node_built:
            pfx, self.name = ([''] + tag.split(':'))[-2:]
            self.namespace = self.lookup_nsp(pfx)
        elif ' ' in tag:
            self.namespace, self.name = tag.split()
        else:
            self.name = tag
    if isinstance(payload, basestring):
        payload = [payload]
    for item in payload:
        if isinstance(item, Node):
            self.addChild(node=item)
        else:
            self.data.append(ustr(item))
98
def lookup_nsp(self, pfx=''):
    """ Resolve namespace prefix "pfx" to a namespace.

    The node's own declarations (self.nsd) are consulted first, then the
    lookup cache (self.nsp_cache).  If neither knows the prefix the parent
    is asked and its answer is cached; a node without parent returns the
    'http://www.gajim.org/xmlns/undeclared' placeholder. """
    for table in (self.nsd, self.nsp_cache):
        known = table.get(pfx, None)
        if known is not None:
            return known
    if not self.parent:
        return 'http://www.gajim.org/xmlns/undeclared'
    inherited = self.parent.lookup_nsp(pfx)
    self.nsp_cache[pfx] = inherited
    return inherited
110
def __str__(self, fancy=0):
    """ Serialise the node into its textual XML representation.

    A true "fancy" value produces indented, line-broken output; it also
    acts as the nesting level (children are rendered with fancy+1). """
    indent = (fancy - 1) * 2 * ' '

    def chunk(idx):
        # Escaped CDATA entry number idx, or '' when there is none.
        if idx >= len(self.data):
            return ''
        text = self.data[idx]
        if fancy:
            text = text.strip()
        return XMLescape(text)

    out = indent + "<" + self.name
    # The namespace is written only when it differs from the parent's and
    # is not already present as an explicit attribute.
    if (self.namespace
            and (not self.parent or self.parent.namespace != self.namespace)
            and 'xmlns' not in self.attrs):
        out += ' xmlns="%s"' % self.namespace
    for key, raw in self.attrs.items():
        out += ' %s="%s"' % (key, XMLescape(ustr(raw)))
    out += ">"
    if self.kids and fancy:
        out += "\n"
    for idx, kid in enumerate(self.kids):
        out += chunk(idx)
        if isinstance(kid, Node):
            out += kid.__str__(fancy and fancy + 1)
        elif kid:
            out += kid.__str__()
    # CDATA entry following the last child (entry 0 when there are none).
    out += chunk(len(self.kids))
    if not self.kids and out.endswith('>'):
        # Nothing was written after the opening tag: collapse it.
        out = out[:-1] + ' />'
    else:
        if fancy and not self.data:
            out += indent
        out += "</" + self.name + ">"
    if fancy:
        out += "\n"
    return out
def getCDATA(self):
    """ Serialise node, dropping all tags and leaving CDATA intact.

    This effectively removes all formatting and returns only the text that
    was contained in the XML: for every child slot the CDATA entry at the
    same index is emitted, followed by the child's own getCDATA() result
    (empty child slots are skipped), and finally the CDATA entry following
    the last child. """
    parts = []
    for idx, kid in enumerate(self.kids):
        # self.data may be shorter than self.kids; guard the index the
        # same way __str__ does instead of raising IndexError.
        if idx < len(self.data):
            parts.append(self.data[idx])
        if kid:
            parts.append(kid.getCDATA())
    tail = len(self.kids)
    if tail < len(self.data):
        parts.append(self.data[tail])
    return "".join(parts)
def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
    """ Append a child node and return it.

    When "node" is given it is adopted as the child; otherwise a new Node
    is created from "name", "attrs" and "payload".  In both cases the
    child's parent becomes a weak proxy of this node.  "namespace", when
    given, is applied through the child's setNamespace().  Raises
    AttributeError if "attrs" contains an 'xmlns' key. """
    if 'xmlns' in attrs:
        raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
    owner = weakref.proxy(self)
    if node:
        node.parent = owner
        child = node
    else:
        child = Node(tag=name, parent=owner, attrs=attrs, payload=payload)
    if namespace:
        child.setNamespace(namespace)
    self.kids.append(child)
    # Keep the CDATA list in step with the list of children.
    self.data.append(u'')
    return child
172 """ Adds some CDATA to node. """
173 self.data.append(ustr(data))
174 self.kids.append(None)
176 """ Removes all CDATA from the node. """
177 self.data=[]
179 """ Deletes an attribute "key" """
180 del self.attrs[key]
182 """ Deletes the "node" from the node's childs list, if "node" is an instance.
183 Else deletes the first node that have specified name and (optionally) attributes. """
184 if not isinstance(node, Node): node=self.getTag(node,attrs)
185 self.kids[self.kids.index(node)]=None
186 return node
188 """ Returns all node's attributes as dictionary. """
189 return self.attrs
191 """ Returns value of specified attribute. """
192 try: return self.attrs[key]
193 except: return None
195 """ Returns all node's child nodes as list. """
196 return self.kids
198 """ Returns all node CDATA as string (concatenated). """
199 return ''.join(self.data)
201 """ Returns the name of node """
202 return self.name
204 """ Returns the namespace of node """
205 return self.namespace
207 """ Returns the parent of node (if present). """
208 return self.parent
210 """ Return the payload of node i.e. list of child nodes and CDATA entries.
211 F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned list:
212 ['text1', <nodea instance>, <nodeb instance>, ' text2']. """
213 ret=[]
214 for i in range(max(len(self.data),len(self.kids))):
215 if i < len(self.data) and self.data[i]: ret.append(self.data[i])
216 if i < len(self.kids) and self.kids[i]: ret.append(self.kids[i])
217 return ret
def getTag(self, name, attrs={}, namespace=None):
    """ Return the first child matching "name", "attrs" and "namespace".

    The lookup is delegated to getTags() with one=1, whose result is
    returned unchanged. """
    first_match = self.getTags(name, attrs, namespace, one=1)
    return first_match
223 """ Returns attribute value of the child with specified name (or None if no such attribute)."""
224 try: return self.getTag(tag).attrs[attr]
225 except: return None
227 """ Returns cocatenated CDATA of the child with specified name."""
228 try: return self.getTag(tag).getData()
229 except: return None
243
255
257 """ Sets attribute "key" with the value "val". """
258 self.attrs[key]=val
260 """ Sets node's CDATA to provided string. Resets all previous CDATA!"""
261 self.data=[ustr(data)]
263 """ Changes the node name. """
264 self.name = val
266 """ Changes the node namespace. """
267 self.namespace=namespace
269 """ Sets node's parent to "node". WARNING: do not checks if the parent already present
270 and not removes the node from the list of childs of previous parent. """
271 self.parent = weakref.proxy(node) if node else None
273 """ Sets node payload according to the list specified. WARNING: completely replaces all node's
274 previous content. If you wish just to add child or CDATA - use addData or addChild methods. """
275 if isinstance(payload, basestring): payload=[payload]
276 if add: self.kids+=payload
277 else: self.kids=payload
def setTag(self, name, attrs={}, namespace=None):
    """ Return the child matching "name", "attrs" and "namespace",
    creating it through addChild() when getTags() finds none. """
    existing = self.getTags(name, attrs, namespace=namespace, one=1)
    if not existing:
        return self.addChild(name, attrs, namespace=namespace)
    return existing
285 """ Creates new node (if not already present) with name "tag"
286 and sets it's attribute "attr" to value "val". """
287 try: self.getTag(tag).attrs[attr]=val
288 except: self.addChild(tag,attrs={attr:val})
290 """ Creates new node (if not already present) with name "tag" and (optionally) attributes "attrs"
291 and sets it's CDATA to string "val". """
292 try: self.getTag(tag,attrs).setData(ustr(val))
293 except: self.addChild(tag,attrs,payload=[ustr(val)])
295 """ Checks if node have attribute "key"."""
296 return key in self.attrs
298 """ Returns node's attribute "item" value. """
299 return self.getAttr(item)
301 """ Sets node's attribute "item" value. """
302 return self.setAttr(item,val)
304 """ Deletes node's attribute "item". """
305 return self.delAttr(item)
307 """ Reduce memory usage caused by T/NT classes - use memory only when needed. """
308 if attr=='T':
309 self.T=T(self)
310 return self.T
311 if attr=='NT':
312 self.NT=NT(self)
313 return self.NT
314 raise AttributeError
315
317 """ Auxiliary class used to quick access to node's child nodes. """
def __init__(self, node):
    """ Remember the wrapped node. """
    # Stored straight into the instance dictionary rather than through
    # normal attribute assignment.
    self.__dict__.update(node=node)
324
326 """ Auxiliary class used to quick create node's child nodes. """
329 if isinstance(val,Node): self.node.addChild(attr,node=val)
330 else: return self.node.addChild(attr,payload=[val])
331
332 DBG_NODEBUILDER = 'nodebuilder'
334 """ Builds a Node class minidom from data parsed to it. This class used for two purposes:
335 1. Creation an XML Node from a textual representation. F.e. reading a config file. See an XML2Node method.
336 2. Handling an incoming XML stream. This is done by mangling
337 the __dispatch_depth parameter and redefining the dispatch method.
338 You do not need to use this class directly if you do not designing your own XML handler."""
def __init__(self,data=None,initial_node=None):
    """ Set up the expat parser and the builder state.

    Both parameters are optional.  "initial_node", when provided, becomes
    the node the parsed document is built into (a kind of "node upgrade");
    otherwise no node exists until parsing creates one.  "data", when
    provided, is fed to the parser immediately as a complete document.
    """
    self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start')
    parser = xml.parsers.expat.ParserCreate()
    parser.StartElementHandler = self.starttag
    parser.EndElementHandler = self.endtag
    parser.CharacterDataHandler = self.handle_cdata
    parser.StartNamespaceDeclHandler = self.handle_namespace_start
    parser.buffer_text = True
    self._parser = parser
    # Expose the parser's Parse method directly on the builder.
    self.Parse = parser.Parse

    # Depth bookkeeping.
    self.__depth = self.__last_depth = self.__max_depth = 0
    self._dispatch_depth = 1

    # Document level information, filled in by starttag().
    self._document_attrs = None
    self._document_nsp = None

    # Tree under construction and pending character data.
    self._mini_dom = initial_node
    self._ptr = None
    self.last_is_data = 1
    self.data_buffer = None
    self.streamError = ''

    if data:
        parser.Parse(data, 1)
368
def check_data_buffer(self):
    """ Flush buffered character data into the current node.

    The buffered pieces are joined into one string appended to
    self._ptr.data; the buffer is then emptied and reset to None.  Does
    nothing when the buffer is empty or None. """
    pending = self.data_buffer
    if not pending:
        return
    self._ptr.data.append(''.join(pending))
    del pending[:]
    self.data_buffer = None
374
376 """ Method used to allow class instance to be garbage-collected. """
377 self.check_data_buffer()
378 self._parser.StartElementHandler = None
379 self._parser.EndElementHandler = None
380 self._parser.CharacterDataHandler = None
381 self._parser.StartNamespaceDeclHandler = None
382
def starttag(self, tag, attrs):
    """XML Parser callback for an opening tag. Used internally.

    Flushes pending character data, increases the depth and then:
    at dispatch depth creates the top node (or re-initialises the node the
    builder already holds); below it appends a new child to the current
    node and descends into it.  For the outermost element (depth 1) the
    attributes are split into namespace declarations and ordinary
    attributes and stream_header_received() is called.  A ValueError
    raised there resets the document attributes and is re-raised as a new
    ValueError carrying the same message."""
    self.check_data_buffer()
    self._inc_depth()
    self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, repr(attrs)), 'down')
    if self.__depth == self._dispatch_depth:
        if not self._mini_dom:
            self._mini_dom = Node(tag=tag, attrs=attrs, nsp=self._document_nsp, node_built=True)
        else:
            # Re-run the initialiser on the existing object so that the
            # node handed to the builder keeps its identity.
            Node.__init__(self._mini_dom, tag=tag, attrs=attrs, nsp=self._document_nsp, node_built=True)
        self._ptr = self._mini_dom
    elif self.__depth > self._dispatch_depth:
        child = Node(tag=tag, parent=self._ptr, attrs=attrs, node_built=True)
        self._ptr.kids.append(child)
        self._ptr = child
    if self.__depth == 1:
        self._document_attrs = {}
        self._document_nsp = {}
        nsp, name = ([''] + tag.split(':'))[-2:]
        for attr, val in attrs.items():
            if attr == 'xmlns':
                self._document_nsp[u''] = val
            elif attr.startswith('xmlns:'):
                self._document_nsp[attr[6:]] = val
            else:
                self._document_attrs[attr] = val
        ns = self._document_nsp.get(nsp, 'http://www.gajim.org/xmlns/undeclared-root')
        try:
            self.stream_header_received(ns, name, attrs)
        except ValueError as e:
            self._document_attrs = None
            raise ValueError(str(e))
    if not self.last_is_data and self._ptr.parent:
        # Two tags in a row: give the parent an empty CDATA entry so its
        # data list stays aligned with its children.
        self._ptr.parent.data.append('')
    self.last_is_data = 0
417
def endtag(self, tag):
    """XML Parser callback for a closing tag. Used internally.

    Flushes pending character data.  At dispatch depth the finished node
    is passed to dispatch(); if that node is named 'error' the name of its
    first child is stored in self.streamError first.  Below dispatch depth
    the current pointer moves back to the parent.  Afterwards the depth is
    decreased and stream_footer_received() is called once it reaches 0."""
    self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s" % (self.__depth, tag), 'up')
    self.check_data_buffer()
    if self.__depth == self._dispatch_depth:
        if self._mini_dom.getName() == 'error':
            children = self._mini_dom.getChildren()
            # An error element without children would otherwise raise
            # IndexError from inside the parser callback.
            if children:
                self.streamError = children[0].getName()
        self.dispatch(self._mini_dom)
    elif self.__depth > self._dispatch_depth:
        self._ptr = self._ptr.parent
    else:
        self.DEBUG(DBG_NODEBUILDER, "Got higher than dispatch level. Stream terminated?", 'stop')
    self._dec_depth()
    self.last_is_data = 0
    if self.__depth == 0:
        self.stream_footer_received()
433
def handle_cdata(self, data):
    """XML Parser callback for character data. Used internally.

    The first piece of text after a tag starts a new buffer (only when a
    current node exists); following pieces are appended to that buffer."""
    self.DEBUG(DBG_NODEBUILDER, data, 'data')
    if not self.last_is_data:
        if self._ptr:
            self.data_buffer = [data]
            self.last_is_data = 1
        return
    if self.data_buffer:
        self.data_buffer.append(data)
443
447
def DEBUG(self, level, text, comment=None):
    """ Receives every NodeBuilder walking event; does nothing here.
    Redefine it to obtain debugging output. """
def getDom(self):
    """ Flush pending character data and return the node built so far. """
    self.check_data_buffer()
    built = self._mini_dom
    return built
455 """ Gets called when the NodeBuilder reaches some level of depth on it's way up with the built
456 node as argument. Can be redefined to convert incoming XML stanzas to program events. """
463
465 """ Return True if at least one end tag was seen (at level) """
466 return self.__depth <= level and self.__max_depth > level
467
469 self.__last_depth = self.__depth
470 self.__depth += 1
471 self.__max_depth = max(self.__depth, self.__max_depth)
472
474 self.__last_depth = self.__depth
475 self.__depth -= 1
476
478 """ Converts supplied textual string into XML node. Handy f.e. for reading configuration file.
479 Raises xml.parser.expat.parsererror if provided string is not well-formed XML. """
480 return NodeBuilder(xml).getDom()
481
483 """ Converts supplied textual string into XML node. Survives if xml data is cutted half way round.
484 I.e. "<html>some text <br>some more text". Will raise xml.parser.expat.parsererror on misplaced
485 tags though. F.e. "<b>some text <br>some more text</b>" will not work."""
486 return NodeBuilder(xml).getDom()
487