# Prefer the C implementation of ElementTree when it is available and fall
# back to the pure-Python module otherwise; both expose the same API as ET.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

# Parser that is told explicitly to treat its input as UTF-8.
utf8_parser = ET.XMLParser(encoding='utf-8')

# The text is re-encoded to UTF-8 bytes so that it matches the encoding the
# parser was configured with, then wrapped in a file-like object for parse().
# NOTE(review): `r` and `StringIO` are defined outside this snippet; `r`
# presumably is an HTTP response object exposing `.text` -- confirm.
tree = ET.parse(StringIO(r.text.encode('utf-8')), parser=utf8_parser)
root = tree.getroot()
Python 2.7.5 (default, May 15 2013, 22:43:36) [MSC v.1500 32 bit (Intel)] on win32
Type "copyright", "credits" or "license()" for more information.
>>> from SimpleXMLRPCServer import SimpleXMLRPCServer
>>> import xmlrpclib
>>> def a():
    return xmlrpclib.Binary({'a':1})
>>> server = SimpleXMLRPCServer(("localhost", 8000))
>>> print "Listening on port 8000..."
Listening on port 8000...
>>> server.register_function(python_logo, 'python_logo')
Traceback (most recent call last):
File "", line 1, in
server.register_function(python_logo, 'python_logo')
NameError: name 'python_logo' is not defined
>>> server.register_function(a, 'a')
>>> server.serve_forever()
127.0.0.1 - - [22/Apr/2014 22:36:32] "POST / HTTP/1.1" 200 -
Traceback (most recent call last):
File "", line 1, in
server.serve_forever()
File "C:\Python27\lib\SocketServer.py", line 236, in serve_forever
poll_interval)
File "C:\Python27\lib\SocketServer.py", line 155, in _eintr_retry
return func(*args)
KeyboardInterrupt
>>> import json
>>> def a():
    return xmlrpclib.Binary(json.dumps({'a':1}))
>>> server.register_function(a, 'a')
>>> server.serve_forever()
127.0.0.1 - - [22/Apr/2014 22:37:27] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [22/Apr/2014 22:37:39] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [22/Apr/2014 22:38:04] "POST / HTTP/1.1" 200 -
Python 2.7.5 (default, May 15 2013, 22:43:36) [MSC v.1500 32 bit (Intel)] on win32
Type "copyright", "credits" or "license()" for more information.
>>> import xmlrpclib
>>> proxy = xmlrpclib.ServerProxy("http://localhost:8000/")
>>> proxy.a()
Traceback (most recent call last):
File "", line 1, in
proxy.a()
File "C:\Python27\lib\xmlrpclib.py", line 1224, in __call__
return self.__send(self.__name, args)
File "C:\Python27\lib\xmlrpclib.py", line 1578, in __request
verbose=self.__verbose
File "C:\Python27\lib\xmlrpclib.py", line 1264, in request
return self.single_request(host, handler, request_body, verbose)
File "C:\Python27\lib\xmlrpclib.py", line 1297, in single_request
return self.parse_response(response)
File "C:\Python27\lib\xmlrpclib.py", line 1473, in parse_response
return u.close()
File "C:\Python27\lib\xmlrpclib.py", line 793, in close
raise Fault(**self._stack[0])
Fault: <Fault 1: "<type 'exceptions.TypeError'>:must be string or buffer, not dict">
>>> proxy.a()
>>> proxy.a().data
'{"a": 1}'
>>> import json
>>> json.loads(proxy.a().data)
{u'a': 1}
>>>
# This and most other samples read in the Google copyright data
infile = 'copyright.xml'
results = etree.parse(infile, parser)
# When iterated over, 'results' will contain the output from
# target parser's close() method
out = open('titles.txt', 'w')
out.write('\n'.join(results))
out.close()

在运行版权数据时,代码运行时间为 54 秒。目标解析可以实现合理的速度并且不会生成消耗内存的解析树,但是在数据中为所有元素触发事件。对于特别大型的文档,如果只对其中一些元素感兴趣,那么这种方法并不理想,就像在这个例子中一样。能否将处理限制到选择的标记并获得较好的性能呢?
In [7]: f = StringIO.StringIO(r””” …: ① …: Mark …: http://diveintomark.org/ …: …:Dive into history, 2009 edition ② …: …: tag:diveintomark.org,2009-03-27:/archives/20090327172042 ④ …: 2009-03-27T21:56:07Z ⑤ …: 2009-03-27T17:20:42Z …: ⑥ …: …: …:
Putting an entire chapter on one page sounds ⑦ …: bloated, but consider this — my longest chapter so far …: would be 75 printed pages, and it loads in under 5 seconds… …: On dialup.
…: ⑧ …: “””)
http://lxml.de/parsing.html#parsers
The target parser interface
As in ElementTree, and similar to a SAX event handler, you can pass a target object to the parser:

>>> class EchoTarget(object):
...     def start(self, tag, attrib):
...         print("start %s %r" % (tag, dict(attrib)))
...     def end(self, tag):
...         print("end %s" % tag)
...     def data(self, data):
...         print("data %r" % data)
...     def comment(self, text):
...         print("comment %s" % text)
...     def close(self):
...         print("close")
...         return "closed!"

>>> parser = etree.XMLParser(target = EchoTarget())

>>> result = etree.XML("<element>some<!--comment-->text</element>",
...                    parser)
start element {}
data u'some'
comment comment
data u'text'
end element
close

>>> print(result)
closed!

It is important for the .close() method to reset the parser target to a usable state, so that you can reuse the parser as often as you like:

>>> result = etree.XML("<element>some<!--comment-->text</element>",
...                    parser)
start element {}
data u'some'
comment comment
data u'text'
end element
close
>>> print(result)
closed!

Starting with lxml 2.3, the .close() method will also be called in the error case. This diverges (分歧) from the behaviour of ElementTree, but allows target objects to clean up their state in all situations, so that the parser can reuse them afterwards.

>>> class CollectorTarget(object):
...     def __init__(self):
...         self.events = []
...     def start(self, tag, attrib):
...         self.events.append("start %s %r" % (tag, dict(attrib)))
...     def end(self, tag):
...         self.events.append("end %s" % tag)
...     def data(self, data):
...         self.events.append("data %r" % data)
...     def comment(self, text):
...         self.events.append("comment %s" % text)
...     def close(self):
...         self.events.append("close")
...         return "closed!"

>>> parser = etree.XMLParser(target = CollectorTarget())

>>> result = etree.XML("<element>some</error>",
...                    parser)        # doctest: +ELLIPSIS
Traceback (most recent call last):
  ...
lxml.etree.XMLSyntaxError: Opening and ending tag mismatch...

>>> for event in parser.target.events:
...     print(event)
start element {}
data u'some'
close

Note that the parser does not build a tree when using a parser target. The result of the parser run is whatever the target object returns from its .close() method. If you want to return an XML tree here, you have to create it programmatically in the target object. An example for a parser target that builds a tree is the TreeBuilder:

>>> parser = etree.XMLParser(target = etree.TreeBuilder())
>>> result = etree.XML("<element>some<!--comment-->text</element>",
...                    parser)

>>> print(result.tag)
element
>>> print(result[0].text)
comment
配置nginx 增加一段记录

location /site1 {
    uwsgi_pass 127.0.0.1:9000;
    include uwsgi_params;
}
编译安装 uWSGI
sudo aptitude install python-dev libxml2-dev
wget http://projects.unbit.it/downloads/uwsgi-0.9.6.8.tar.gz
tar zxfv uwsgi-0.9.6.8.tar.gz
cd uwsgi-0.9.6.8/
make
配置 django 使用 uWSGI

gamexg@vps1:~/web$ mkdir site
gamexg@vps1:~/web$ cd site
gamexg@vps1:~/web/site$ mkdir site1
gamexg@vps1:~/web/site$ cd site1/
gamexg@vps1:~/web/site/site1$ mkdir www
gamexg@vps1:~/web/site/site1$ mkdir www/static
gamexg@vps1:~/web/site/site1$ django-admin startproject site1
gamexg@vps1:~/web/site/site1$ cd site1/
gamexg@vps1:~/web/site/site1/site1$ ./manage.py startapp app1
gamexg@vps1:~/web/site/site1/site1$ vi myapp.py
gamexg@vps1:~/web/site/site1/site1$ cat myapp.py
import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'test1.settings'
import django.core.handlers.wsgi
application = django.core.handlers.wsgi.WSGIHandler()
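vi uwsgi.xml

<uwsgi>
    <socket>127.0.0.1:9000</socket>
    <master>true</master>
    <pidfile>/home/gamexg/web/var/uwsgi.pid</pidfile>
    <processes>3</processes>
    <pythonpath>/home/gamexg/web/site/site1/site1</pythonpath>
    <pythonpath>/home/gamexg/web/site/site1</pythonpath>
    <module>myapp</module>
</uwsgi>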