From 2d7f701849af7e9364098f3854437348f3967a5e Mon Sep 17 00:00:00 2001 From: Brian Warner Date: Mon, 28 Sep 2015 00:36:25 -0700 Subject: [PATCH] eventsource_twisted: return unicode, not bytes This roughly parallels the way that blocking/eventsource.py and the PyPI "requests" module work: the server can set the encoding (with "Content-Type: text/event-stream; charset=utf-8"), and the EventSource parser will decode accordingly. However, eventsource_twisted.py *always* returns unicode (on both py2 and py3), even when the server hasn't set an encoding, in which case it assumes utf-8. blocking/eventsource.py returns bytes (on py3, and str on py2) when the server doesn't set an encoding. In the future, eventsource_twisted.py should return bytes when the server doesn't set an encoding. eventsource_twisted.py includes an alternate approach that might be necessary (a commented-out to_unicode() function instead of always using .decode), but I won't be sure until enough of Twisted has been ported to allow the EventSourceParser to be tested. Also fix demo.py for Python 3.
--- src/wormhole/twisted/demo.py | 11 ++++++----- src/wormhole/twisted/eventsource_twisted.py | 19 ++++++++++++++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/wormhole/twisted/demo.py b/src/wormhole/twisted/demo.py index a9313d6..3f8c1b3 100644 --- a/src/wormhole/twisted/demo.py +++ b/src/wormhole/twisted/demo.py @@ -1,3 +1,4 @@ +from __future__ import print_function import sys, json from twisted.internet import reactor from .transcribe import Wormhole @@ -12,15 +13,15 @@ if sys.argv[1] == "send-text": data = json.dumps({"message": message}).encode("utf-8") d = w.get_code() def _got_code(code): - print "code is:", code + print("code is:", code) return w.get_data(data) d.addCallback(_got_code) def _got_data(them_bytes): them_d = json.loads(them_bytes.decode("utf-8")) if them_d["message"] == "ok": - print "text sent" + print("text sent") else: - print "error sending text: %r" % (them_d,) + print("error sending text: %r" % (them_d,)) d.addCallback(_got_data) elif sys.argv[1] == "receive-text": code = sys.argv[2] @@ -30,9 +31,9 @@ elif sys.argv[1] == "receive-text": def _got_data(them_bytes): them_d = json.loads(them_bytes.decode("utf-8")) if "error" in them_d: - print >>sys.stderr, "ERROR: " + them_d["error"] + print("ERROR: " + them_d["error"], file=sys.stderr) return 1 - print them_d["message"] + print(them_d["message"]) d.addCallback(_got_data) else: raise ValueError("bad command") diff --git a/src/wormhole/twisted/eventsource_twisted.py b/src/wormhole/twisted/eventsource_twisted.py index 521ac76..939b0ba 100644 --- a/src/wormhole/twisted/eventsource_twisted.py +++ b/src/wormhole/twisted/eventsource_twisted.py @@ -1,11 +1,18 @@ +#import sys from twisted.python import log, failure from twisted.internet import reactor, defer, protocol from twisted.application import service from twisted.protocols import basic from twisted.web.client import Agent, ResponseDone from twisted.web.http_headers import Headers +from cgi import parse_header from 
..util.eventual import eventually +#if sys.version_info[0] == 2: +# to_unicode = unicode +#else: +# to_unicode = str + class EventSourceParser(basic.LineOnlyReceiver): delimiter = "\n" @@ -15,6 +22,10 @@ class EventSourceParser(basic.LineOnlyReceiver): self.handler = handler self.done_deferred = defer.Deferred() self.eventtype = "message" + self.encoding = "utf-8" + + def set_encoding(self, encoding): + self.encoding = encoding def connectionLost(self, why): if why.check(ResponseDone): @@ -40,6 +51,8 @@ class EventSourceParser(basic.LineOnlyReceiver): self.current_field = None self.current_lines[:] = [] return + line = line.decode(self.encoding) + #line = to_unicode(line, self.encoding) if self.current_field is None: self.current_field, data = line.split(": ", 1) self.current_lines.append(data) @@ -90,7 +103,11 @@ class EventSource: # TODO: service.Service raise EventSourceError("%d: %s" % (resp.code, resp.phrase)) if self.when_connected: self.when_connected() - #if resp.headers.getRawHeaders("content-type") == ["text/event-stream"]: + default_ct = "text/event-stream; charset=utf-8" + ct_headers = resp.headers.getRawHeaders("content-type", [default_ct]) + ct, ct_params = parse_header(ct_headers[0]) + assert ct == "text/event-stream", ct + self.proto.set_encoding(ct_params.get("charset", "utf-8")) resp.deliverBody(self.proto) if self.cancelled: self.kill_connection()