magic-wormhole-transit-relay/src/wormhole_transit_relay/transit_server.py

from __future__ import print_function, unicode_literals
import re, time, json
from collections import defaultdict
from twisted.python import log
from twisted.internet import protocol
from twisted.protocols.basic import LineReceiver
from .database import get_db

SECONDS = 1.0
MINUTE = 60*SECONDS
HOUR = 60*MINUTE
DAY = 24*HOUR
MB = 1000*1000

def round_to(size, coarseness):
    return int(coarseness*(1+int((size-1)/coarseness)))

def blur_size(size):
    if size == 0:
        return 0
    if size < 1e6:
        return round_to(size, 10e3)
    if size < 1e9:
        return round_to(size, 1e6)
    return round_to(size, 100e6)

class TransitConnection(LineReceiver):
    delimiter = b'\n'
    # maximum length of a line we will accept before the handshake is complete.
    # This must be >= to the longest possible handshake message.

    MAX_LENGTH = 1024

    def __init__(self):
        self._got_token = False
        self._got_side = False
        self._sent_ok = False
        self._mood = None
        self._buddy = None
        self._total_sent = 0

    def describeToken(self):
        d = "-"
        if self._got_token:
            d = self._got_token[:16].decode("ascii")
        if self._got_side:
            d += "-" + self._got_side.decode("ascii")
        else:
            d += "-<unsided>"
        return d

    def connectionMade(self):
        self._started = time.time()
        self._log_requests = self.factory._log_requests
        self.transport.setTcpKeepAlive(True)

    def lineReceived(self, line):
        # old: "please relay {64}\n"
        old = re.fullmatch(br"please relay (\w{64})", line)
        if old:
            token = old.group(1)
            return self._got_handshake(token, None)

        # new: "please relay {64} for side {16}\n"
        new = re.fullmatch(br"please relay (\w{64}) for side (\w{16})", line)
        if new:
            token = new.group(1)
            side = new.group(2)
            return self._got_handshake(token, side)

        self.sendLine(b"bad handshake")
        if self._log_requests:
            log.msg("transit handshake failure")
        return self.disconnect_error()

    def rawDataReceived(self, data):
        if self._sent_ok:
            self._total_sent += len(data)
            self._buddy.transport.write(data)
            return

        self.sendLine(b"impatient")
        if self._log_requests:
            log.msg("transit impatience failure")

    def _got_handshake(self, token, side):
        self._got_token = token
        self._got_side = side
        self._mood = "lonely" # until buddy connects
        self.setRawMode()
        self.factory.connection_got_token(token, side, self)

    def buddy_connected(self, them):
        self._buddy = them
        self._mood = "happy"
        self.sendLine(b"ok")
        self._sent_ok = True
        # Connect the two as a producer/consumer pair. We use streaming=True,
        # so this expects the IPushProducer interface, and uses
        # pauseProducing() to throttle, and resumeProducing() to unthrottle.
        self._buddy.transport.registerProducer(self.transport, True)
        # The Transit object calls buddy_connected() on both protocols, so
        # there will be two producer/consumer pairs.

    def buddy_disconnected(self):
        if self._log_requests:
            log.msg("buddy_disconnected %s" % self.describeToken())
        self._buddy = None
        self._mood = "jilted"
        self.transport.loseConnection()

    def disconnect_error(self):
        # we haven't finished the handshake, so there are no tokens tracking
        # us
        self._mood = "errory"
        self.transport.loseConnection()
        if self.factory._debug_log:
            log.msg("transitFailed %r" % self)

    def disconnect_redundant(self):
        # this is called if a buddy connected and we were found unnecessary.
        # Any token-tracking cleanup will have been done before we're called.
        self._mood = "redundant"
        self.transport.loseConnection()

    def connectionLost(self, reason):
        finished = time.time()
        total_time = finished - self._started

        # Record usage. There are seven cases:
        # * n1: the handshake failed, not a real client (errory)
        # * n2: real client disconnected before any buddy appeared (lonely)
        # * n3: real client closed as redundant after buddy appears (redundant)
        # * n4: real client connected first, buddy closes first (jilted)
        # * n5: real client connected first, buddy close last (happy)
        # * n6: real client connected last, buddy closes first (jilted)
        # * n7: real client connected last, buddy closes last (happy)

        # * non-connected clients (1,2,3) always write a usage record
        # * for connected clients, whoever disconnects first gets to write the
        #   usage record (5, 7). The last disconnect doesn't write a record.

        if self._mood == "errory": # 1
            assert not self._buddy
            self.factory.recordUsage(self._started, "errory", 0,
                                     total_time, None)
        elif self._mood == "redundant": # 3
            assert not self._buddy
            self.factory.recordUsage(self._started, "redundant", 0,
                                     total_time, None)
        elif self._mood == "jilted": # 4 or 6
            # we were connected, but our buddy hung up on us. They record the
            # usage event, we do not
            pass
        elif self._mood == "lonely": # 2
            assert not self._buddy
            self.factory.recordUsage(self._started, "lonely", 0,
                                     total_time, None)
        else: # 5 or 7
            # we were connected, we hung up first. We record the event.
            assert self._mood == "happy", self._mood # TODO: mood==None
            assert self._buddy
            starts = [self._started, self._buddy._started]
            total_time = finished - min(starts)
            waiting_time = max(starts) - min(starts)
            total_bytes = self._total_sent + self._buddy._total_sent
            self.factory.recordUsage(self._started, "happy", total_bytes,
                                     total_time, waiting_time)

        if self._buddy:
            self._buddy.buddy_disconnected()
        self.factory.transitFinished(self, self._got_token, self._got_side,
                                     self.describeToken())

class Transit(protocol.ServerFactory):
    # I manage pairs of simultaneous connections to a secondary TCP port,
    # both forwarded to the other. Clients must begin each connection with
    # "please relay TOKEN for SIDE\n" (or a legacy form without the "for
    # SIDE"). Two connections match if they use the same TOKEN and have
    # different SIDEs (the redundant connections are dropped when a match is
    # made). Legacy connections match any with the same TOKEN, ignoring SIDE
    # (so two legacy connections will match each other).

    # I will send "ok\n" when the matching connection is established, or
    # disconnect if no matching connection is made within MAX_WAIT_TIME
    # seconds. I will disconnect if you send data before the "ok\n". All data
    # you get after the "ok\n" will be from the other side. You will not
    # receive "ok\n" until the other side has also connected and submitted a
    # matching token (and differing SIDE).

    # In addition, the connections will be dropped after MAXLENGTH bytes have
    # been sent by either side, or MAXTIME seconds have elapsed after the
    # matching connections were established. A future API will reveal these
    # limits to clients instead of causing mysterious spontaneous failures.

    # These relay connections are not half-closeable (unlike full TCP
    # connections, applications will not receive any data after half-closing
    # their outgoing side). Applications must negotiate shutdown with their
    # peer and not close the connection until all data has finished
    # transferring in both directions. Applications which only need to send
    # data in one direction can use close() as usual.

    MAX_WAIT_TIME = 30*SECONDS
    MAXLENGTH = 10*MB
    MAXTIME = 60*SECONDS
    protocol = TransitConnection

    def __init__(self, blur_usage, log_file, usage_db):
        self._blur_usage = blur_usage
        self._log_requests = blur_usage is None
        if self._blur_usage:
            log.msg("blurring access times to %d seconds" % self._blur_usage)
            log.msg("not logging Transit connections to Twisted log")
        else:
            log.msg("not blurring access times")
        self._debug_log = False
        self._log_file = log_file
        self._db = None
        if usage_db:
            self._db = get_db(usage_db)
        self._rebooted = time.time()
        # we don't track TransitConnections until they submit a token
        self._pending_requests = defaultdict(set) # token -> set((side, TransitConnection))
        self._active_connections = set() # TransitConnection

    def connection_got_token(self, token, new_side, new_tc):
        potentials = self._pending_requests[token]
        for old in potentials:
            (old_side, old_tc) = old
            if ((old_side is None)
                or (new_side is None)
                or (old_side != new_side)):
                # we found a match
                if self._debug_log:
                    log.msg("transit relay 2: %s" % new_tc.describeToken())

                # drop and stop tracking the rest
                potentials.remove(old)
                for (_, leftover_tc) in potentials:
                    # Don't record this as errory. It's just a spare connection
                    # from the same side as a connection that got used. This
                    # can happen if the connection hint contains multiple
                    # addresses (we don't currently support those, but it'd
                    # probably be useful in the future).
                    leftover_tc.disconnect_redundant()
                self._pending_requests.pop(token)

                # glue the two ends together
                self._active_connections.add(new_tc)
                self._active_connections.add(old_tc)
                new_tc.buddy_connected(old_tc)
                old_tc.buddy_connected(new_tc)
                return
        if self._debug_log:
            log.msg("transit relay 1: %s" % new_tc.describeToken())
        potentials.add((new_side, new_tc))
        # TODO: timer

    def transitFinished(self, tc, token, side, description):
        if token in self._pending_requests:
            side_tc = (side, tc)
            self._pending_requests[token].discard(side_tc)
            if not self._pending_requests[token]: # set is now empty
                del self._pending_requests[token]
        if self._debug_log:
            log.msg("transitFinished %s" % (description,))
        self._active_connections.discard(tc)
        # we could update the usage database "current" row immediately, or wait
        # until the 5-minute timer updates it. If we update it now, just after
        # losing a connection, we should probably also update it just after
        # establishing one (at the end of connection_got_token). For now I'm
        # going to omit these, but maybe someday we'll turn them both on. The
        # consequence is that a manual execution of the munin scripts ("munin
        # run wormhole_transit_active") will give the wrong value just after a
        # connect/disconnect event. Actual munin graphs should accurately
        # report connections that last longer than the 5-minute sampling
        # window, which is what we actually care about.
        #self.timerUpdateStats()

    def recordUsage(self, started, result, total_bytes,
                    total_time, waiting_time):
        if self._debug_log:
            log.msg(format="Transit.recordUsage {bytes}B", bytes=total_bytes)
        if self._blur_usage:
            started = self._blur_usage * (started // self._blur_usage)
            total_bytes = blur_size(total_bytes)
        if self._log_file is not None:
            data = {"started": started,
                    "total_time": total_time,
                    "waiting_time": waiting_time,
                    "total_bytes": total_bytes,
                    "mood": result,
                    }
            self._log_file.write(json.dumps(data)+"\n")
            self._log_file.flush()
        if self._db:
            self._db.execute("INSERT INTO `usage`"
                             " (`started`, `total_time`, `waiting_time`,"
                             "  `total_bytes`, `result`)"
                             " VALUES (?,?,?, ?,?)",
                             (started, total_time, waiting_time,
                              total_bytes, result))
            self._update_stats()
            self._db.commit()

    def timerUpdateStats(self):
        if self._db:
            self._update_stats()
            self._db.commit()

    def _update_stats(self):
        # current status: should be zero when idle
        rebooted = self._rebooted
        updated = time.time()
        connected = len(self._active_connections) / 2
        # TODO: when a connection is half-closed, len(active) will be odd. a
        # moment later (hopefully) the other side will disconnect, but
        # _update_stats isn't updated until later.
        waiting = len(self._pending_requests)
        # "waiting" doesn't count multiple parallel connections from the same
        # side
        incomplete_bytes = sum(tc._total_sent
                               for tc in self._active_connections)
        self._db.execute("DELETE FROM `current`")
        self._db.execute("INSERT INTO `current`"
                         " (`rebooted`, `updated`, `connected`, `waiting`,"
                         "  `incomplete_bytes`)"
                         " VALUES (?, ?, ?, ?, ?)",
                         (rebooted, updated, connected, waiting,
                          incomplete_bytes))