From a898a65b09e6fd3a4f292ee920ee9595919e7d81 Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@lothar.com>
Date: Sat, 4 Nov 2017 12:54:49 -0700
Subject: [PATCH] start on usage-db

---
 src/wormhole_transit_relay/database.py       | 126 ++++++++++++++++++
 src/wormhole_transit_relay/db-schemas/v1.sql |  30 +++++
 src/wormhole_transit_relay/server_tap.py     |  85 ++++++++----
 src/wormhole_transit_relay/transit_server.py | 129 +++++++++----------
 4 files changed, 275 insertions(+), 95 deletions(-)
 create mode 100644 src/wormhole_transit_relay/database.py
 create mode 100644 src/wormhole_transit_relay/db-schemas/v1.sql

diff --git a/src/wormhole_transit_relay/database.py b/src/wormhole_transit_relay/database.py
new file mode 100644
index 0000000..65a2e6c
--- /dev/null
+++ b/src/wormhole_transit_relay/database.py
@@ -0,0 +1,126 @@
+from __future__ import unicode_literals
+import os
+import sqlite3
+import tempfile
+from pkg_resources import resource_string
+from twisted.python import log
+
+class DBError(Exception):
+    pass
+
+def get_schema(version):
+    schema_bytes = resource_string("wormhole_transit_relay",
+                                   "db-schemas/v%d.sql" % version)
+    return schema_bytes.decode("utf-8")
+
+def get_upgrader(new_version):
+    schema_bytes = resource_string("wormhole_transit_relay",
+                                   "db-schemas/upgrade-to-v%d.sql" % new_version)
+    return schema_bytes.decode("utf-8")
+
+TARGET_VERSION = 1
+
+def dict_factory(cursor, row):
+    d = {}
+    for idx, col in enumerate(cursor.description):
+        d[col[0]] = row[idx]
+    return d
+
+def _initialize_db_schema(db, target_version):
+    """Creates the application schema in the given database.
+    """
+    log.msg("populating new database with schema v%s" % target_version)
+    schema = get_schema(target_version)
+    db.executescript(schema)
+    db.execute("INSERT INTO version (version) VALUES (?)",
+               (target_version,))
+    db.commit()
+
+def _initialize_db_connection(db):
+    """Sets up the db connection object with a row factory and with necessary
+    foreign key settings.
+ """ + db.row_factory = dict_factory + db.execute("PRAGMA foreign_keys = ON") + problems = db.execute("PRAGMA foreign_key_check").fetchall() + if problems: + raise DBError("failed foreign key check: %s" % (problems,)) + +def _open_db_connection(dbfile): + """Open a new connection to the SQLite3 database at the given path. + """ + try: + db = sqlite3.connect(dbfile) + except (EnvironmentError, sqlite3.OperationalError) as e: + raise DBError("Unable to create/open db file %s: %s" % (dbfile, e)) + _initialize_db_connection(db) + return db + +def _get_temporary_dbfile(dbfile): + """Get a temporary filename near the given path. + """ + fd, name = tempfile.mkstemp( + prefix=os.path.basename(dbfile) + ".", + dir=os.path.dirname(dbfile) + ) + os.close(fd) + return name + +def _atomic_create_and_initialize_db(dbfile, target_version): + """Create and return a new database, initialized with the application + schema. + + If anything goes wrong, nothing is left at the ``dbfile`` path. + """ + temp_dbfile = _get_temporary_dbfile(dbfile) + db = _open_db_connection(temp_dbfile) + _initialize_db_schema(db, target_version) + db.close() + os.rename(temp_dbfile, dbfile) + return _open_db_connection(dbfile) + +def get_db(dbfile, target_version=TARGET_VERSION): + """Open or create the given db file. The parent directory must exist. + Returns the db connection object, or raises DBError. + """ + if dbfile == ":memory:": + db = _open_db_connection(dbfile) + _initialize_db_schema(db, target_version) + elif os.path.exists(dbfile): + db = _open_db_connection(dbfile) + else: + db = _atomic_create_and_initialize_db(dbfile, target_version) + + try: + version = db.execute("SELECT version FROM version").fetchone()["version"] + except sqlite3.DatabaseError as e: + # this indicates that the file is not a compatible database format. + # Perhaps it was created with an old version, or it might be junk. 
+ raise DBError("db file is unusable: %s" % e) + + while version < target_version: + log.msg(" need to upgrade from %s to %s" % (version, target_version)) + try: + upgrader = get_upgrader(version+1) + except ValueError: # ResourceError?? + log.msg(" unable to upgrade %s to %s" % (version, version+1)) + raise DBError("Unable to upgrade %s to version %s, left at %s" + % (dbfile, version+1, version)) + log.msg(" executing upgrader v%s->v%s" % (version, version+1)) + db.executescript(upgrader) + db.commit() + version = version+1 + + if version != target_version: + raise DBError("Unable to handle db version %s" % version) + + return db + +def dump_db(db): + # to let _iterdump work, we need to restore the original row factory + orig = db.row_factory + try: + db.row_factory = sqlite3.Row + return "".join(db.iterdump()) + finally: + db.row_factory = orig diff --git a/src/wormhole_transit_relay/db-schemas/v1.sql b/src/wormhole_transit_relay/db-schemas/v1.sql new file mode 100644 index 0000000..9abb449 --- /dev/null +++ b/src/wormhole_transit_relay/db-schemas/v1.sql @@ -0,0 +1,30 @@ + +CREATE TABLE `version` -- contains one row +( + `version` INTEGER -- set to 1 +); + + +CREATE TABLE `current` -- contains one row +( + `reboot` INTEGER, -- seconds since epoch of most recent reboot + `last_update` INTEGER, -- when `current` was last updated + `connected` INTEGER, -- number of current paired connections + `waiting` INTEGER, -- number of not-yet-paired connections + `incomplete_bytes` INTEGER -- bytes sent through not-yet-complete connections +); + +CREATE TABLE `usage` +( + `started` INTEGER, -- seconds since epoch, rounded to "blur time" + `total_time` INTEGER, -- seconds from open to last close + `waiting_time` INTEGER, -- seconds from start to 2nd side appearing, or None + `total_bytes` INTEGER, -- total bytes relayed (both directions) + `result` VARCHAR -- happy, scary, lonely, errory, pruney + -- transit moods: + -- "errory": one side gave the wrong handshake + -- 
"lonely": good handshake, but the other side never showed up
+       -- "happy": both sides gave correct handshake
+);
+CREATE INDEX `transit_usage_idx` ON `usage` (`started`);
+CREATE INDEX `transit_usage_result_idx` ON `usage` (`result`);
diff --git a/src/wormhole_transit_relay/server_tap.py b/src/wormhole_transit_relay/server_tap.py
index 1444a85..babc441 100644
--- a/src/wormhole_transit_relay/server_tap.py
+++ b/src/wormhole_transit_relay/server_tap.py
@@ -1,7 +1,8 @@
 from . import transit_server
 from twisted.internet import reactor
 from twisted.python import usage
-from twisted.application.internet import StreamServerEndpointService
+from twisted.application.internet import (TimerService,
+                                          StreamServerEndpointService)
 from twisted.internet import endpoints
 
 LONGDESC = """\
@@ -9,10 +10,10 @@ This plugin sets up a 'Transit Relay' server for magic-wormhole. This service
 listens for TCP connections, finds pairs which present the same handshake, and
 glues the two TCP sockets together.
 
-If --usage-logfile= is provided, a line will be written to the given file after
-each connection is done. This line will be a complete JSON object (starting
-with "{", ending with "}\n", and containing no internal newlines). The keys
-will be:
+If --log-stdout is provided, a line will be written to stdout after each
+connection is done. This line will be a complete JSON object (starting with
+"{", ending with "}\n", and containing no internal newlines). The keys will
+be:
 
 * 'started': number, seconds since epoch
 * 'total_time': number, seconds from open to last close
@@ -27,35 +28,62 @@ second matching side never appeared (and thus 'waiting_time' will be null).
 
 If --blur-usage= is provided, then 'started' will be rounded to the given time
 interval, and 'total_bytes' will be rounded as well.
 
-If --stats-file is provided, the server will periodically write a simple JSON
-dictionary to that file (atomically), with cumulative usage data (since last
-reboot, and all-time). This information is *not* blurred (the assumption is
-that it will be overwritten on a regular basis, and is aggregated anyways). The
-keys are:
+If --usage-db= is provided, the server will maintain a SQLite database in the
+given file. Current, recent, and historical usage data will be written to the
+database, and external tools can query the DB for metrics: the munin plugins
+in misc/ may be useful. Timestamps and sizes in this file will respect
+--blur-usage. The four tables are:
 
-* active.connected: number of paired connections
-* active.waiting: number of not-yet-paired connections
-* since_reboot.bytes: sum of 'total_bytes'
-* since_reboot.total: number of completed connections
-* since_reboot.moods: dict mapping mood string to number of connections
-* all_time.bytes: same
-* all_time.total
-* all_time.moods
+"current" contains a single row, with these columns:
 
-The server will write twistd.pid and twistd.log files as usual, if daemonized
-by twistd. twistd.log will only contain startup, shutdown, and exception
-messages. To record information about each connection, use --usage-logfile.
+* connected: number of paired connections
+* waiting: number of not-yet-paired connections
+* incomplete_bytes: bytes transmitted over not-yet-complete connections
+
+"since_reboot" contains a single row, with these columns:
+
+* bytes: sum of 'total_bytes'
+* connections: number of completed connections
+* mood_happy: count of connections that finished "happy": both sides gave correct handshake
+* mood_lonely: one side gave good handshake, other side never showed up
+* mood_errory: one side gave a bad handshake
+
+"all_time" contains a single row, with these columns:
+
+* bytes:
+* connections:
+* mood_happy:
+* mood_lonely:
+* mood_errory:
+
+"usage" contains one row per closed connection, with these columns:
+
+* started: seconds since epoch, rounded to "blur time"
+* total_time: seconds from first open to last close
+* waiting_time: seconds from first open to second open, or None
+* bytes: total bytes relayed (in both directions)
+* result: (string) the mood: happy, lonely, errory
+
+All tables will be updated after each connection is finished. In addition,
+the "current" table will be updated at least once every 5 minutes.
+
+If daemonized by twistd, the server will write twistd.pid and twistd.log
+files as usual. By default twistd.log will only contain startup, shutdown,
+and exception messages. Adding --log-stdout will add per-connection JSON
+lines to twistd.log.
"""
 
 class Options(usage.Options):
-    #synopsis = "[--port=] [--usage-logfile=] [--blur-usage=] [--stats-json=]"
+    #synopsis = "[--port=] [--log-stdout] [--blur-usage=] [--usage-db=]"
     longdesc = LONGDESC
+    optFlags = [
+        ["log-stdout", None, "write JSON usage logs to stdout"],
+        ]
     optParameters = [
         ("port", "p", "tcp:4001", "endpoint to listen on"),
         ("blur-usage", None, None, "blur timestamps and data sizes in logs"),
-        ("usage-logfile", None, None, "record usage data (JSON lines)"),
-        ("stats-file", None, None, "record usage in JSON format"),
+        ("usage-db", None, None, "record usage data (SQLite)"),
     ]
 
     def opt_blur_usage(self, arg):
@@ -65,6 +93,10 @@ class Options(usage.Options):
 
 def makeService(config, reactor=reactor):
+    from twisted.application import service
     ep = endpoints.serverFromString(reactor, config["port"]) # to listen
     f = transit_server.Transit(blur_usage=config["blur-usage"],
-                               usage_logfile=config["usage-logfile"],
-                               stats_file=config["stats-file"])
-    return StreamServerEndpointService(ep, f)
+                               log_stdout=config["log-stdout"],
+                               usage_db=config["usage-db"])
+    parent = service.MultiService()
+    StreamServerEndpointService(ep, f).setServiceParent(parent)
+    TimerService(5.0, f.timerUpdateStats).setServiceParent(parent)
+    return parent
diff --git a/src/wormhole_transit_relay/transit_server.py b/src/wormhole_transit_relay/transit_server.py
index 42f19ca..e915944 100644
--- a/src/wormhole_transit_relay/transit_server.py
+++ b/src/wormhole_transit_relay/transit_server.py
@@ -2,6 +2,7 @@ from __future__ import print_function, unicode_literals
 import os, re, time, json
 from twisted.python import log
 from twisted.internet import protocol
+from .database import get_db
 
 SECONDS = 1.0
 MINUTE = 60*SECONDS
@@ -220,15 +221,16 @@ class Transit(protocol.ServerFactory):
     MAXTIME = 60*SECONDS
     protocol = TransitConnection
 
-    def __init__(self, blur_usage, usage_logfile, stats_file):
+    def __init__(self, blur_usage, log_stdout, usage_db):
         self._blur_usage = blur_usage
-        self._log_requests = blur_usage is None
-
self._usage_logfile = open(usage_logfile, "a") if usage_logfile else None - self._stats_file = stats_file + self._debug_log = False + self._log_stdout = log_stdout + self._db = None + if usage_db: + self._db = get_db(usage_db) + # we don't track TransitConnections until they submit a token self._pending_requests = {} # token -> set((side, TransitConnection)) self._active_connections = set() # TransitConnection - self._counts = {"lonely": 0, "happy": 0, "errory": 0} - self._count_bytes = 0 def connection_got_token(self, token, new_side, new_tc): if token not in self._pending_requests: @@ -240,7 +242,7 @@ class Transit(protocol.ServerFactory): or (new_side is None) or (old_side != new_side)): # we found a match - if self._log_requests: + if self._debug_log: log.msg("transit relay 2: %s" % new_tc.describeToken()) # drop and stop tracking the rest @@ -255,33 +257,11 @@ class Transit(protocol.ServerFactory): new_tc.buddy_connected(old_tc) old_tc.buddy_connected(new_tc) return - if self._log_requests: + if self._debug_log: log.msg("transit relay 1: %s" % new_tc.describeToken()) potentials.add((new_side, new_tc)) # TODO: timer - def recordUsage(self, started, result, total_bytes, - total_time, waiting_time): - self._counts[result] += 1 - self._count_bytes += total_bytes - if self._log_requests: - log.msg(format="Transit.recordUsage {bytes}B", bytes=total_bytes) - if self._blur_usage: - started = self._blur_usage * (started // self._blur_usage) - total_bytes = blur_size(total_bytes) - if self._usage_logfile: - data = {"started": started, - "total_time": total_time, - "waiting_time": waiting_time, - "total_bytes": total_bytes, - "mood": result, - } - self._usage_logfile.write(json.dumps(data)) - self._usage_logfile.write("\n") - self._usage_logfile.flush() - if self._stats_file: - self._update_stats(total_bytes, result) - def transitFinished(self, tc, token, side, description): if token in self._pending_requests: side_tc = (side, tc) @@ -289,50 +269,63 @@ class 
Transit(protocol.ServerFactory):
                 self._pending_requests[token].remove(side_tc)
             if not self._pending_requests[token]: # set is now empty
                 del self._pending_requests[token]
-        if self._log_requests:
+        if self._debug_log:
             log.msg("transitFinished %s" % (description,))
         self._active_connections.discard(tc)
 
     def transitFailed(self, p):
-        if self._log_requests:
+        if self._debug_log:
             log.msg("transitFailed %r" % p)
         pass
 
-    def _update_stats(self, total_bytes, mood):
-        try:
-            with open(self._stats_file, "r") as f:
-                stats = json.load(f)
-        except (EnvironmentError, ValueError):
-            stats = {}
+    def recordUsage(self, started, result, total_bytes,
+                    total_time, waiting_time):
+        if self._debug_log:
+            log.msg(format="Transit.recordUsage {bytes}B", bytes=total_bytes)
+        if self._blur_usage:
+            started = self._blur_usage * (started // self._blur_usage)
+            total_bytes = blur_size(total_bytes)
+        if self._log_stdout:
+            data = {"started": started,
+                    "total_time": total_time,
+                    "waiting_time": waiting_time,
+                    "total_bytes": total_bytes,
+                    "mood": result,
+                    }
+            import sys
+            sys.stdout.write(json.dumps(data) + "\n")
+            sys.stdout.flush()
+        if self._db:
+            self._db.execute("INSERT INTO `usage`"
+                             " (`started`, `total_time`, `waiting_time`,"
+                             " `total_bytes`, `result`)"
+                             " VALUES (?,?,?, ?,?)",
+                             (started, total_time, waiting_time,
+                              total_bytes, result))
+            self._update_stats()
+            self._db.commit()
 
-        # current status: expected to be zero most of the time
-        stats["active"] = {"connected": len(self._active_connections) / 2,
-                           "waiting": len(self._pending_requests),
-                           }
+    def timerUpdateStats(self):
+        if self._db:
+            self._update_stats(); self._db.commit()
 
-        # usage since last reboot
-        rb = stats["since_reboot"] = {}
-        rb["bytes"] = self._count_bytes
-        rb["total"] = sum(self._counts.values(), 0)
-        rbm = rb["moods"] = {}
-        for result, count in self._counts.items():
-            rbm[result] = count
-
-        # historical usage (all-time)
-        if "all_time" not in stats:
-            stats["all_time"] = {}
-        u = stats["all_time"]
-        u["total"] = u.get("total", 0) + 1
-        u["bytes"] = u.get("bytes", 0) + total_bytes
-        if "moods" not in u:
-            u["moods"] = {}
-        um = u["moods"]
-        for m in "happy", "lonely", "errory":
-            if m not in um:
-                um[m] = 0
-        um[mood] += 1
-        tmpfile = self._stats_file + ".tmp"
-        with open(tmpfile, "w") as f:
-            f.write(json.dumps(stats))
-            f.write("\n")
-        os.rename(tmpfile, self._stats_file)
+    def _update_stats(self):
+        # current status: should be zero when idle
+        reboot = getattr(self, "_reboot", None)  # NOTE(review): __init__ never sets _reboot
+        last_update = time.time()
+        connected = len(self._active_connections) // 2
+        # TODO: when a connection is half-closed, len(active) will be odd. a
+        # moment later (hopefully) the other side will disconnect, but
+        # _update_stats isn't updated until later.
+        waiting = len(self._pending_requests)
+        # "waiting" doesn't count multiple parallel connections from the same
+        # side
+        incomplete_bytes = sum(tc._total_sent
+                               for tc in self._active_connections)
+        self._db.execute("DELETE FROM `current`")
+        self._db.execute("INSERT INTO `current`"
+                         " (`reboot`, `last_update`, `connected`, `waiting`,"
+                         " `incomplete_bytes`)"
+                         " VALUES (?, ?, ?, ?, ?)",
+                         (reboot, last_update, connected, waiting,
+                          incomplete_bytes))