First commit: better late than never

This commit is contained in:
yagudin 2021-05-31 21:59:24 +03:00
commit a0a8093e12
6 changed files with 552 additions and 0 deletions

38
calibration.py Normal file
View File

@ -0,0 +1,38 @@
import numpy as np
# This function is a sklearn.calibration.calibration_curve modification
def calibration_curve(y_true, y_prob, *, n_bins=5, strategy="uniform"):
y_true = np.array(y_true)
y_prob = np.array(y_prob)
if strategy == "quantile": # Determine bin edges by distribution of data
quantiles = np.linspace(0, 1, n_bins + 1)
bins = np.percentile(y_prob, quantiles * 100)
bins[-1] = bins[-1] + 1e-8
elif strategy == "uniform":
bins = np.linspace(0.0, 1.0 + 1e-8, n_bins + 1)
else:
raise ValueError(
"Invalid entry to 'strategy' input. Strategy "
"must be either 'quantile' or 'uniform'."
)
binids = np.digitize(y_prob, bins) - 1
bin_sums = np.bincount(binids, weights=y_prob, minlength=len(bins))
bin_true = np.bincount(binids, weights=y_true, minlength=len(bins))
bin_total = np.bincount(binids, minlength=len(bins))
nonzero = bin_total != 0
prob_true = bin_true[nonzero] / bin_total[nonzero]
prob_pred = bin_sums[nonzero] / bin_total[nonzero]
return prob_true, prob_pred, bin_total[nonzero]
def overconfidence(y_true, y_pred):
    """Return the over/under-confidence score of the forecasts.

    For each forecast, let x be the probability assigned to the outcome that
    actually happened; the score is
    mean((x - 1) * (x - 0.5)) / mean((x - 0.5) ** 2).
    """
    assigned = y_pred * y_true + (1 - y_pred) * (1 - y_true)
    centered = assigned - 0.5
    return np.mean((assigned - 1) * centered) / np.mean(centered ** 2)

43
firebase_requests.py Normal file
View File

@ -0,0 +1,43 @@
import json

import streamlit as st  # fix: st.secrets is used below but streamlit was never imported
from google.cloud import firestore
from google.oauth2 import service_account  # fix: service_account was never imported

from gjo_requests import request_forecasts, request_resolutions

# Firestore client authenticated with the service-account info stored in
# Streamlit secrets.
firestore_info = json.loads(st.secrets["firestore_info"])
credentials = service_account.Credentials.from_service_account_info(firestore_info)
db = firestore.Client(credentials=credentials, project="gjo-calibration")
def get_forecasts(uid, questions, platform_url, headers, cookies):
    """Return {question_id: forecasts} for `questions`, using Firestore as a cache.

    Forecasts already stored under users/<uid> are reused; the remainder are
    fetched from the platform and written back to Firestore.
    """
    cached = db.collection("users").document(uid).get().to_dict()
    if cached is None:
        cached = {}

    to_fetch = list(set(questions) - set(cached))
    fetched = request_forecasts(uid, to_fetch, platform_url, headers, cookies)

    if fetched:
        if not cached:
            # The user document does not exist yet — create it before updating.
            db.collection("users").add({}, uid)
        db.collection("users").document(uid).update(fetched)

    return {**cached, **fetched}
def get_resolutions(questions, platform_url, headers, cookies):
    """Return {question_id: resolution} for `questions`, using Firestore as a cache."""
    stored = db.collection("questions").document("resolutions").get().to_dict()
    if stored is None:
        stored = {}

    wanted = set(questions)
    cached = {qid: res for qid, res in stored.items() if qid in wanted}

    missing = list(wanted - set(cached))
    fetched = request_resolutions(missing, platform_url, headers, cookies)

    if fetched:
        db.collection("questions").document("resolutions").update(fetched)

    return {**cached, **fetched}

175
gjo_requests.py Normal file
View File

@ -0,0 +1,175 @@
import asyncio
import logging
import re
from itertools import count
import aiohttp
import aioitertools
import requests
import streamlit as st
from bs4 import BeautifulSoup
# NOTE(review): a fresh event loop is created and registered here at import
# time, but the request_* helpers below call asyncio.run(), which manages its
# own loop — presumably this is a workaround for running under Streamlit's
# worker thread; confirm it is still needed.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
@st.cache
def get_resolved_questions(uid, platform_url, headers, cookies):
    """Return the ids of all resolved questions the user forecasted on.

    Walks the paginated scores pages for membership `uid` until a page
    yields no question links.
    """
    logging.info(
        f"[ ] get_resolved_questions for uid={uid}, platform_url={platform_url}"
    )
    questions = []  # [question_id]
    for page_num in count(1):
        url = f"{platform_url}/memberships/{uid}/scores/?page={page_num}"
        page = requests.get(url, headers=headers, cookies=cookies).text
        # Raw string fix: "\d" in a plain literal is an invalid escape
        # sequence (SyntaxWarning on modern Python).
        extracted_qs = re.findall(r"/questions/(\d+)", page)
        questions.extend(extracted_qs)
        if not extracted_qs:
            break
    logging.info(
        f"[X] get_resolved_questions for uid={uid}, platform_url={platform_url}"
    )
    return questions
async def get_question_resolution(qid, platform_url, session):
    """Fetch and parse the resolution of question `qid`.

    Returns {"y_true": tuple} with one 0/1 entry per answer option.
    """
    # Bug fix: the log messages referenced `uid`, which is not defined in this
    # function (NameError at runtime); log the question id instead.
    logging.info(
        f"[ ] get_question_resolution for qid={qid}, platform_url={platform_url}"
    )
    url = f"{platform_url}/questions/{qid}"
    async with session.get(url) as resp:
        if resp.status != 200:
            logging.error(
                f"get_question_resolution for qid={qid}, platform_url={platform_url} | "
                f"resp.status == {resp.status}{resp.reason}"
            )
        page = await resp.text()
    soup = BeautifulSoup(page, "html.parser")
    soup = soup.find_all("div", {"id": "prediction-interface-container"})[0]
    binary = soup.find_all("div", {"class": "binary-probability-value"})
    if binary:
        # Binary question: second value div holds the resolved answer.
        y_true = (0, 1) if re.search("Yes", binary[1].text) is None else (1, 0)
    else:
        # Multi-option question: the winning row(s) are marked with an <i> icon.
        tables = soup.find_all("table")
        y_true = tuple(len(tr.findAll("i")) for tr in tables[0].findAll("tr")[1:])
    logging.info(
        f"[X] get_question_resolution for qid={qid}, platform_url={platform_url}"
    )
    return {"y_true": y_true}
def _extract_forecasts_from_page(page):
    """Extract [{"y_pred": (probs, ...), "timestamp": str}, ...] from one HTML page.

    Probabilities come from the "prediction-values" divs; a binary question
    shows a single percentage, which is expanded to (p, 1 - p).
    """
    soup = BeautifulSoup(page, "html.parser")
    soup_predictions = soup.find_all("div", {"class": "prediction-values"})
    predictions = [re.findall(r"\n\s*(\d+)%", p_tag.text) for p_tag in soup_predictions]
    predictions = [tuple(int(prob) / 100 for prob in pred) for pred in predictions]
    predictions = [
        (pred[0], 1 - pred[0]) if len(pred) == 1 else pred for pred in predictions
    ]
    # Search for a line containing "made a forecast", then for the next line
    # containing <span data-localizable-timestamp="..."> and grab its timestamp.
    timestamps = []
    looking_for_a_forecast = True
    for line in page.split("\n"):
        if looking_for_a_forecast:
            hit = re.findall("made a forecast", line)
            if hit:
                looking_for_a_forecast = False
        else:
            hit = re.findall(r'<span data-localizable-timestamp="([^"]+)">', line)
            if hit:
                timestamps.extend(hit)
                looking_for_a_forecast = True
    if len(timestamps) != len(predictions):
        # Bug fix: the message referenced uid/qid/page_num, which are not in
        # scope here (NameError), and the two counts were swapped.
        logging.error(
            f"In _extract_forecasts_from_page "
            f"got different number of predictions ({len(predictions)}) and timestamps ({len(timestamps)})."
        )
    return [
        {"y_pred": pred, "timestamp": timestamp}
        for pred, timestamp in zip(predictions, timestamps)
    ]
async def get_forecasts_on_the_question(uid, qid, platform_url, session):
    """Collect every forecast the user made on one question, page by page."""
    logging.info(
        f"[ ] get_forecasts_on_the_question for uid={uid}, qid={qid}, platform_url={platform_url}"
    )
    forecasts = []  # [{"y_pred": (probs, ...), "timestamp": timestamp}, ...]
    page_num = 0
    while True:
        page_num += 1
        url = f"{platform_url}/questions/{qid}/prediction_sets?membership_id={uid}&page={page_num}"
        async with session.get(url) as resp:
            if resp.status != 200:
                logging.error(
                    f"get_forecasts_on_the_question for uid={uid}, qid={qid}, platform_url={platform_url} | "
                    f"resp.status == {resp.status}{resp.reason}"
                )
            page = await resp.text()
        extracted_forecasts = _extract_forecasts_from_page(page)
        forecasts.extend(extracted_forecasts)
        # An empty page means we ran past the last page of forecasts.
        if not extracted_forecasts:
            break
    logging.info(
        f"[X] get_forecasts_on_the_question for uid={uid}, qid={qid}, platform_url={platform_url}"
    )
    return forecasts
# ---
async def async_get_forecasts(uid, questions, platform_url, headers, cookies):
    """Fetch forecasts for all `questions` concurrently (at most 5 in flight)."""
    async with aiohttp.ClientSession(headers=headers, cookies=cookies) as session:
        tasks = [
            get_forecasts_on_the_question(uid, qid, platform_url, session)
            for qid in questions
        ]
        per_question = await aioitertools.asyncio.gather(*tasks, limit=5)
        return dict(zip(questions, per_question))
async def async_get_resolutions(questions, platform_url, headers, cookies):
    """Fetch resolutions for all `questions` concurrently (at most 5 in flight)."""
    async with aiohttp.ClientSession(headers=headers, cookies=cookies) as session:
        tasks = [
            get_question_resolution(qid, platform_url, session) for qid in questions
        ]
        per_question = await aioitertools.asyncio.gather(*tasks, limit=5)
        return dict(zip(questions, per_question))
def request_forecasts(uid, missing_forecasts_qs, platform_url, headers, cookies):
    """Synchronous wrapper around async_get_forecasts."""
    coro = async_get_forecasts(
        uid, missing_forecasts_qs, platform_url, headers, cookies
    )
    return asyncio.run(coro)
def request_resolutions(missing_resolutions_qs, platform_url, headers, cookies):
    """Synchronous wrapper around async_get_resolutions."""
    coro = async_get_resolutions(
        missing_resolutions_qs, platform_url, headers, cookies
    )
    return asyncio.run(coro)

172
plotting.py Normal file
View File

@ -0,0 +1,172 @@
import numpy as np
import plotly.graph_objects as go
from calibration import calibration_curve
def plotly_calibration(y_true, y_pred, n_bins, strategy="quantile"):
    """Build a Plotly calibration (reliability) plot with binomial error bars."""
    frac_pos, mean_pred, counts = calibration_curve(
        y_true, y_pred, n_bins=n_bins, strategy=strategy
    )
    # Binomial standard deviation of the fraction of positives in each bin.
    std = np.sqrt(frac_pos * (1 - frac_pos) / counts)

    hover = "<br>".join(
        ["x: %{x:.3f}", "y: %{y:.3f}", "N: %{customdata}", "<extra></extra>"]
    )
    scatter = go.Scatter(
        x=mean_pred,
        y=frac_pos,
        customdata=counts,
        mode="markers",
        error_y=dict(type="data", array=std, thickness=1.5, width=3),
        hovertemplate=hover,
        showlegend=False,
    )

    fig = go.Figure()
    fig.add_trace(scatter)

    # Diagonal reference line: perfectly calibrated forecasts lie on it.
    fig.add_shape(
        type="line",
        x0=0,
        y0=0,
        x1=1,
        y1=1,
        line=dict(color="LightSeaGreen", width=2, dash="dot"),
        opacity=0.5,
    )

    fig.update_layout(
        width=800,
        height=800,
        title="Calibration plot",
        xaxis_title="Mean predicted value",
        yaxis_title="Fraction of positives (± std)",
    )
    fig.update_xaxes(range=[-0.05, 1.05], constrain="domain")
    fig.update_yaxes(
        range=[-0.05, 1.05], constrain="domain", scaleanchor="x", scaleratio=1
    )
    return fig
def plotly_calibration_odds(y_true, y_pred, n_bins, strategy="quantile"):
    """Build a Plotly calibration plot with both axes in log2-odds space.

    Same data as plotly_calibration, but predicted values and observed
    frequencies are mapped through log2(p / (1 - p)), so one axis unit is a
    doubling of the odds; ticks are labelled as "a : b" odds ratios.
    """
    y_pred = np.clip(y_pred, 0.005, 0.995)  # clipping to avoid undefined odds
    # Keep y_true strictly inside (0, 1) so transform() below stays finite.
    y_true = np.clip(y_true, 1e-3, 1 - 1e-3)
    fraction_of_positives, mean_predicted_value, counts = calibration_curve(
        y_true, y_pred, n_bins=n_bins, strategy=strategy
    )
    # Binomial standard deviation of the per-bin fraction of positives.
    error_y = np.sqrt((fraction_of_positives) * (1 - fraction_of_positives) / counts)
    fig = go.Figure()
    # log2 odds: p -> log2(p / (1 - p)).
    transform = lambda x: np.log2(1 / (1 - x) - 1)  # 66.6% → 2^{1}:1 → 1
    # customdata columns: [bin count, x as odds string, y as odds string].
    customdata = np.dstack(
        [
            counts,
            [
                f"{2**x:.1f} : 1" if x > 0 else f"1 : {2**-x:.1f}"
                for x in transform(mean_predicted_value)
            ],
            [
                f"{2**x:.1f} : 1" if x > 0 else f"1 : {2**-x:.1f}"
                for x in transform(fraction_of_positives)
            ],
        ]
    ).squeeze()
    fig.add_trace(
        go.Scatter(
            x=transform(mean_predicted_value),
            y=transform(fraction_of_positives),
            customdata=customdata,
            mode="markers",
            # Error bars become asymmetric after the nonlinear odds transform,
            # so the +/- arms are computed separately.
            error_y=dict(
                type="data",
                symmetric=False,
                array=transform(fraction_of_positives + error_y)
                - transform(fraction_of_positives),
                arrayminus=transform(fraction_of_positives)
                - transform(fraction_of_positives - error_y),
                thickness=1.5,
                width=3,
            ),
            hovertemplate="<br>".join(
                [
                    "x: %{customdata[1]}",
                    "y: %{customdata[2]}",
                    "N: %{customdata[0]}",
                    "<extra></extra>",
                ]
            ),
            showlegend=False,
        )
    )
    # Diagonal reference line: perfect calibration in odds space.
    fig.add_shape(
        type="line",
        x0=-8,
        y0=-8,
        x1=8,
        y1=8,
        line=dict(
            color="LightSeaGreen",
            width=2,
            dash="dot",
        ),
        opacity=0.5,
    )
    fig.update_layout(
        width=800,
        height=800,
        title="Calibration plot in terms of odds",
        xaxis_title="Mean predicted value",
        yaxis_title="Fraction of positives (± std)",
    )
    # Ticks labelled as odds ratios, e.g. 2 -> "4 : 1", -1 -> "1 : 2".
    fig.update_xaxes(
        range=[-8, 8],
        constrain="domain",
        tickmode="array",
        tickvals=list(range(-10, 10)),
        ticktext=[
            f"{2**x} : 1" if x > 0 else f"1 : {2**-x}" for x in list(range(-10, 10))
        ],
    )
    fig.update_yaxes(
        range=[-8, 8],
        constrain="domain",
        scaleanchor="x",
        scaleratio=1,
        tickvals=list(range(-10, 10)),
        ticktext=[
            f"{2**x} : 1" if x > 0 else f"1 : {2**-x}" for x in list(range(-10, 10))
        ],
    )
    return fig

24
requirements.txt Normal file
View File

@ -0,0 +1,24 @@
lxml==4.6.1
lockfile==0.12.2
numpy==1.19.3
keyring==21.5.0
mypy_extensions==0.4.3
pandas==1.1.3
typing_extensions==3.7.4.3
aiohttp==3.7.4.post0
aioitertools==0.7.1
beautifulsoup4==4.9.3
brotli==1.0.9
cryptography==3.4.7
Cython==0.29.23
docutils==0.17.1
importlib_metadata==4.4.0
ipaddr==2.2.0
ordereddict==1.1
plotly==4.14.3
protobuf==3.17.1
pyOpenSSL==20.0.1
streamlit==0.82.0
uncurl==0.0.11
wincertstore==0.2
zipp==3.4.1

100
strmlt.py Normal file
View File

@ -0,0 +1,100 @@
import numpy as np
import pandas as pd
import streamlit as st
import uncurl
from calibration import overconfidence
from firebase_requests import get_forecasts, get_resolutions
from gjo_requests import get_resolved_questions
from plotting import plotly_calibration, plotly_calibration_odds
if __name__ == "__main__":
    st.title("Learn how calibrated are you?")

    # ---

    # if st.checkbox('I am new! Show me instructions.'):
    #     st.write("""
    #     Hey!
    #     """)

    # ---

    platform = st.selectbox(
        "Which platform are you using?",
        ["Good Judgement Open", "CSET Foretell"],
    )
    platform_url = {
        "Good Judgement Open": "https://www.gjopen.com",
        "CSET Foretell": "https://www.cset-foretell.com",
    }[platform]

    uid = st.number_input("What is your user ID?", min_value=1, value=28899)
    uid = str(uid)

    curl_value = ""
    curl_command = st.text_area(
        "Ugh... Gimme your cURL info...", value=curl_value.strip()
    )
    # Robustness fix: uncurl cannot parse an empty string — wait for the user
    # to paste something instead of crashing on the first render.
    if not curl_command:
        st.stop()
    curl_content = uncurl.parse_context(curl_command)
    headers, cookies = curl_content.headers, curl_content.cookies

    # ---

    questions = get_resolved_questions(uid, platform_url, headers, cookies)
    st.write(f"{len(questions)} questions you forecasted on have resolved.")

    # ---

    # TODO: Make a progress bar..?
    forecasts = get_forecasts(uid, questions, platform_url, headers, cookies)
    resolutions = get_resolutions(questions, platform_url, headers, cookies)

    # ---

    num_forecasts = sum(len(f) for f in forecasts.values())
    st.write(
        f"On these {len(questions)} questions you've made {num_forecasts} forecasts."
    )

    flatten = lambda t: [item for sublist in t for item in sublist]
    # One (y_true, y_pred) pair per answer option per forecast; note that this
    # "double counts" each binary prediction (both options are included).
    y_true = flatten(resolutions[q]["y_true"] for q in questions for _ in forecasts[q])
    y_pred = flatten(f["y_pred"] for q in questions for f in forecasts[q])

    if st.checkbox("Drop last"):
        # Drop the last answer option of every question/forecast.
        y_true = flatten(
            resolutions[q]["y_true"][:-1] for q in questions for _ in forecasts[q]
        )
        y_pred = flatten(f["y_pred"][:-1] for q in questions for f in forecasts[q])

    y_true, y_pred = np.array(y_true), np.array(y_pred)
    st.write(f"Which gives us {len(y_pred)} datapoints to work with.")

    # ---

    strategy = st.selectbox(
        # Typo fix: "stranegy" -> "strategy" in the user-facing prompt.
        "Which binning strategy do you prefer?",
        ["uniform", "quantile"],
    )
    recommended_n_bins = int(np.sqrt(len(y_pred))) if strategy == "quantile" else 20 + 1
    n_bins = st.number_input(
        "How many bins do you want me to display?",
        min_value=1,
        value=recommended_n_bins,
    )

    fig = plotly_calibration(y_true, y_pred, n_bins=n_bins, strategy=strategy)
    st.plotly_chart(fig, use_container_width=True)

    overconf = overconfidence(y_true, y_pred)
    st.write(f"Your over/under- confidence score is {overconf:.2f}.")

    # ---

    fig = plotly_calibration_odds(y_true, y_pred, n_bins=n_bins, strategy=strategy)
    st.plotly_chart(fig, use_container_width=True)