gjo-calibration/calibration.py

import logging
import numpy as np


# This function is a sklearn.calibration.calibration_curve modification
def calibration_curve(y_true, y_prob, *, n_bins=5, strategy="uniform"):
    """Compute per-bin observed frequencies and mean predicted probabilities.

    Modified from ``sklearn.calibration.calibration_curve``: besides the two
    curves, the number of samples falling in each non-empty bin is returned.

    Parameters
    ----------
    y_true : array-like
        Observed outcomes, one per prediction; they are averaged per bin.
        NOTE(review): presumably 0/1 labels -- confirm against callers.
    y_prob : array-like
        Predicted probabilities. With ``strategy="uniform"`` the bins span
        [0, 1]; a value below the first bin edge yields a negative bin id,
        which ``np.bincount`` rejects with ``ValueError``.
    n_bins : int, default=5
        Number of bins; ``n_bins + 1`` edges are generated.
    strategy : {"uniform", "quantile"}, default="uniform"
        ``"uniform"`` uses equal-width bins, ``"quantile"`` places the edges
        at evenly spaced percentiles of ``y_prob``.

    Returns
    -------
    prob_true : ndarray
        Mean of ``y_true`` in each non-empty bin.
    prob_pred : ndarray
        Mean of ``y_prob`` in each non-empty bin.
    bin_total : ndarray
        Number of samples in each non-empty bin.

    Raises
    ------
    ValueError
        If ``strategy`` is neither ``"quantile"`` nor ``"uniform"``.
    Exception
        Any error raised by ``np.digitize`` is logged together with the
        inputs and then re-raised unchanged.
    """
    y_true = np.array(y_true)
    y_prob = np.array(y_prob)

    if strategy == "quantile":  # Determine bin edges by distribution of data
        quantiles = np.linspace(0, 1, n_bins + 1)
        bins = np.percentile(y_prob, quantiles * 100)
        # Nudge the last edge upwards so the maximum of y_prob is strictly
        # below it and therefore lands in the last regular bin.
        bins[-1] = bins[-1] + 1e-8

    elif strategy == "uniform":
        # The upper edge is 1 + 1e-8 so that a probability of exactly 1.0
        # still belongs to the last regular bin.
        bins = np.linspace(0.0, 1.0 + 1e-8, n_bins + 1)

    else:
        raise ValueError(
            "Invalid entry to 'strategy' input. Strategy "
            "must be either 'quantile' or 'uniform'."
        )

    try:
        binids = np.digitize(y_prob, bins) - 1
    except Exception:
        # Local import: only the diagnostic path needs ``sys``.
        import sys

        # Dump the full arrays (no "..." summarisation) for debugging. The
        # context manager restores the global print options afterwards, and
        # the f-strings are rendered while the override is still active.
        with np.printoptions(threshold=sys.maxsize):
            logging.info("=" * 40)
            logging.info(f"n_bins={n_bins}, strategy={strategy}")
            logging.info(f"y_prob={repr(y_prob)}")
            logging.info(f"bins={repr(bins)}")
            logging.info("=" * 40)

        # Bare raise keeps the original exception and traceback intact.
        raise

    # minlength=len(bins) keeps the three arrays the same length so that the
    # ``nonzero`` mask below can index all of them.
    bin_sums = np.bincount(binids, weights=y_prob, minlength=len(bins))
    bin_true = np.bincount(binids, weights=y_true, minlength=len(bins))
    bin_total = np.bincount(binids, minlength=len(bins))

    # Drop empty bins to avoid dividing by zero.
    nonzero = bin_total != 0
    prob_true = bin_true[nonzero] / bin_total[nonzero]
    prob_pred = bin_sums[nonzero] / bin_total[nonzero]

    return prob_true, prob_pred, bin_total[nonzero]


def overconfidence(y_true, y_pred):
    """Return an overconfidence score for probabilistic predictions.

    For each sample, ``x`` is ``y_pred`` where ``y_true`` is 1 and
    ``1 - y_pred`` where ``y_true`` is 0. The score is
    ``mean((x - 1) * (x - 0.5)) / mean((x - 0.5) ** 2)``.

    For example, always predicting 0.9 for an event that occurs 70% of the
    time gives 0.5, while predicting 0.9 for an event that occurs 90% of the
    time gives 0.

    Parameters
    ----------
    y_true : array-like
        Observed outcomes. NOTE(review): presumably 0/1 labels -- confirm.
    y_pred : array-like
        Predicted probabilities, same shape as (or broadcastable to)
        ``y_true``.

    Returns
    -------
    float
        The score described above. It is NaN (0/0) when every ``x`` equals
        0.5, because the denominator is then zero.
    """
    # Accept plain sequences as well as arrays, like calibration_curve does.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    x = y_pred * y_true + (1 - y_pred) * (1 - y_true)
    return np.mean((x - 1) * (x - 0.5)) / np.mean((x - 0.5) * (x - 0.5))
Improve exception handling 2021-06-02 21:46:01 +00:00			`import logging`
First commit: better late than never 2021-05-31 18:59:24 +00:00			`import numpy as np`


			`# This function is a sklearn.calibration.calibration_curve modification`
			`def calibration_curve(y_true, y_prob, *, n_bins=5, strategy="uniform"):`
			`y_true = np.array(y_true)`
			`y_prob = np.array(y_prob)`

			`if strategy == "quantile": # Determine bin edges by distribution of data`
			`quantiles = np.linspace(0, 1, n_bins + 1)`
			`bins = np.percentile(y_prob, quantiles * 100)`
			`bins[-1] = bins[-1] + 1e-8`

			`elif strategy == "uniform":`
			`bins = np.linspace(0.0, 1.0 + 1e-8, n_bins + 1)`

			`else:`
			`raise ValueError(`
			`"Invalid entry to 'strategy' input. Strategy "`
			`"must be either 'quantile' or 'uniform'."`
			`)`

Improve exception handling 2021-06-02 21:46:01 +00:00			`try:`
			`binids = np.digitize(y_prob, bins) - 1`
			`except Exception as e:`
			`np.set_printoptions(threshold=sys.maxsize)`
			`logging.info("=" * 40)`
			`logging.info(f"n_bins={n_bins}, strategy={strategy}")`
			`logging.info(f"y_true={repr(y_prob)}")`
			`logging.info(f"bins={repr(bins)}")`
			`logging.info("=" * 40)`

			`raise e`

First commit: better late than never 2021-05-31 18:59:24 +00:00
			`bin_sums = np.bincount(binids, weights=y_prob, minlength=len(bins))`
			`bin_true = np.bincount(binids, weights=y_true, minlength=len(bins))`
			`bin_total = np.bincount(binids, minlength=len(bins))`

			`nonzero = bin_total != 0`
			`prob_true = bin_true[nonzero] / bin_total[nonzero]`
			`prob_pred = bin_sums[nonzero] / bin_total[nonzero]`

			`return prob_true, prob_pred, bin_total[nonzero]`


			`def overconfidence(y_true, y_pred):`
			`x = y_pred * y_true + (1 - y_pred) * (1 - y_true)`
			`return np.mean((x - 1) * (x - 0.5)) / np.mean((x - 0.5) * (x - 0.5))`