# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/main/LICENSE

"""
This module defines the behavior of ``TProfile``.
"""

from __future__ import absolute_import

import numpy

import uproot
from uproot.behaviors.TH1 import boost_metadata, boost_axis_metadata


_kERRORMEAN = 0
_kERRORSPREAD = 1
_kERRORSPREADI = 2
_kERRORSPREADG = 3


# closely follows the ROOT function, using the same names (with 'root_' prepended)
# https://github.com/root-project/root/blob/ffc7c588ac91aca30e75d356ea971129ee6a836a/hist/hist/src/TProfileHelper.h#L141-L163
def _effective_counts_1d(fBinEntries, fBinSumw2, fNcells):
    root_sumOfWeights = fBinEntries
    root_sumOfWeights = numpy.asarray(root_sumOfWeights, dtype=numpy.float64)

    root_sumOfWeightSquare = fBinSumw2
    root_sumOfWeightSquare = numpy.asarray(root_sumOfWeightSquare, dtype=numpy.float64)

    if len(root_sumOfWeightSquare) == 0 or len(root_sumOfWeightSquare) != fNcells:
        return root_sumOfWeights

    positive = root_sumOfWeightSquare > 0

    out = numpy.zeros(len(root_sumOfWeights), dtype=numpy.float64)
    out[positive] = root_sumOfWeights[positive] ** 2 / root_sumOfWeightSquare[positive]
    return out


# duplicates the first part of '_values_errors_1d'
def _values_1d(fBinEntries, root_cont):
    root_sum = fBinEntries
    root_sum = numpy.asarray(root_sum, dtype=numpy.float64)
    nonzero = root_sum != 0

    root_contsum = numpy.zeros(len(root_cont), dtype=numpy.float64)
    root_contsum[nonzero] = root_cont[nonzero] / root_sum[nonzero]

    return root_contsum


def _error_mode_str(error_mode):
    if error_mode is None or error_mode == _kERRORMEAN or error_mode == "":
        return ""
    elif error_mode == _kERRORSPREAD or error_mode == "s":
        return "S"
    elif error_mode == _kERRORSPREADI or error_mode == "i":
        return "I"
    elif error_mode == _kERRORSPREADG or error_mode == "g":
        return "G"
    else:
        return "_"


# closely follows the ROOT function, using the same names (with 'root_' prepended)
# https://github.com/root-project/root/blob/ffc7c588ac91aca30e75d356ea971129ee6a836a/hist/hist/src/TProfileHelper.h#L660-L721
def _values_errors_1d(error_mode, fBinEntries, root_cont, fSumw2, fNcells, fBinSumw2):
    if error_mode is None or error_mode == _kERRORMEAN or error_mode == "":
        error_mode = _kERRORMEAN
    elif error_mode == _kERRORSPREAD or error_mode == "s":
        error_mode = _kERRORSPREAD
    elif error_mode == _kERRORSPREADI or error_mode == "i":
        error_mode = _kERRORSPREADI
    elif error_mode == _kERRORSPREADG or error_mode == "g":
        error_mode = _kERRORSPREADG
    else:
        raise ValueError(
            "error_mode must be None/0/'' for error-on-mean,\n"
            "                        1/'s' for spread (variance),\n"
            "                        2/'i' for integer spread (using sqrt(12)),\n"
            "                     or 3/'g' for Gaussian spread\n"
            "                    not "
            + repr(error_mode)
            + "see https://root.cern.ch/doc/master/classTProfile.html"
        )

    root_sum = fBinEntries
    root_sum = numpy.asarray(root_sum, dtype=numpy.float64)
    nonzero = root_sum != 0

    root_contsum = numpy.zeros(len(root_cont), dtype=numpy.float64)
    root_contsum[nonzero] = root_cont[nonzero] / root_sum[nonzero]

    if error_mode == _kERRORSPREADG:
        out = numpy.zeros(len(root_cont), dtype=numpy.float64)
        out[nonzero] = 1.0 / numpy.sqrt(root_sum[nonzero])
        return root_contsum, out

    root_err2 = fSumw2
    if root_err2 is None or len(root_err2) != fNcells:
        root_err2 = numpy.zeros(len(root_cont), dtype=numpy.float64)
    else:
        root_err2 = numpy.asarray(root_err2, dtype=numpy.float64)

    root_neff = _effective_counts_1d(fBinEntries, fBinSumw2, fNcells)

    root_eprim2 = numpy.zeros(len(root_cont), dtype=numpy.float64)
    root_eprim2[nonzero] = abs(
        root_err2[nonzero] / root_sum[nonzero] - root_contsum[nonzero] ** 2
    )
    root_eprim = numpy.sqrt(root_eprim2)

    if error_mode == _kERRORSPREADI:
        numer = numpy.ones(len(root_cont), dtype=numpy.float64)
        denom = numpy.full(len(root_cont), numpy.sqrt(12 * root_neff))

        eprim_nonzero = root_eprim != 0
        numer[eprim_nonzero] = root_eprim[eprim_nonzero]
        denom[eprim_nonzero] = numpy.sqrt(root_neff[eprim_nonzero])

        out = numpy.zeros(len(root_cont), dtype=numpy.float64)
        out[nonzero] = numer[nonzero] / denom[nonzero]
        return root_contsum, out

    if error_mode == _kERRORSPREAD:
        root_eprim[~nonzero] = 0.0
        return root_contsum, root_eprim

    out = numpy.zeros(len(root_cont), dtype=numpy.float64)
    out[nonzero] = root_eprim[nonzero] / numpy.sqrt(root_neff[nonzero])
    return root_contsum, out


class Profile(uproot.behaviors.TH1.Histogram):
    """
    Abstract class for profile plots.
    """

    @property
    def kind(self):
        return "MEAN"

    def counts(self, flow=False):
        """
        Args:
            flow (bool): If True, include underflow and overflow bins before and
                after the normal (finite-width) bins.

        The effective number of entries, which is a step in the calculation of
        :ref:`uproot.behaviors.TProfile.Profile.values`. The calculation
        of profile errors exactly follows ROOT's "effective entries", but in a
        vectorized NumPy form.
        """
        raise NotImplementedError(repr(self))

    def values(self, flow=False):
        """
        Args:
            flow (bool): If True, include underflow and overflow bins before and
                after the normal (finite-width) bins.

        Mean value of each bin as a 1, 2, or 3 dimensional ``numpy.ndarray`` of
        ``numpy.float64``.

        Setting ``flow=True`` increases the length of each dimension by two.
        """
        raise NotImplementedError(repr(self))

    def errors(self, flow=False, error_mode=""):
        """
        Args:
            flow (bool): If True, include underflow and overflow bins before and
                after the normal (finite-width) bins.
            error_mode (str): Choose a method for calculating the errors (see below).

        Errors (uncertainties) in the :ref:`uproot.behaviors.TH1.Histogram.values`
        as a 1, 2, or 3 dimensional ``numpy.ndarray`` of ``numpy.float64``.

        The calculation of profile errors exactly follows ROOT's function, but
        in a vectorized NumPy form. The ``error_mode`` may be

        * ``""`` for standard error on the mean
        * ``"s"`` for spread
        * ``"i"`` for integer data
        * ``"g"`` for Gaussian

        following `ROOT's profile documentation <https://root.cern.ch/doc/master/classTProfile.html>`__.

        Setting ``flow=True`` increases the length of each dimension by two.
        """
        values, errors = self._values_errors(flow, error_mode)
        return errors

    def variances(self, flow=False, error_mode=""):
        """
        Args:
            flow (bool): If True, include underflow and overflow bins before and
                after the normal (finite-width) bins.
            error_mode (str): Choose a method for calculating the errors (see below).

        Variances (uncertainties squared) in the
        :ref:`uproot.behaviors.TH1.Histogram.values` as a 1, 2, or 3
        dimensional ``numpy.ndarray`` of ``numpy.float64``.

        The calculation of profile variances exactly follows ROOT's function, but
        in a vectorized NumPy form. The ``error_mode`` may be

        * ``""`` for standard error on the mean (squared)
        * ``"s"`` for spread (squared)
        * ``"i"`` for integer data (squared)
        * ``"g"`` for Gaussian (squared)

        following `ROOT's profile documentation <https://root.cern.ch/doc/master/classTProfile.html>`__.

        Setting ``flow=True`` increases the length of each dimension by two.
        """
        values, errors = self._values_errors(flow, error_mode)
        return numpy.square(errors)


class TProfile(Profile):
    """
    Behaviors for one-dimensional profiles: ROOT's ``TProfile``.
    """

    no_inherit = (uproot.behaviors.TH1.TH1,)

    @property
    def axes(self):
        return (self.member("fXaxis"),)

    def axis(self, axis=0):  # default axis for one-dimensional is intentional
        if axis == 0 or axis == -1 or axis == "x":
            return self.member("fXaxis")
        else:
            raise ValueError("axis must be 0 (-1) or 'x' for a TProfile")

    @property
    def weighted(self):
        fBinSumw2 = self.member("fBinSumw2", none_if_missing=True)
        return fBinSumw2 is None or len(fBinSumw2) != len(self.member("fNcells"))

    def counts(self, flow=True):
        out = _effective_counts_1d(
            self.member("fBinEntries"),
            self.member("fBinSumw2"),
            self.member("fNcells"),
        )
        if flow:
            return out
        else:
            return out[1:-1]

    def values(self, flow=False):
        if hasattr(self, "_values"):
            values = self._values
        else:
            (root_cont,) = self.base(uproot.models.TArray.Model_TArray)
            root_cont = numpy.asarray(root_cont, dtype=numpy.float64)
            values = _values_1d(
                self.member("fBinEntries"),
                root_cont,
            )
            self._values = values

        if flow:
            return values
        else:
            return values[1:-1]

    def _values_errors(self, flow, error_mode):
        attr = "_errors" + _error_mode_str(error_mode)
        if hasattr(self, attr):
            values = self._values
            errors = getattr(self, attr)
        else:
            (root_cont,) = self.base(uproot.models.TArray.Model_TArray)
            root_cont = numpy.asarray(root_cont, dtype=numpy.float64)
            fSumw2 = self.member("fSumw2", none_if_missing=True)
            if fSumw2 is not None:
                fSumw2 = numpy.asarray(fSumw2)
            values, errors = _values_errors_1d(
                error_mode,
                numpy.asarray(self.member("fBinEntries")),
                root_cont,
                fSumw2,
                self.member("fNcells"),
                numpy.asarray(self.member("fBinSumw2")),
            )
            self._values = values
            setattr(self, attr, errors)

        if flow:
            return values, errors
        else:
            return values[1:-1], errors[1:-1]

    def to_boost(self, metadata=boost_metadata, axis_metadata=boost_axis_metadata):
        boost_histogram = uproot.extras.boost_histogram()

        effective_counts = self.counts(flow=True)
        values, errors = self._values_errors(True, self.member("fErrorMode"))
        variances = numpy.square(errors)
        sum_of_bin_weights = numpy.asarray(self.member("fBinEntries"))

        storage = boost_histogram.storage.WeightedMean()

        xaxis = uproot.behaviors.TH1._boost_axis(self.member("fXaxis"), axis_metadata)
        out = boost_histogram.Histogram(xaxis, storage=storage)
        for k, v in metadata.items():
            setattr(out, k, self.member(v))

        if isinstance(xaxis, boost_histogram.axis.StrCategory):
            effective_counts = effective_counts[1:]
            values = values[1:]
            variances = variances[1:]
            sum_of_bin_weights = sum_of_bin_weights[1:]

        view = out.view(flow=True)

        # https://github.com/root-project/root/blob/ffc7c588ac91aca30e75d356ea971129ee6a836a/hist/hist/src/TProfileHelper.h#L668-L671
        with numpy.errstate(divide="ignore", invalid="ignore"):
            sum_of_bin_weights_squared = (sum_of_bin_weights ** 2) / effective_counts

        # TODO: Drop this when boost-histogram has a way to set using the constructor.
        # New version should look something like this:
        # view[...] = np.stack(sum_of_bin_weights, sum_of_bin_weights_squared, values, variances)
        # Current / classic version:
        view["sum_of_weights"] = sum_of_bin_weights
        view["sum_of_weights_squared"] = sum_of_bin_weights_squared
        view["value"] = values
        view["_sum_of_weighted_deltas_squared"] = variances * (
            sum_of_bin_weights - sum_of_bin_weights_squared / sum_of_bin_weights
        )

        return out