Source code for ompy.vector

from __future__ import annotations
from typing import Optional, Iterable, Union, Any, Tuple, Dict, Sequence
import logging
import warnings
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from numpy import ndarray
from .filehandling import (load_numpy_1D, save_numpy_1D,
                           load_csv_1D, save_csv_1D,
                           load_txt_1D, save_txt_1D,
                           mama_read, mama_write,
                           filetype_from_suffix,
                           load_tar, save_tar)
from .decomposition import index
from .library import div0
from .rebin import rebin_1D
from .abstractarray import AbstractArray

LOG = logging.getLogger(__name__)
logging.captureWarnings(True)


[docs]class Vector(AbstractArray):
    """ Stores 1d array with energy axes (a vector)

    Attributes:
        values (np.ndarray): The values at each bin.
        E (np.ndarray): The energy of each bin. (mid-bin calibration)
        path (str or Path): The path to load a saved vector from
        std (np.ndarray): The standard deviation of the counts
        unit (str): Unit of the energies. Can be "keV" or "MeV".
            Defaults to "keV".
    """

    def __init__(self, values: Optional[Iterable[float]] = None,
                 E: Optional[Iterable[float]] = None,
                 path: Optional[Union[str, Path]] = None,
                 std: Optional[Iterable[float]] = None,
                 units: Optional[str] = "keV",
                 **kwargs):
        """
        There are several ways to initialize

        - An initialization with only `E`, or energy, defaults to
          zero values.
        - An initialization using a path loads a vector from said path.

        If no `std` is given, it will default to None

        Args:
            values: see above
            E: see above
            path: see above
            std: see above
            unit: see above
            kwargs: arguments to the filereader (see pandas.read_csv)
        Raises:
           ValueError if the given arrays are of differing lenghts.

        ToDo:
            - Crean up initialization.
        """
        if values is not None and E is not None:
            self.values = np.asarray(values, dtype=float).copy()
            self.E = np.asarray(E, dtype=float).copy()
        elif values is None and E is not None:
            self.E = np.asarray(E, dtype=float).copy()
            self.values = np.zeros_like(E)
        else:
            self.values = None
            self.E = None

        if std is not None:
            std = np.asarray(std, dtype=float).copy()
        self.std: Optional[ndarray] = std

        self.units = units

        if path is not None:
            self.load(path, **kwargs)
        self.verify_integrity()

    def __len__(self):
        return len(self.values)

[docs]    def verify_integrity(self, check_equidistant: bool = False):
        """ Verify the internal consistency of the vector

        Args:
            check_equidistant (bool, optional): Check whether energy array
                are equidistant spaced. Defaults to False.

        Raises:
            AssertionError or ValueError if any test fails
        """
        assert self.values is not None
        assert self.E is not None
        if self.E.shape != self.values.shape:
            raise ValueError("Energy and values must have same shape")
        if self.std is not None:
            if self.std.shape != self.values.shape:
                raise ValueError("std and values must have same shape")

        if check_equidistant:
            self.verify_equdistant(axis="x")

[docs]    def calibration(self) -> Dict[str, float]:
        """Calculate and return the calibration coefficients of the energy axes

        Formatted as "a{axis}{power of E}"
        """

        calibration = {"a0": self.E[0],
                       "a1": self.E[1]-self.E[0]}
        return calibration

[docs]    def plot(self, ax: Optional[Any] = None,
             scale: str = 'linear',
             kind: str = 'line',
             **kwargs) -> Tuple[Any, Any]:
        """ Plots the vector

        Args:
            ax (matplotlib axis, optional): The axis to plot onto. If not
                provided, a new figure is created
            scale (str, optional): The scale to use. Can be `linear`
                (default), `log`, `symlog` or `logit`.
            kind (str, optional):
                - 'line' : line plot (default) evokes `ax.plot`
                - 'plot' : same as 'line'
                - 'step' : step plot
                - 'bar' : vertical bar plot
            kwargs (optional): Additional kwargs to plot command.

        Returns:
            The figure and axis used.
        """
        fig, ax = plt.subplots() if ax is None else (ax.figure, ax)

        if self.std is None:
            if kind in ["plot", "line"]:
                kwargs.setdefault("markersize", 3)
                kwargs.setdefault("marker", "o")
                kwargs.setdefault("linestyle", "-")
                ax.plot(self.E, self.values, "o-", **kwargs)
            elif kind == "step":
                kwargs.setdefault("where", "mid")
                ax.step(self.E, self.values, **kwargs)
            else:
                raise NotImplementedError()
        else:
            kwargs.setdefault("markersize", 3)
            kwargs.setdefault("linewidth", 2)
            kwargs.setdefault("fmt", "o")
            ax.errorbar(self.E, self.values, yerr=self.std, **kwargs)
        ax.set_yscale(scale)
        ax.set_xlabel(f"Energy [{self.units}]")
        return fig, ax

[docs]    def save(self, path: Union[str, Path],
             filetype: Optional[str] = None,
             units: Optional[str] = "keV",
             **kwargs) -> None:
        """Save to a file of specified format

        Args:
            path (str or Path): Path to save
            filetype (str, optional): Filetype. Default uses
                auto-recognition from suffix.
                Options: ["numpy", "txt", "tar", "mama", "csv"]
            units (str, optional): Units for the x-axis. Default
                uses keV. Options: ["keV", "MeV", "same"]. "same"
                option will use the current selected units of the vector.
                Note: This keyword is ignored when saving to "mama" type.
            **kwargs: additional keyword arguments

        Raises:
            ValueError: Filetype is not supported.
            NotImplementedError: Unsupported units.
        """
        vector = self.copy()
        if units == 'keV':
            vector.to_keV()
        elif units == 'MeV':
            vector.to_MeV()
        elif units == 'same':  # We don't change the units
            pass
        else:
            raise NotImplementedError(f"Could not understand units={units}")
        path = Path(path) if isinstance(path, str) else path
        if filetype is None:
            filetype = filetype_from_suffix(path)
        filetype = filetype.lower()

        if filetype == 'numpy':
            save_numpy_1D(vector.values, vector.E, vector.std, path)
        elif filetype == 'txt':
            save_txt_1D(vector.values, vector.E, vector.std, path, **kwargs)
        elif filetype == 'tar':
            if vector.std is not None:
                save_tar([vector.values, vector.E, vector.std], path)
            else:
                save_tar([vector.values, vector.E], path)
        elif filetype == 'mama':
            if self.std is not None:
                warnings.warn("MaMa cannot store std. "
                              "Consider using another format")
            if vector.units != 'keV':
                warnings.warn("The MaMa format does not support storing "
                              "spectra with other units than keV. "
                              "`units` keyword will be ignored.")
                vector.to_keV()
            mama_write(self, path, comment="Made by OMpy", **kwargs)
        elif filetype == 'csv':
            save_csv_1D(vector.values, vector.E, vector.std, path, **kwargs)
        else:
            raise ValueError(f"Unknown filetype {filetype}")

[docs]    def load(self, path: Union[str, Path],
             filetype: Optional[str] = None,
             **kwargs) -> None:
        """Load to a file of specified format

        Args:
            path (str or Path): Path to Load
            filetype (str, optional): Filetype. Default uses
                auto-recognition from suffix.

        Raises:
            ValueError: Filetype is not supported
        """
        path = Path(path) if isinstance(path, str) else path
        if filetype is None:
            filetype = filetype_from_suffix(path)
        filetype = filetype.lower()

        if filetype == 'numpy':
            self.values, self.E, self.std = load_numpy_1D(path)
        elif filetype == 'txt':
            self.values, self.E, self.std = load_txt_1D(path)
        elif filetype == 'tar':
            from_file = load_tar(path)
            if len(from_file) == 3:
                self.values, self.E, self.std = from_file
            elif len(from_file) == 2:
                self.values, self.E = from_file
            else:
                raise ValueError(f"Expected two or three columns\
                 in file '{path}', got {len(from_file)}")
        elif filetype == 'mama':
            self.values, self.E = mama_read(path)
        elif filetype == 'csv':
            self.values, self.E, self.std = load_csv_1D(path, **kwargs)
        else:
            try:
                self.values, self.E = mama_read(path)
            except ValueError:  # from within ValueError
                raise ValueError(f"Unknown filetype {filetype}")
        self.verify_integrity()

        return None

[docs]    def transform(self, const: float = 1,
                  alpha: float = 0, inplace: bool = True) -> Optional[Vector]:
        """Apply a normalization transformation::

            vector -> const * vector * exp(alpha*energy)

        If the vector has `std`, the `std` will be transformed
        as well.

        Args:
            const (float, optional): The constant. Defaults to 1.
            alpha (float, optional): The exponential coefficient.
                Defaults to 0.
            inplace (bool, optional): Whether to apply the transformation
                inplace. If False, returns the transformed vector.

        Returns:
            Optional[Vector]
        """
        if self.std is not None:
            relative_uncertainty = self.std / self.values
        transformed = const*self.values*np.exp(alpha*self.E)

        if self.std is not None:
            std = relative_uncertainty * transformed
        if not inplace:
            units = self.units
            if self.std is not None:
                return Vector(transformed, E=self.E, std=std, units=units)
            return Vector(transformed, E=self.E, units=units)

        self.values = transformed
        if self.std is not None:
            self.std = std

[docs]    def error(self, other: Union[Vector, ndarray],
              std: Optional[ndarray] = None) -> float:
        """Computes the (weighted) χ²

        Args:
            other (Vector or ndarray]): The reference to compare itself to. If
                an array, assumes it has the same energy binning as itself.
            std (Optional[ndarray], optional): Standard deviations to use as
                inverse of the weights.

        Returns:
            float: χ²

        """
        # Hack since something is screwy with the import
        # |-> is this comment still up to date?
        try:
            self.has_equal_binning(other)
            other = other.values
        except TypeError:  # already an array
            pass
        squared_error = (self.values - other)**2
        if self.std is not None:
            if std is not None:
                sigmasq = self.std**2 + std**2
            else:
                sigmasq = self.std**2
        else:
            if std is not None:
                sigmasq = std**2
            else:
                sigmasq = 1

        error = div0(squared_error, sigmasq)
        return error.sum()

[docs]    def cut(self, Emin: Optional[float] = None,
            Emax: Optional[float] = None,
            inplace: bool = True) -> Optional[Vector]:
        """ Cut the vector at the energy limits

        Args:
            Emin (float, optional): The lower energy limit
            Emax (float, optional): The higher energy limit
            inplace (bool, optional): If `True` perform the cut on this vector
                or if `False` returns a copy. Defaults to True
        Returns:
            The cut vector if `inplace` is True.
        """
        Emin = Emin if Emin is not None else self.E.min()
        Emax = Emax if Emax is not None else self.E.max()
        imin = self.index(Emin)
        imax = self.index(Emax)
        cut = slice(imin, imax + 1)

        values = self.values[cut]
        E = self.E[cut]
        if self.std is not None:
            std = self.std[cut]
        else:
            std = None

        if inplace:
            self.E = E
            self.values = values
            self.std = std
        else:
            units = self.units
            return Vector(values=values, E=E, std=std, units=units)

[docs]    def cut_nan(self, inplace: bool = True) -> Vector:
        """ Cut the vector where elements are `np.nan`

        Args:
            inplace (bool, optional): If `True` perform the cut on this vector
                or if `False` returns a copy. Defaults to True
        Returns:
            The cut vector if `inplace` is True.
        """
        inan = np.argwhere(np.isnan(self.values))

        values = np.delete(self.values, inan)
        E = np.delete(self.E, inan)
        std = None if self.std is None else np.delete(self.std, inan)
        if inplace:
            self.values = values
            self.E = E
            self.std = std
        else:
            return Vector(values=values, E=E, std=std, units=self.units)

[docs]    def rebin(self, mids: Optional[Sequence[float]] = None,
              factor: Optional[float] = None,
              inplace: bool = True) -> Optional[Vector]:
        """ Rebins vector, assuming equidistant binning

        Args:
            mids: The new energy mids. Can not be
                given alongside 'factor'.
            factor: The factor by which the step size shall be
                changed. Can not be given alongside 'mids'.
            inplace: Whether to change E and values
                inplace or return the rebinned vector.
        Returns:
            The rebinned vector if inplace is 'False'.
        """
        if not (mids is None) ^ (factor is None):
            raise ValueError("Either 'mids' or 'factor' must be"
                             " specified, but not both.")
        mids_old = self.E

        if factor is not None:
            if factor <= 0:
                raise ValueError("'factor' must be positive")
            num_mids = int(len(mids_old)/factor)
            mids, step = np.linspace(mids_old[0], mids_old[-1],
                                     num=num_mids, retstep=True)
            LOG.debug("Rebinning with factor %g, giving %g mids",
                      factor, num_mids)
            LOG.debug("Old step size: %g\nNew step size: %g",
                      mids_old[1] - mids_old[0], step)
            mids = np.asarray(mids, dtype=float)

        rebinned = rebin_1D(self.values, mids_old, mids)
        if inplace:
            self.values = rebinned
            self.E = mids
            self.verify_integrity()
        else:
            return Vector(E=mids, values=rebinned)

[docs]    def to_MeV(self) -> Vector:
        """ Convert E from keV to MeV if necessary """
        if self.units == "MeV":
            pass
        elif self.units == "keV":
            self.E /= 1e3
            self.units = "MeV"
        else:
            raise NotImplementedError("Units must be keV or MeV")

[docs]    def to_keV(self) -> Vector:
        """ Convert E from MeV to keV if necessary """
        if self.units == "keV":
            pass
        elif self.units == "MeV":
            self.E *= 1e3
            self.units = "keV"
        else:
            raise NotImplementedError("Units must be keV or MeV")

[docs]    def closest(self, E_new: ndarray, side: Optional[str] = 'right',
                inplace=False) -> Union[Vector, None]:
        """ Re-bin the vector without merging bins.

            The resulting vector will have E_new as the x-axis while
            the jth y-value will be given by the ith value of the original
            y-values where E[i] < E_new[j] <= E[i+1] or
            E[i] <= E_new[j] < E[i+1].

            Args:
                E_new: Bin value to find. Value or array.
                side: 'left': E[i] < E[j] <= E[i+1],
                      'right': E[i] <= E[j] <= E[i+1]
                inplace: Whether to make the change inplace or not.
            Returns:
                Vector with the new E axis and the bin content of the bins
                that contains E.
            Raises:
                RuntimeError if the x-axis of the original vector is
                not sorted.
        """

        if not np.all(self.E[:-1] <= self.E[1:]):
            raise RuntimeError("x-axis not sorted.")

        indices = np.searchsorted(self.E, E_new, side=side)

        # Ensure that any element outside the range of E will get indice
        # -1.
        indices[indices >= len(self.E)] = 0
        indices -= 1

        # We need to append 0 to the end to ensure that we fill 0 if any
        # element E_new is outside of the bounds of self.E
        values = np.append(self.values, [0])
        values = values[indices]

        std = None
        if self.std is not None:
            std = np.append(self.std, [0])
            std = std[indices]

        if inplace:
            self.E = E_new
            self.values = values
            self.std = std
        else:
            return Vector(values=values, E=E_new, std=std, units=self.units)

[docs]    def cumulative(self, factor: Optional[Union[float, str]] = None,
                   inplace=False) -> Union[Vector, None]:
        """ Cumulative sum of the vector.

            Args:
                factor: A factor to multiply to the resulting vector. Possible
                values are None (default), a float or string 'de'. If 'de' the
                factor will be calculated by E[1] - E[0].
                inplace: Whether to make the change inplace or not.
            Returns:
                The cumulative sum vector if inplace is 'False'
            Raises:
                RuntimeError if elements in self.E are not equidistant
                and factor='de'.
                ValueError if factor is a string other than 'de'.
        """

        if factor is None:
            factor = 1.0
        elif isinstance(factor, str):
            if factor.lower() != 'de':
                raise ValueError(f"Unkown option for factor {factor}")
            factor = float(self.E[1] - self.E[0])
            if not np.all(np.diff(self.E) == factor):
                raise RuntimeError(f"Vector x-axis isn't equidistant.")

        cumsum = np.cumsum(self.values)*factor
        cumerr = None
        if self.std is not None:
            cumerr = np.sqrt(np.cumsum(self.std**2))*factor

        if inplace:
            self.values = cumsum
            self.std = cumerr
        else:
            return Vector(values=cumsum, E=self.E,
                          std=cumerr, units=self.units)

[docs]    def has_equal_binning(self, other: Vector, **kwargs) -> bool:
        """ Check whether `other` has equal_binning as `self` within precision.

        Args:
            other (Vector): Vector to compare to.
            kwargs: Additional kwargs to `np.allclose`.

        Returns:
            Returns `True` if both arrays are equal  .

        Raises:
            TypeError: If other is not a Vector.
            ValueError: If any of the bins in any of the arrays are not equal.

        """
        if not isinstance(other, Vector):
            raise TypeError("Other must be a Vector")
        if self.shape != other.shape:
            raise ValueError("Must have equal number of energy bins.")
        if not np.allclose(self.E, other.E, **kwargs):
            raise ValueError("Must have equal energy binning.")
        else:
            return True

[docs]    def index(self, E: float) -> int:
        """ Returns the closest index corresponding to the E value """
        return index(self.E, E)

    def __matmul__(self, other: Vector) -> Vector:
        result = self.copy()
        if isinstance(other, Vector):
            self.has_equal_binning(other)
        else:
            NotImplementedError("Type not implemented")

        result.values = result.values@other.values
        return result