Source code for ompy.ensembleNormalizer

import numpy as np
import logging
import matplotlib.pyplot as plt
import os
import itertools
from typing import List, Optional, Any, Tuple, Union, Callable
from operator import xor
import pandas as pd
from scipy.stats import norm as scipynorm
import copy
from pathos.multiprocessing import ProcessPool
from pathos.helpers import cpu_count
from pathlib import Path

from .abstract_normalizer import AbstractNormalizer
from .models import ResultsNormalized
from .vector import Vector
from .extractor import Extractor
from .normalizer_nld import NormalizerNLD
from .normalizer_gsf import NormalizerGSF
from .normalizer_simultan import NormalizerSimultan


if 'JPY_PARENT_PID' in os.environ:
    from tqdm import tqdm_notebook as tqdm
else:
    from tqdm import tqdm


class EnsembleNormalizer(AbstractNormalizer):
    """Normalizes NLD and γSF extracted from the ensemble

    Usage:
      The calling syntax can be either to normalize simultaneously::

        EnsembleNormalizer(extractor=..., normalizer_simultan=...)

      or to normalize sequentially::

        EnsembleNormalizer(extractor=..., normalizer_nld=...,
                           normalizer_gsf=...)

    Note:
        If one adds functionality that depends on random numbers within
        the parallelized loop, make sure to use the random generator
        exposed via the arguments (see the Ensemble class for an example).
        If one uses np.random instead, each process receives an exact
        copy of the same state. Note that this is not an issue for the
        multinest search routine, which is seeded by default as
        implemented in ompy.

    Attributes:
        extractor (Extractor): Extractor instance
        normalizer_nld (NormalizerNLD): NormalizerNLD instance
        normalizer_gsf (NormalizerGSF): NormalizerGSF instance
        normalizer_simultan (NormalizerSimultan): NormalizerSimultan
            instance
        res (List[ResultsNormalized]): List of the results
        nprocesses (int): Number of processes for multiprocessing.
            Defaults to the number of available cpus minus 1
            (with a minimum of 1).
    """
    LOG = logging.getLogger(__name__)  # overwrite parent variable
    logging.captureWarnings(True)

    def __init__(self, *, extractor: Extractor,
                 normalizer_nld: Optional[NormalizerNLD] = None,
                 normalizer_gsf: Optional[NormalizerGSF] = None,
                 normalizer_simultan: Optional[NormalizerSimultan] = None,
                 path: Optional[Union[str, Path]] = 'saved_run/normalizers',
                 regenerate: bool = False):
        """
        Args:
            extractor (Extractor): Extractor instance
            normalizer_nld (NormalizerNLD, optional): NormalizerNLD instance
            normalizer_gsf (NormalizerGSF, optional): NormalizerGSF instance
            normalizer_simultan (NormalizerSimultan, optional):
                NormalizerSimultan instance
        """
        super().__init__(regenerate)
        self.extractor = extractor
        self.normalizer_nld = copy.deepcopy(normalizer_nld)
        self.normalizer_gsf = copy.deepcopy(normalizer_gsf)
        self.normalizer_simultan = copy.deepcopy(normalizer_simultan)

        self.nprocesses: int = cpu_count() - 1 if cpu_count() > 1 else 1

        self.res: Optional[List[ResultsNormalized]] = None

        if path is None:
            self.path = None
        else:
            self.path = Path(path)
            self.path.mkdir(exist_ok=True, parents=True)

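    # A minimal usage sketch (not part of the original source). Both calling
    # styles from the class docstring; `extractor`, `norm_sim`, `norm_nld`
    # and `norm_gsf` are hypothetical, already-configured instances:
    #
    #     ensnorm = EnsembleNormalizer(extractor=extractor,
    #                                  normalizer_simultan=norm_sim)
    #     ensnorm.normalize()
    #
    #     ensnorm = EnsembleNormalizer(extractor=extractor,
    #                                  normalizer_nld=norm_nld,
    #                                  normalizer_gsf=norm_gsf)
    #     ensnorm.normalize()
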
    def normalize(self) -> None:
        """ Normalize the ensemble """
        if not self.regenerate:
            try:
                self.load()
                return
            except FileNotFoundError:
                pass

        assert xor((self.normalizer_nld is not None
                    and self.normalizer_gsf is not None),
                   self.normalizer_simultan is not None), \
            "Either 'normalizer_nld' and 'normalizer_gsf' must be set, " \
            "or 'normalizer_simultan'"

        gsfs = self.extractor.gsf
        nlds = self.extractor.nld

        try:
            self.LOG.info(f"Start normalization with {self.nprocesses} cpus")
            pool = ProcessPool(nodes=self.nprocesses)
            N = len(nlds)
            iterator = pool.imap(self.step, range(N), nlds, gsfs)
            self.res = list(tqdm(iterator, total=N))
            pool.close()
            pool.join()
            pool.clear()
        except Exception:
            import traceback
            self.LOG.error("Multiprocessing failed; running with a single "
                           "thread instead")
            self.LOG.debug("Multiprocessing error:\n%s",
                           traceback.format_exc())
            self.res = []
            for i in tqdm(range(len(nlds)), total=len(nlds)):
                self.res.append(self.step(i, nlds[i], gsfs[i]))

        self.save()

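    # The parallelization pattern above, as a standalone toy example (not
    # part of the original source): pathos' ProcessPool.imap maps a function
    # over several iterables in parallel, zipping them like the builtin map:
    #
    #     from pathos.multiprocessing import ProcessPool
    #     pool = ProcessPool(nodes=2)
    #     it = pool.imap(lambda i, x, y: x + y + i,
    #                    range(3), [1, 2, 3], [10, 20, 30])
    #     print(list(it))  # [11, 23, 35]
    #     pool.close(); pool.join(); pool.clear()
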
    def step(self, i: int, nld: Vector, gsf: Vector):
        """ Normalization step for each ensemble member

        Args:
            i (int): Loop number
            nld (Vector): NLD before normalization
            gsf (Vector): gsf before normalization

        Returns:
            res (ResultsNormalized): results (/parameters) of normalization
        """
        self.LOG.info(f"\n\n---------\nNormalizing #{i}")
        nld = nld.copy()
        nld.cut_nan()

        gsf = gsf.copy()
        gsf.cut_nan()

        if self.normalizer_simultan is not None:
            res = self.normalizeSimultan(i, nld=nld, gsf=gsf)
        else:
            res = self.normalizeStagewise(i, nld=nld, gsf=gsf)

        return res

    def normalizeSimultan(self, num: int, *,
                          nld: Vector, gsf: Vector) -> ResultsNormalized:
        """ Wrapper for simultaneous normalization

        Args:
            num (int): Loop number
            nld (Vector): NLD before normalization
            gsf (Vector): gsf before normalization

        Returns:
            res (ResultsNormalized): results (/parameters) of normalization
        """
        self.normalizer_simultan._save_instance = False
        self.normalizer_simultan.regenerate = True

        self.normalizer_simultan.normalize(gsf=gsf, nld=nld, num=num)
        return self.normalizer_simultan.res

    def normalizeStagewise(self, num: int, *,
                           nld: Vector, gsf: Vector) -> ResultsNormalized:
        """ Wrapper for stagewise normalization

        Args:
            num (int): Loop number
            nld (Vector): NLD before normalization
            gsf (Vector): gsf before normalization

        Returns:
            res (ResultsNormalized): results (/parameters) of normalization
        """
        for norm in [self.normalizer_nld, self.normalizer_gsf]:
            norm._save_instance = False
            norm.regenerate = True

        self.normalizer_nld.normalize(nld=nld, num=num)
        self.normalizer_gsf.normalize(normalizer_nld=self.normalizer_nld,
                                      gsf=gsf, num=num)

        # sample B from the Gaussian uncertainty for each nld sample
        B = self.normalizer_gsf.res.pars["B"]
        N = len(self.normalizer_gsf.res.samples["A"])
        self.normalizer_gsf.res.samples["B"] = scipynorm.rvs(loc=B[0],
                                                             scale=B[1],
                                                             size=N)
        return self.normalizer_gsf.res

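    # The B-sampling above as a standalone sketch (illustrative only; the
    # (mean, std) pair below is made up): scipy's norm.rvs draws N Gaussian
    # samples around the fitted value of B.
    #
    #     from scipy.stats import norm
    #     B = (2.5, 0.3)  # hypothetical (mean, std) of B
    #     draws = norm.rvs(loc=B[0], scale=B[1], size=1000)
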
    def plot(self, ax: Tuple[Any, Any] = None,
             add_figlegend: bool = True,
             n_plot: int = 5,
             plot_model_stats: bool = False,
             random_state: Optional[np.random.RandomState] = None,
             return_stats: bool = False,
             **kwargs) -> Union[Tuple[Any, Any],
                                Tuple[Any, Any, Tuple[Any, Any]]]:
        """Plots randomly drawn samples

        Args:
            ax (Tuple[Any, Any], optional): The matplotlib axes to plot
                onto. Created if not provided.
            add_figlegend (bool, optional): Defaults to `True`.
            n_plot (int, optional): Number of (nld, gsf) samples to plot
            plot_model_stats (bool, optional): Also plot stats for the
                models used in the normalization
            random_state (np.random.RandomState, optional): random state,
                set by default such that repeated use of the function
                gives the same results.
            return_stats: Whether to return the vector stats (percentiles)
            **kwargs: Description

        TODO:
            - Refactor code
            - Could not find out how to avoid plotting duplicate legend
              entries, thus using a workaround
            - Check whether it extrapolates where nld or gsf is np.nan

        Returns:
            Tuple: If `return_stats=False`, returns `fig, ax`, otherwise
            `fig, ax, (stats_nld, stats_gsf, stats_nld_model,
            stats_gsf_model)`
        """
        if ax is None:
            fig, ax = plt.subplots(1, 2, constrained_layout=True)
        else:
            fig = ax[0].figure

        norm_sim = self.normalizer_simultan
        if norm_sim is not None:
            normalizer_gsf = copy.deepcopy(norm_sim.normalizer_gsf)
            normalizer_nld = copy.deepcopy(norm_sim.normalizer_nld)
        else:
            normalizer_gsf = copy.deepcopy(self.normalizer_gsf)
            normalizer_nld = copy.deepcopy(self.normalizer_nld)

        if random_state is None:  # cannot move this to the definition
            random_state = np.random.RandomState(98765)

        samples = self.samples_from_res(random_state)

        self.plot_selection(ax=ax, samples=samples,
                            normalizer_nld=normalizer_nld,
                            normalizer_gsf=normalizer_gsf,
                            n_plot=n_plot)

        # unify the Egrid (and values) in case the vectors differ in length
        self.samples_unify_E(samples["nld"])
        self.samples_unify_E(samples["gsf"])

        # get the median, 1 sigma, ...
        prop_cycle = plt.rcParams['axes.prop_cycle']
        colors = prop_cycle.by_key()['color']
        percentiles = [0.16, 0.84]
        _, stats_nld, stats_gsf = self.plot_vector_stats(ax, samples,
                                                         percentiles,
                                                         colors[1])

        if plot_model_stats or return_stats:
            if plot_model_stats:
                ax_stats = ax
            else:
                fig, ax_stats = plt.subplots(2, 1)  # dummy

            Emin = samples["nld"].iloc[0].E[-1]
            x = np.linspace(Emin, normalizer_nld.norm_pars.Sn[0], num=20)
            stats_nld_model = \
                self.plot_nld_ext_stats(ax_stats[0], x=x, samples=samples,
                                        normalizer_nld=normalizer_nld,
                                        percentiles=percentiles,
                                        color=colors[2], label="model")

            E = samples["gsf"].iloc[0].E
            xlow = np.linspace(0.001, E[0], num=20)
            xhigh = np.linspace(E[-1], normalizer_gsf.norm_pars.Sn[0],
                                num=20)
            stats_gsf_model = \
                self.plot_gsf_ext_stats(ax_stats[1], xlow=xlow,
                                        xhigh=xhigh, samples=samples,
                                        normalizer_gsf=normalizer_gsf,
                                        percentiles=percentiles,
                                        color=colors[2])

        if add_figlegend:
            fig.legend(loc=9, ncol=4, frameon=True)
            fig.subplots_adjust(left=0.1, right=0.9, top=0.8, bottom=0.1)

        if return_stats:
            return fig, ax, (stats_nld, stats_gsf,
                             stats_nld_model, stats_gsf_model)
        else:
            return fig, ax

    def samples_from_res(self,
                         random_state: Optional[np.random.RandomState] = None
                         ) -> pd.DataFrame:
        """Draw random samples from the results with transformed nld & gsf

        Args:
            random_state (np.random.RandomState, optional): random state,
                set by default such that repeated use of the function
                gives the same results.

        Returns:
            Samples
        """
        for i in range(len(self.res)):
            nld = self.extractor.nld[i].copy()
            gsf = self.extractor.gsf[i].copy()
            nld.to_MeV()
            gsf.to_MeV()
            samples_ = copy.deepcopy(self.res[i].samples)
            df = tranform_nld_gsf(samples_, nld, gsf,
                                  random_state=random_state)
            if i == 0:
                samples = df
            else:
                samples = samples.append(df)
        return samples

    def plot_selection(self, *, ax: Tuple[Any, Any],
                       samples: pd.DataFrame,
                       normalizer_nld: Optional[NormalizerNLD],
                       normalizer_gsf: Optional[NormalizerGSF],
                       n_plot: Optional[int] = 5,
                       random_state: Optional[np.random.RandomState] = None
                       ) -> None:
        """ Plot some nld and gsf samples

        Args:
            ax (Tuple[Any, Any]): The matplotlib axes to plot onto.
            samples (pd.DataFrame): Random samples from the results with
                transformed nld & gsf
            normalizer_nld (NormalizerNLD): NormalizerNLD instance.
                Note: Pass a copy, as the instance attributes will be
                changed.
            normalizer_gsf (NormalizerGSF): NormalizerGSF instance.
                Note: Pass a copy, as the instance attributes will be
                changed.
            n_plot (int, optional): Number of (nld, gsf) samples to plot
            random_state (np.random.RandomState, optional): random state,
                set by default such that repeated use of the function
                gives the same results.
        """
        if random_state is None:  # cannot move this to the definition
            random_state = np.random.RandomState(98765)

        n_plot_ = n_plot if n_plot > 0 else 1  # dummy to draw the axes

        markers = itertools.cycle(('o', 'x', 'P', 'v', '^', '<', '>', '8',
                                   's', 'p', '*', 'h', 'H', 'D', 'd', 'X'))

        res = copy.deepcopy(self.res[0])  # dummy for later
        selection = samples.sample(n=n_plot_, random_state=random_state)
        for i, (_, row) in enumerate(selection.iterrows()):
            res.nld = row["nld"]
            res.gsf = row["gsf"]
            res.pars = row.to_dict()

            # workaround for the tuple (currently just a float)
            keys_workaround = ["T", "Eshift"]
            for key in keys_workaround:
                res.pars[key] = [res.pars[key], np.nan]

            # create extrapolations of the gsf
            normalizer_gsf._gsf = row["gsf"]
            normalizer_gsf.extrapolate(row["gsf"])
            res.gsf_model_low = normalizer_gsf.model_low
            res.gsf_model_high = normalizer_gsf.model_high
            for model in [res.gsf_model_low, res.gsf_model_high]:
                model.shift_after = model.shift

            add_label = True if i == 0 else False
            plot_fitregion = True if i == 0 else False
            marker = next(markers)
            normalizer_nld.plot(ax=ax[0], results=res,
                                add_label=add_label, alpha=1/n_plot_,
                                add_figlegend=False,
                                plot_fitregion=plot_fitregion,
                                marker=marker, linestyle="--")
            normalizer_gsf.plot(ax=ax[1], results=res, add_label=False,
                                alpha=1/n_plot_, add_figlegend=False,
                                plot_fitregion=plot_fitregion,
                                marker=marker, linestyle="--")

        if n_plot == 0:  # remove the lines if this was a dummy draw only
            l_keep = []
            for line in ax[0].lines:
                if line._label in ["known levels", "_nld(Sn)"]:
                    l_keep.append(line)
            ax[0].lines = [*l_keep]
            ax[1].lines = []

    def samples_unify_E(self, df: pd.DataFrame) -> None:
        """ Get nlds (or gsfs) on a common energy grid, if of diff. lengths

        After applying, the vectors in the DataFrame are on a common
        energy grid. Missing values are filled with np.nan.

        Args:
            df: DataFrame column with the vectors to be put on a unified
                energy grid
        """
        extend = np.array(list(map(vec_extend, df)))
        # if already equal: no need to proceed
        if np.equal(extend[0], extend).all():
            return None

        iEmin = np.argmin(extend[:, 0])
        iEmax = np.argmax(extend[:, 1])
        Eunion = np.union1d(df.iloc[iEmin].E, df.iloc[iEmax].E)

        # helper: scatter the values of vec into the union grid
        def vec_extend_values(vec, Eunion):  # noqa
            Eold = vec.E
            interEunion = np.in1d(Eunion, Eold, assume_unique=True)
            interEold = np.in1d(Eold, Eunion, assume_unique=True)

            vec.E = Eunion
            val_union = np.full_like(Eunion, np.nan)
            val_union[interEunion] = vec.values[interEold]
            vec.values = val_union

        # apply the helper to each element (in-place)
        for vec in df.to_numpy():
            vec_extend_values(vec, Eunion)

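    # The grid-unification trick above, as a standalone numpy sketch
    # (illustrative only): values on grid `Eold` are scattered into the
    # union grid, with np.nan where the vector has no point.
    #
    #     Eold = np.array([1., 2., 3.])
    #     Eunion = np.union1d(Eold, np.array([1., 2., 3., 4.]))
    #     mask = np.in1d(Eunion, Eold, assume_unique=True)
    #     vals = np.full_like(Eunion, np.nan)
    #     vals[mask] = np.array([10., 20., 30.])
    #     # vals == [10., 20., 30., nan]
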
    @staticmethod
    def plot_vector_stats(ax: Tuple[Any, Any],
                          samples: pd.DataFrame,
                          percentiles: Tuple[float, float],
                          color: Any) -> Tuple[Any, pd.DataFrame,
                                               pd.DataFrame]:
        """ Helper for plotting stats from a vector

        Args:
            ax: Axes to plot on
            samples: Samples of (nld, gsf, transformation parameters)
            percentiles: Lower and upper percentile to plot the shading
            color (Any): Color of nld and gsf

        Returns:
            Lines of the fill_between, and the stats DataFrames of nld
            and gsf
        """
        # workaround, as plotting from a DataFrame changes limits & labels
        lim_ax0 = [ax[0].get_xlim(), ax[0].get_ylim()]
        lim_ax1 = [ax[1].get_xlim(), ax[1].get_ylim()]
        label_ax0 = [ax[0].get_xlabel(), ax[0].get_ylabel()]
        label_ax1 = [ax[1].get_xlabel(), ax[1].get_ylabel()]

        # helper: write the values of a vector to a row of `out`
        def vec_to_values(x, out):  # noqa
            idx, vec = x
            out[idx] = vec.values

        df = samples["nld"]
        E = df.iloc[0].E
        stats_nld = EnsembleNormalizer.stats_from_df(
            df, fmap=vec_to_values, shape_out=(len(df), len(E)),
            percentiles=percentiles)
        stats_nld["x"] = E
        stats_nld.plot(x="x", y="median", ax=ax[0], legend=False,
                       color=color)

        df = samples["gsf"]
        E = df.iloc[0].E
        stats_gsf = EnsembleNormalizer.stats_from_df(
            df, fmap=vec_to_values, shape_out=(len(df), len(E)),
            percentiles=percentiles)
        stats_gsf["x"] = E
        stats_gsf.plot(x="x", y="median", ax=ax[1], legend=False,
                       color=color)

        pc_diff = percentiles[1] - percentiles[0]
        label = fr"{pc_diff*100:.0f}\% credibility interval"
        ax[0].fill_between(stats_nld.x, stats_nld["low"],
                           stats_nld["high"], alpha=0.3, label=label)
        lines = ax[1].fill_between(stats_gsf.x, stats_gsf["low"],
                                   stats_gsf["high"], alpha=0.3)

        ax[0].set_xlim(lim_ax0[0])
        ax[0].set_ylim(lim_ax0[1])
        ax[1].set_xlim(lim_ax1[0])
        ax[1].set_ylim(lim_ax1[1])

        ax[0].set_xlabel(label_ax0[0])
        ax[0].set_ylabel(label_ax0[1])
        ax[1].set_xlabel(label_ax1[0])
        ax[1].set_ylabel(label_ax1[1])

        return lines, stats_nld, stats_gsf

    @staticmethod
    def stats_from_df(df: pd.DataFrame,
                      fmap: Callable[[Vector, np.ndarray], None],
                      shape_out: Tuple[int, int],
                      percentiles: Tuple[float, float]) -> pd.DataFrame:
        """Helper to get the median, 68% (or similar) from a collection
        of Vectors

        Args:
            df: DataFrame of Vectors
            fmap: Applied to each row of df; fills a row of the output
                array as a side effect
            shape_out: Output shape
            percentiles: Lower and upper percentiles for the stats
                (e.g. 16 and 84% for something like 1 sigma)

        Returns:
            DataFrame with columns ['median', 'low', 'high'] and entries
            for each energy of the Vectors.
        """
        array = df.to_numpy()
        out = np.zeros(shape_out)
        # apply fmap to each row of the DataFrame (fmap fills `out`)
        for item in enumerate(array):
            fmap(item, out)
        stats = pd.DataFrame(out)
        stats = pd.DataFrame({'median': stats.median(),
                              'low': stats.quantile(percentiles[0], axis=0),
                              'high': stats.quantile(percentiles[1],
                                                     axis=0)})
        return stats

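    # The percentile reduction above, standalone (illustrative only):
    # given an (N_samples, N_energies) array, compute the per-energy
    # median and a credibility band from the column quantiles.
    #
    #     out = np.random.default_rng(0).normal(size=(500, 4))
    #     stats = pd.DataFrame(out)
    #     stats = pd.DataFrame({'median': stats.median(),
    #                           'low': stats.quantile(0.16, axis=0),
    #                           'high': stats.quantile(0.84, axis=0)})
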
    @staticmethod
    def plot_nld_ext_stats(ax: Any, *, x: np.ndarray,
                           samples: pd.DataFrame,
                           normalizer_nld: NormalizerNLD,
                           percentiles: Tuple[float, float],
                           **kwargs) -> pd.DataFrame:
        """Helper for plotting statistics of the nld extrapolation

        Args:
            ax: The matplotlib axis to plot onto.
            x: x-axis values (energies)
            samples: Samples of (nld, gsf, transformation parameters)
            normalizer_nld: NormalizerNLD instance.
            percentiles: Lower and upper percentile to plot the shading
            **kwargs: Additional keyword arguments for the plotting

        Returns:
            DataFrame with columns ['median', 'low', 'high'] and entries
            for each energy of the Vectors.
        """
        # helper: evaluate the nld model for a (T, Eshift) sample
        def to_values(a, out):  # noqa
            idx, val = a
            out[idx] = normalizer_nld.model(E=x, T=val[0], Eshift=val[1])

        df = samples[["T", "Eshift"]]
        stats = EnsembleNormalizer.stats_from_df(
            df, fmap=to_values, shape_out=(len(df), len(x)),
            percentiles=percentiles)

        ax.plot(x, stats["median"], **kwargs)
        ax.fill_between(x, stats["low"], stats["high"], alpha=0.3,
                        **kwargs)
        return stats

    @staticmethod
    def plot_gsf_ext_stats(ax: Any, *, xlow: np.ndarray,
                           xhigh: np.ndarray,
                           samples: pd.DataFrame,
                           normalizer_gsf: NormalizerGSF,
                           percentiles: Tuple[float, float],
                           color: Any) -> Tuple[pd.DataFrame,
                                                pd.DataFrame]:
        """Helper for plotting statistics of the gsf extrapolations

        Args:
            ax: The matplotlib axis to plot onto.
            xlow: x-axis values (energies) of the lower extrapolation
            xhigh: x-axis values (energies) of the higher extrapolation
            samples: Samples of (nld, gsf, transformation parameters)
            normalizer_gsf: NormalizerGSF instance.
            percentiles: Lower and upper percentile to plot the shading
            color (Any): Color of the plotted stats

        Returns:
            Tuple of DataFrames with columns ['median', 'low', 'high']
            and entries for each energy of the Vectors. The first entry
            is for the lower extrapolation, the second entry for the
            higher extrapolation.
        """
        # helper: extrapolate a gsf sample and fill the output rows
        def to_values(a, out):  # noqa
            idx, val = a
            low, high = normalizer_gsf.extrapolate(val, E=[xlow, xhigh])
            out[0, idx] = low.values
            out[1, idx] = high.values

        assert len(xlow) == len(xhigh)
        array = samples["gsf"].to_numpy()
        out = np.zeros((2, len(array), len(xlow)))
        low, high = percentiles
        # apply to_values to each gsf sample (to_values fills `out`)
        for item in enumerate(array):
            to_values(item, out)

        # stats for the lower and upper model
        stats = []
        for i, arr in enumerate([out[0, :, :], out[1, :, :]]):
            stat = pd.DataFrame(arr)
            stat = pd.DataFrame({'median': stat.median(),
                                 'low': stat.quantile(low, axis=0),
                                 'high': stat.quantile(high, axis=0)})
            ax.plot(xlow if i == 0 else xhigh, stat["median"],
                    color=color)
            ax.fill_between(xlow if i == 0 else xhigh,
                            stat["low"], stat["high"],
                            alpha=0.3, color=color)
            stats.append(stat)
        return stats

    def save_results_txt(self, path: Optional[Union[str, Path]] = None,
                         suffix: str = None):
        """ Save the results as txt

        Uses a folder to save the nld, gsf, and the samples (converted
        to an array)

        Args:
            path: The path to the save directory. If None, `self.path`
                will be used.
            suffix: Ignored; the ensemble member index is used as suffix.
        """
        path = Path(path) if path is not None else Path(self.path)
        path.mkdir(exist_ok=True, parents=True)

        for i, res in enumerate(self.res):
            super().save_results_txt(path, nld=res.nld, gsf=res.gsf,
                                     samples=res.samples, suffix=i)

def tranform_nld_gsf(samples: dict, nld=None, gsf=None, N_max: int = 100,
                     random_state=None) -> pd.DataFrame:
    """ Transform a (list of) nld and/or gsf sample(s)

    Uses a dict of samples of the `A`, `B`, and `alpha` parameters from
    multinest to transform a (list of) nld and/or gsf sample(s). Can be
    used to normalize the nld and/or gsf.

    Args:
        samples (dict): Multinest samples.
        nld (om.Vector or list/array[om.Vector], optional):
            nld ("unnormalized")
        gsf (om.Vector or list/array[om.Vector], optional):
            gsf ("unnormalized")
        N_max (int, optional): Maximum number of samples returned if
            `nld` and `gsf` are a list/array
        random_state (optional): random state, set by default such that
            repeated use of the function gives the same results.

    Returns:
        DataFrame with randomly selected samples of nld, gsf and the
        corresponding parameters. The nld and gsf are transformed.
    """
    # Need to sweep through the multinest samples in random order, as
    # they are ordered with decreasing likelihood by default
    for key, value in samples.items():
        N_multinest = len(value)
        break
    randlist = np.arange(N_multinest)
    if random_state is None:  # cannot move this to the definition
        random_state = np.random.RandomState(65489)
    random_state.shuffle(randlist)  # works in-place

    if nld is not None:
        A = samples["A"]
        alpha = samples["alpha"]
        if type(nld) is Vector:
            N = min(N_multinest, N_max)
        else:
            N = len(nld)
        nld_trans = []

    if gsf is not None:
        B = samples["B"]
        alpha = samples["alpha"]
        if type(gsf) is Vector:
            N = min(N_multinest, N_max)
        else:
            N = len(gsf)
        gsf_trans = []

    # transform the list
    for i in range(N):
        i_multi = randlist[i]

        # nld loop
        try:
            if type(nld) is Vector:
                nld_tmp = nld
            else:
                nld_tmp = nld[i]
            nld_tmp = nld_tmp.transform(alpha=alpha[i_multi],
                                        const=A[i_multi], inplace=False)
            nld_trans.append(nld_tmp)
        except TypeError:  # nld is None
            pass

        # gsf loop
        try:
            if type(gsf) is Vector:
                gsf_tmp = gsf
            else:
                gsf_tmp = gsf[i]
            gsf_tmp = gsf_tmp.transform(alpha=alpha[i_multi],
                                        const=B[i_multi], inplace=False)
            gsf_trans.append(gsf_tmp)
        except TypeError:  # gsf is None
            pass

    df = pd.DataFrame.from_dict(samples)
    selected = df.iloc[randlist[:N]]
    if nld is not None:
        selected["nld"] = nld_trans
    if gsf is not None:
        selected["gsf"] = gsf_trans
    return selected


def vec_extend(vector: Vector) -> Tuple[float, float]:
    """ Get the lowest and highest energy of the vector

    Assumes that the energy array is sorted.

    Args:
        vector: input Vector

    Returns:
        Tuple of the lowest and highest energies of the Vector
    """
    return vector.E[0], vector.E[-1]
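
# A minimal sketch of how tranform_nld_gsf is typically used (not part of
# the original source; `res` is a hypothetical ResultsNormalized and
# `nld`, `gsf` hypothetical unnormalized Vectors in MeV):
#
#     df = tranform_nld_gsf(res.samples, nld=nld, gsf=gsf, N_max=50)
#     df["nld"]  # randomly selected, transformed nld Vectors
#     df["gsf"]  # randomly selected, transformed gsf Vectors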