Source code for txpipe.magnification

from .base_stage import PipelineStage
from .data_types import (
    YamlFile,
    HDFFile,
    FitsFile,
    PNGFile,
)
from .utils import LensNumberDensityStats, Splitter, rename_iterated
from .binning import build_tomographic_classifier, apply_classifier
import numpy as np
import warnings
from ceci.config import StageParameter



[docs]
class TXSSIMagnification(PipelineStage):
    """
    Compute the magnification coefficients using SSI outputs

    Follows the methodology of https://arxiv.org/abs/2012.12825
    and https://arxiv.org/abs/2209.09782
    """

    name = "TXSSIMagnification"
    parallel = False

    inputs = [
        ("binned_lens_catalog_nomag", HDFFile),
        ("binned_lens_catalog_mag", HDFFile),
    ]

    outputs = [
        ("magnification_coefficients", HDFFile),
        ("magnification_plot", PNGFile),
    ]

    config_options = {
        "chunk_rows": StageParameter(int, 10000, msg="Number of rows to process in each chunk."),
        # TODO: add a way for "applied_magnification" to be determined from the SSI inputs directly
        "applied_magnification": StageParameter(
            float, 1.02, msg="Magnification applied to the 'magnified' SSI catalog."
        ),
        "n_patches": StageParameter(int, 20, msg="Number of patches for bootstrap error estimation."),
        "bootstrap_error": StageParameter(bool, True, msg="Whether to compute bootstrap errors."),
    }

    def run(self):
        """
        Run the analysis for this stage.
        """
        from scipy.stats import bootstrap

        # load the catalogs
        nomag_cat = self.open_input("binned_lens_catalog_nomag")
        mag_cat = self.open_input("binned_lens_catalog_mag")

        # get/estimate the magnification applied to the magnified catalog
        # TO DO: could put this as optional config item in TXSSIIngest
        # so that it is done automatically
        mu = self.config["applied_magnification"]
        deltak = (1.0 - 1.0 / mu) / 2.0

        if self.config["bootstrap_error"]:
            # split no-mag sample up into patches using K-means clustering
            # to be used in bootstrap errors
            cluster = self.calc_cluster_patches(nomag_cat)

        # compute fractional change in number count for both samples
        # in each lens bin and for the full "2D" sample
        nbins = nomag_cat["lens/"].attrs["nbin_lens"]
        outfile = self.setup_output(nbins + 1)
        for ibin in range(nbins + 1):
            if ibin == nbins:  # last "bin" will be the full 2d sample (all bins)
                bin_label = "all"
                print(f"Computing magnification coefficient for bin_all")
            else:
                bin_label = ibin
                print(f"Computing magnification coefficient for bin {ibin + 1}")

            # single redshift bins should be low enough number
            # count that we can load the whole sample
            w1 = nomag_cat[f"lens/bin_{bin_label}/weight"][:]
            w2 = mag_cat[f"lens/bin_{bin_label}/weight"][:]

            # compute mag coeff with the whole sample
            csample = self.calc_frac_change(w1, w2) / deltak
            self.write_output(outfile, csample, ibin)

            if self.config["bootstrap_error"]:
                print(f"Computing uncertainty with bootstrap")

                # assign each object to it's patch from the K-means clustering
                # 1=unmagnified, 2=magnified
                patch1 = cluster.predict(
                    np.transpose(
                        [
                            nomag_cat[f"lens/bin_{bin_label}/ra"][:],
                            nomag_cat[f"lens/bin_{bin_label}/dec"][:],
                        ]
                    )
                )
                patch2 = cluster.predict(
                    np.transpose(
                        [
                            mag_cat[f"lens/bin_{bin_label}/ra"][:],
                            mag_cat[f"lens/bin_{bin_label}/dec"][:],
                        ]
                    )
                )

                # count the (weighted) number of objects in each patch
                label1, index_array1 = np.unique(patch1, return_inverse=True)
                weighted_counts1 = np.bincount(index_array1, weights=w1)
                label2, index_array2 = np.unique(patch2, return_inverse=True)
                weighted_counts2 = np.bincount(index_array2, weights=w2)
                assert (label1 == np.arange(cluster.n_clusters)).all(), "empty bootstrap patches"
                assert (label2 == np.arange(cluster.n_clusters)).all(), "empty bootstrap patches"

                # define a function that computes fractional number count change
                # and takes only patch IDs as input (i.e. fix the counts to this z bin)
                def calc_frac_change_patches_ibin(patch_ids):
                    return self.calc_frac_change_patches(patch_ids, weighted_counts1, weighted_counts2)

                boot = bootstrap(np.array([label1]), calc_frac_change_patches_ibin)

                csample_boot_mean = np.mean(boot.bootstrap_distribution) / deltak
                csample_boot_err = np.std(boot.bootstrap_distribution) / np.abs(deltak)
                self.write_output_boot(outfile, csample_boot_mean, csample_boot_err, ibin)

        self.plot_results(outfile)
        outfile.close()

    def calc_cluster_patches(self, nomag_cat):
        """
        Split the lens sample into patches using K-means clustering
        We are using the full no-magnification sample to do this
        but we could also use jackknife patches
        based on this SSI run's mask if we have it...
        """
        import sklearn.cluster

        n_patches = self.config["n_patches"]
        cluster = sklearn.cluster.KMeans(n_clusters=n_patches, n_init="auto")

        # limit number of objects to 1000 per patch to stop the training
        # from taking too long
        max_per_patch = 1000
        maxobj = max_per_patch * n_patches

        n_obj_lens = nomag_cat[f"lens/bin_all/ra"].shape[0]

        if n_obj_lens > maxobj:
            # Here we are reading random ra,decs directly from the HDF file
            # If the speed of this gets too slow we can look into ways of speeding things up
            index = np.sort(np.random.choice(n_obj_lens, size=maxobj, replace=False))
            ra = nomag_cat[f"lens/bin_all/ra"][index]
            dec = nomag_cat[f"lens/bin_all/dec"][index]
        else:
            ra = nomag_cat[f"lens/bin_all/ra"][:]
            dec = nomag_cat[f"lens/bin_all/dec"][:]
        radec = np.transpose([ra, dec])
        cluster.fit(radec)

        return cluster

    def setup_output(self, nlens):
        import h5py

        f = self.open_output("magnification_coefficients")
        g = f.create_group("magnification")

        g.create_dataset(
            "csample",
            shape=(nlens,),
        )
        g.create_dataset(
            "ctotal",
            shape=(nlens,),
        )
        g.create_dataset(
            "alpha",
            shape=(nlens,),
        )

        if self.config["bootstrap_error"]:
            g.create_dataset(
                "csample_boot_mean",
                shape=(nlens,),
            )
            g.create_dataset(
                "csample_boot_err",
                shape=(nlens,),
            )
        return f

    def write_output(self, outfile, csample, ibin):
        outfile["magnification/csample"][ibin] = csample
        outfile["magnification/ctotal"][ibin] = csample - 2.0
        outfile["magnification/alpha"][ibin] = csample / 2.0

    def write_output_boot(self, outfile, csample_mean, csample_err, ibin):
        outfile["magnification/csample_boot_mean"][ibin] = csample_mean
        outfile["magnification/csample_boot_err"][ibin] = csample_err

    def plot_results(self, outfile):
        """
        plot magnifiation coeff vs bin index
        """
        import matplotlib.pyplot as plt

        csample = outfile["magnification/csample"][:]
        index = np.arange(len(csample))

        fig = self.open_output("magnification_plot", wrapper=True)
        plt.plot(index, csample, "o", label="SSI", color="b")

        if self.config["bootstrap_error"]:
            mean = outfile["magnification/csample_boot_mean"][:]
            err = outfile["magnification/csample_boot_err"][:]
            plt.errorbar(index + 0.05, mean, err, fmt=".", color="r", label="SSI bootstrap")

        plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
        plt.xlabel("lens bin")
        plt.ylabel(r"$C_{\rm sample}$")
        plt.legend()
        plt.axhline(0, color="k", ls="-")
        plt.axhline(2, color="k", ls="--")
        fig.close()

    @staticmethod
    def calc_frac_change(weights_nomag, weights_mag):
        N0 = np.sum(weights_nomag)
        N1 = np.sum(weights_mag)
        return (N1 - N0) / N0

    @staticmethod
    def calc_frac_change_patches(patch_ids, counts1=None, counts2=None):
        N0 = np.sum(counts1[patch_ids])
        N1 = np.sum(counts2[patch_ids])
        return (N1 - N0) / N0