Source code for txpipe.psf_diagnostics

from ceci.config import StageParameter
from .base_stage import PipelineStage
from .data_types import Directory, ShearCatalog, HDFFile, PNGFile, TomographyCatalog, RandomsCatalog, YamlFile, TextFile
import numpy as np
import sys
import os
from .utils import read_shear_catalog_type
from .plotting import manual_step_histogram
from .shear_calibration import Calibrator


STAR_PSF_USED = 0
STAR_PSF_RESERVED = 1
STAR_TYPES = [STAR_PSF_USED, STAR_PSF_RESERVED]
STAR_TYPE_NAMES = ["PSF-used", "PSF-reserved"]
STAR_COLORS = ["blue", "orange", "green"]



[docs]
class TXPSFDiagnostics(PipelineStage):
    """
    Make histograms of PSF values

    This makes histograms of e1 and e2 residuals, and of the fractional
    PSF size excess.
    """

    name = "TXPSFDiagnostics"

    inputs = [("star_catalog", HDFFile)]
    outputs = [
        ("e1_psf_residual_hist", PNGFile),
        ("e2_psf_residual_hist", PNGFile),
        ("T_frac_psf_residual_hist", PNGFile),
        ("star_psf_stats", YamlFile),
    ]
    config_options = {}

    def run(self):
        # PSF tests
        import matplotlib

        matplotlib.use("agg")

        # Make plotters - in each case we supply the function to make
        # the thing we want to histogram, the name, label, and range
        plotters = [
            self.plot_histogram(
                lambda d: (d["measured_e1"] - d["model_e1"]),
                "e1_psf_residual",
                "$e_{1}-e_{1,psf}$",
                np.linspace(-0.1, 0.1, 51),
            ),
            self.plot_histogram(
                lambda d: (d["measured_e2"] - d["model_e2"]),
                "e2_psf_residual",
                "$e_{2}-e_{2,psf}$",
                np.linspace(-0.1, 0.1, 51),
            ),
            self.plot_histogram(
                lambda d: (d["measured_T"] - d["model_T"]) / d["measured_T"],
                "T_frac_psf_residual",
                "$(T-T{psf})/T{psf}$",
                np.linspace(-0.1, 0.1, 51),
            ),
        ]

        # Start off each of the plotters.  This will make them all run up to the
        # first yield statement, then pause and wait for the first chunk of data
        for plotter in plotters:
            print(plotter)
            plotter.send(None)

        # Create an iterator for reading through the input data.
        # This method automatically splits up data among the processes,
        # so the plotters should handle this.
        chunk_rows = 10000
        star_cols = [
            "measured_e1",
            "model_e1",
            "measured_e2",
            "model_e2",
            "measured_T",
            "model_T",
            "calib_psf_used",
            "calib_psf_reserved",
        ]
        iter_star = self.iterate_hdf("star_catalog", "stars", star_cols, chunk_rows)

        # Now loop through each chunk of input data, one at a time.
        # Each time we get a new segment of data, which goes to all the plotters
        for ((start, end, data),) in zip(iter_star):
            print(f"Read data {start} - {end}")
            data["star_type"] = load_star_type(data)
            # This causes each data = yield statement in each plotter to
            # be given this data chunk as the variable data.
            # data.update(data2)
            # data.update(data3)
            for plotter in plotters:
                plotter.send(data)

        # Tell all the plotters to finish, collect together results from the different
        # processors, and make their final plots.  Plotters need to respond
        # to the None input and
        results = {}
        for plotter in plotters:
            try:
                plotter.send(None)
            except StopIteration as err:
                # This feels like some cruel abuse because I don't
                # really understand how co-routines should be used.
                results.update(err.value)

        # save all counts, means, and sigmas to yaml file
        with self.open_output("star_psf_stats", wrapper=True) as f:
            f.write(results)

    def plot_histogram(self, function, output_name, xlabel, edges):
        import matplotlib.pyplot as plt
        from parallel_statistics import ParallelHistogram, ParallelMeanVariance

        print(f"Plotting {output_name}")
        counters = {s: ParallelHistogram(edges) for s in STAR_TYPES}
        stats = ParallelMeanVariance(len(STAR_TYPES))

        while True:
            data = yield

            if data is None:
                break

            # Get the specific quantity to plot
            # from the full data set
            value = function(data)

            for s in STAR_TYPES:
                r = data["star_type"] == s
                # build up histogram
                d = value[r]
                counters[s].add_data(d)
                # Also record data for mean and variance calculation
                # There are some NaNs in here which are presumably
                # not propagated to PSF estimation anyway
                stats.add_data(s, d[np.isfinite(d)])

        counts = {}
        for s in STAR_TYPES:
            counts[s] = counters[s].collect(self.comm)

        fig = self.open_output(output_name + "_hist", wrapper=True, figsize=(6, 15))
        for s in STAR_TYPES:
            plt.subplot(len(STAR_TYPES), 1, s + 1)
            manual_step_histogram(edges, counts[s], color=STAR_COLORS[s], label=STAR_TYPE_NAMES[s])
            plt.title(STAR_TYPE_NAMES[s])
            plt.xlabel(xlabel)
            plt.ylabel(r"$N_{stars}$")
        fig.close()

        n, mu, sigma2 = stats.collect(self.comm)
        results = {}
        for s in STAR_TYPES:
            name = STAR_TYPE_NAMES[s]
            results[f"{output_name}_{name}_n"] = int(n[s])
            results[f"{output_name}_{name}_mu"] = float(mu[s])
            results[f"{output_name}_{name}_std"] = float(sigma2[s] ** 0.5)

        return results




[docs]
class TXPSFMomentCorr(PipelineStage):
    """
    Compute PSF Moments
    """

    name = "TXPSFMomentCorr"
    parallel = False
    inputs = [("star_catalog", HDFFile)]
    outputs = [("moments_stats", HDFFile)]

    config_options = {
        "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."),
        "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."),
        "nbins": StageParameter(int, default=20, msg="Number of bins."),
        "bin_slop": StageParameter(float, default=0.01, msg="Bin slop for treecorr."),
        "sep_units": StageParameter(str, default="arcmin", msg="Separation units."),
        "subtract_mean": StageParameter(bool, default=False, msg="Subtract mean from data."),
    }

    def run(self):
        import treecorr
        import h5py
        import matplotlib

        self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1))

        matplotlib.use("agg")

        # Load the star catalog
        ra, dec, e_meas, e_mod, de, moment4_meas, moment4_mod, dmoment4, star_type = self.load_stars()

        moments_stats = {}
        for t in STAR_TYPES:
            s = np.where(star_type == t)[0]
            moments_stats[0, t] = self.compute_momentcorr(0, s, ra, dec, e_mod, e_mod)
            moments_stats[1, t] = self.compute_momentcorr(1, s, ra, dec, moment4_mod, e_mod)
            moments_stats[2, t] = self.compute_momentcorr(2, s, ra, dec, moment4_mod, moment4_mod)
            moments_stats[3, t] = self.compute_momentcorr(3, s, ra, dec, de, e_mod)
            moments_stats[4, t] = self.compute_momentcorr(4, s, ra, dec, de, moment4_mod)
            moments_stats[5, t] = self.compute_momentcorr(5, s, ra, dec, dmoment4, moment4_mod)
            moments_stats[6, t] = self.compute_momentcorr(6, s, ra, dec, e_mod, dmoment4)
            moments_stats[7, t] = self.compute_momentcorr(7, s, ra, dec, de, de)
            moments_stats[8, t] = self.compute_momentcorr(8, s, ra, dec, de, dmoment4)
            moments_stats[9, t] = self.compute_momentcorr(9, s, ra, dec, dmoment4, dmoment4)

        self.save_stats(moments_stats)

    def load_stars(self):
        with self.open_input("star_catalog") as f:
            g = f["stars"]
            ra = g["ra"][:]
            dec = g["dec"][:]
            e1meas = g["measured_e1"][:]
            e2meas = g["measured_e2"][:]
            e1mod = g["model_e1"][:]
            e2mod = g["model_e2"][:]
            e1meas_moment4 = g["measured_moment4_e1"][:]
            e2meas_moment4 = g["measured_moment4_e2"][:]
            e1mod_moment4 = g["model_moment4_e1"][:]
            e2mod_moment4 = g["model_moment4_e2"][:]

            # Note: definition are flipped for this paper
            de1 = e1mod - e1meas
            de2 = e2mod - e2meas
            de1_moment4 = e1mod_moment4 - e1meas_moment4
            de2_moment4 = e2mod_moment4 - e2meas_moment4

            if self.config["subtract_mean"]:
                e_meas = np.array((e1meas - np.mean(e1meas), e2meas - np.mean(e2meas)))
                e_mod = np.array((e1mod - np.mean(e1mod), e2mod - np.mean(e2mod)))
                de = np.array((de1 - np.mean(de1), de2 - np.mean(de2)))
                moment4_meas = np.array(
                    (e1meas_moment4 - np.mean(e1meas_moment4), e2meas_moment4 - np.mean(e2meas_moment4))
                )
                moment4_mod = np.array((e1mod_moment4 - np.mean(e1mod_moment4), e2mod_moment4 - np.mean(e2mod_moment4)))
                dmoment4 = np.array((de1_moment4 - np.mean(de1_moment4), de2_moment4 - np.mean(de2_moment4)))
            else:
                e_meas = np.array((e1meas, e2meas))
                e_mod = np.array((e1mod, e2mod))
                de = np.array((de1, de2))
                moment4_meas = np.array((e1meas_moment4, e2meas_moment4))
                moment4_mod = np.array((e1mod_moment4, e2mod_moment4))
                dmoment4 = np.array((de1_moment4, de2_moment4))

            star_type = load_star_type(g)

        return ra, dec, e_meas, e_mod, de, moment4_meas, moment4_mod, dmoment4, star_type

    def compute_momentcorr(self, i, s, ra, dec, q1, q2):
        # select a subset of the stars
        ra = ra[s]
        dec = dec[s]
        q1 = q1[:, s]
        q2 = q2[:, s]
        n = len(ra)
        print(f"Computing Rowe statistic rho_{i} from {n} objects")
        import treecorr

        corr = treecorr.GGCorrelation(self.config)
        cat1 = treecorr.Catalog(ra=ra, dec=dec, g1=q1[0], g2=q1[1], ra_units="deg", dec_units="deg")
        cat2 = treecorr.Catalog(ra=ra, dec=dec, g1=q2[0], g2=q2[1], ra_units="deg", dec_units="deg")
        corr.process(cat1, cat2)
        return corr.meanr, corr.xip, corr.varxip**0.5

    def moment_plots(self, rowe_stats):
        # First plot - stats 1,3,4
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        f = self.open_output("moments", wrapper=True, figsize=(10, 6 * len(STAR_TYPES)))

        for s in STAR_TYPES:
            ax = plt.subplot(len(STAR_TYPES), 1, s + 1)

            for j, i in enumerate([0]):
                theta, xi, err = rowe_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * (j - 1), 0, units="inches")
                plt.errorbar(
                    theta,
                    abs(xi),
                    err,
                    fmt=".",
                    label=rf"$\rho_{i}$",
                    capsize=3,
                    # transform=tr,
                )
            plt.xscale("log")
            plt.yscale("log")
            plt.xlabel(r"$\theta$")
            plt.ylabel(r"$\xi_+(\theta)$")
            plt.legend()
            plt.title(STAR_TYPE_NAMES[s])
        f.close()

    def save_stats(self, moments_stats):
        f = self.open_output("moments_stats")
        g = f.create_group("moment_statistics")
        for i in range(0, 10):
            for s in STAR_TYPES:
                theta, xi, err = moments_stats[i, s]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f"moment4_{i}_{name}")
                h.create_dataset("theta", data=theta)
                h.create_dataset("xi_plus", data=xi)
                h.create_dataset("xi_err", data=err)
        f.close()




[docs]
class TXTauStatistics(PipelineStage):
    """
    Compute and plot PSF Tau statistics where the definition of Tau stats are eq. 20-22
    of Gatti et al 2023.
    """

    name = "TXTauStatistics"
    parallel = False
    inputs = [
        ("binned_shear_catalog", ShearCatalog),
        ("star_catalog", HDFFile),
        ("rowe_stats", HDFFile),
    ]

    outputs = [
        ("tau0", PNGFile),
        ("tau2", PNGFile),
        ("tau5", PNGFile),
        ("tau_stats", HDFFile),
    ]

    config_options = {
        "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."),
        "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."),
        "nbins": StageParameter(int, default=20, msg="Number of bins."),
        "bin_slop": StageParameter(float, default=0.01, msg="Bin slop for treecorr."),
        "sep_units": StageParameter(str, default="arcmin", msg="Separation units."),
        "npatch": StageParameter(int, default=150, msg="Number of patches for variance calculation and processing."),
        "psf_size_units": StageParameter(str, default="sigma", msg="Units for PSF size."),
        "subtract_mean": StageParameter(bool, default=False, msg="Subtract mean from data."),
        "dec_cut": StageParameter(bool, default=True, msg="Affects KiDS-1000 only."),
        "star_type": StageParameter(str, default="PSF-reserved", msg="Star type to use."),
        "cov_method": StageParameter(str, default="bootstrap", msg="Covariance method."),
        "flip_g2": StageParameter(bool, default=False, msg="Flip g2 sign."),
        "tomographic": StageParameter(bool, default=True, msg="Use tomographic bins."),
    }

    def run(self):
        import treecorr
        import h5py
        import matplotlib
        import emcee

        matplotlib.use("agg")

        tau_stats = {}
        p_bestfits = {}

        # Load star properties
        ra, dec, e_psf, e_mod, de_psf, T_f, star_type = self.load_stars()

        for s in STAR_TYPES:
            if STAR_TYPE_NAMES[s] != self.config.star_type:
                continue

            # Load precomputed Rowe stats
            rowe_stats = self.load_rowe(s)

            # set ranges for mcmc
            ranges = {}
            ranges["alpha"] = [-1000.00, 1000.00]
            ranges["beta"] = [-1000.00, 1000.00]
            ranges["eta"] = [-1000.00, 1000.00]

            tau_stats[s] = {}
            p_bestfits[s] = {}
            # Load galaxies
            if self.config["tomographic"]:
                with self.open_input("binned_shear_catalog") as f:
                    nzbin = list(range(f["shear"].attrs["nbin_source"])) + ["all"]
            else:
                nzbin = ["all"]
            print(nzbin)
            for n in nzbin:
                gal_ra, gal_dec, gal_g1, gal_g2, gal_weight = self.load_galaxies(n)
                gal_g = np.array((gal_g1, gal_g2))
                print(f"computing tau stats for bin {n}")
                # Joint tau 0-2-5 data vector and cov
                # tau_stats contains [ang, tau0, tau2, tau5, cov]
                tau_stats[s][f"bin_{n}"] = self.compute_all_tau(
                    gal_ra, gal_dec, gal_g, gal_weight, s, ra, dec, e_psf, e_mod, de_psf, T_f, star_type
                )

                # Run simple mcmc to find best-fit values for alpha,beta,eta
                p_bestfits[s][f"bin_{n}"] = self.sample(tau_stats[s][f"bin_{n}"], rowe_stats, ranges)
        # Save tau stats in a h5 file
        self.save_tau_stats(tau_stats, p_bestfits)

        # Save tau plots
        self.tau_plots(tau_stats)

    def load_rowe(self, s):
        f = self.open_input("rowe_stats")
        rowe_stats = {}

        for i in 0, 1, 2, 3, 4, 5:
            name = STAR_TYPE_NAMES[s]
            theta = f["rowe_statistics"][f"rowe_{i}_{name}"]["theta"][:]
            xi_plus = f["rowe_statistics"][f"rowe_{i}_{name}"]["xi_plus"][:]
            xi_minus = f["rowe_statistics"][f"rowe_{i}_{name}"]["xi_minus"][:]
            xip_err = f["rowe_statistics"][f"rowe_{i}_{name}"]["xip_err"][:]
            xim_err = f["rowe_statistics"][f"rowe_{i}_{name}"]["xim_err"][:]

            rowe_stats[i] = theta, xi_plus, xi_minus, xip_err, xim_err

        return rowe_stats

    def sample(self, tau_stats, rowe_stats, ranges, nwalkers=32, ndim=3):
        """
        Run a simple mcmc chain to detemine the best-fit values for alpha, beta, eta
        """
        import emcee
        import scipy.optimize as optimize

        _, _, _, _, _, _, _, cov = tau_stats
        mask = cov.diagonal() > 0
        cov = cov[mask][:, mask]
        # We debias the covariance with two correction factors from Hartlap et. al 2006 eq. 17 and Dodelson & Schneider 2013 eq. 28
        Njk = self.config["npatch"]
        Ndv = 6 * self.config["nbins"]
        if not Ndv < Njk - 2:
            raise ValueError("Hartlap correction only valid for N data vector elements < N jackknife patches - 2")
        f_H = 1.0 * (Njk - Ndv - 2) / (Njk - 1)
        f_DS = 1 / (1 + (Ndv - ndim) * (Njk - Ndv - 2) / (Njk - Ndv - 1) / (Njk - Ndv - 4))
        cov = cov / f_H / f_DS

        _, tau0p, tau0m, tau2p, tau2m, tau5p, tau5m, _ = tau_stats
        invcov = np.linalg.inv(cov)

        initguess = [0, -1, 1]
        bestpars = optimize.minimize(
            self.chi2, initguess, args=(tau_stats, rowe_stats, invcov, mask), method="Nelder-Mead", tol=1e-6
        )
        initpos = [bestpars.x + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]

        print("Computing best-fit alpha, beta, eta")
        sampler = emcee.EnsembleSampler(
            nwalkers, ndim, self.logProb, args=(tau_stats, rowe_stats, ranges, invcov, mask)
        )
        sampler.run_mcmc(initpos, nsteps=4000, progress=False)
        flat_samples = sampler.get_chain(discard=2000, flat=True)

        ret = {}
        var = ["alpha", "beta", "eta"]
        for i, v in enumerate(var):
            mcmc = np.percentile(flat_samples[:, i], [16, 50, 84])
            q = np.diff(mcmc)
            ret[v] = {"median": mcmc[1], "lerr": q[0], "rerr": q[1]}

        chi2 = self.chi2(
            [ret["alpha"]["median"], ret["beta"]["median"], ret["eta"]["median"]], tau_stats, rowe_stats, invcov, mask
        )
        # degree of freedom = nbins * 6 (i.e. tau 0+/-, tau2+/-, tau5+/-) - ndim
        dof = (self.config["nbins"] * 6) - ndim
        print("Best-fit finished. Resulting chi^2/dof: ", chi2 / dof)
        return ret

    def logPrior(self, theta, ranges):
        """
        If parameter in defined range return 0, otherwise -np.inf
        """
        alpha, beta, eta = theta

        if (
            (ranges["alpha"][0] < alpha < ranges["alpha"][1])
            and (ranges["beta"][0] < beta < ranges["beta"][1])
            and (ranges["eta"][0] < eta < ranges["eta"][1])
        ):
            return 0.0
        return -np.inf

    def chi2(self, theta, tau_stats, rowe_stats, invcov, mask):
        """
        Compute likelihood
        theta     : parameters
        tau_stats : Measured tau stats
        rowe_stats: Measured rowe stats to be used for Tau
        invcov    : Inverse covariance matrix
        """

        # Load parameters
        alpha, beta, eta = theta

        # Load rowe and tau
        _, rowe0p, rowe0m, _, _ = rowe_stats[0]
        _, rowe1p, rowe1m, _, _ = rowe_stats[1]
        _, rowe2p, rowe2m, _, _ = rowe_stats[2]
        _, rowe3p, rowe3m, _, _ = rowe_stats[3]
        _, rowe4p, rowe4m, _, _ = rowe_stats[4]
        _, rowe5p, rowe5m, _, _ = rowe_stats[5]
        _, tau0p, tau0m, tau2p, tau2m, tau5p, tau5m, _ = tau_stats

        # Create combined template
        T0p = alpha * rowe0p + beta * rowe2p + eta * rowe5p
        T0m = alpha * rowe0m + beta * rowe2m + eta * rowe5m
        T2p = alpha * rowe2p + beta * rowe1p + eta * rowe4p
        T2m = alpha * rowe2m + beta * rowe1m + eta * rowe4m
        T5p = alpha * rowe5p + beta * rowe4p + eta * rowe3p
        T5m = alpha * rowe5m + beta * rowe4m + eta * rowe3m

        # Create data and template vector
        Tall = np.concatenate([T0p, T0m, T2p, T2m, T5p, T5m])[mask]
        Xall = np.concatenate([tau0p, tau0m, tau2p, tau2m, tau5p, tau5m])[mask]

        return np.dot(Xall - Tall, np.dot(Xall - Tall, invcov))

    def logProb(self, theta, tau_stats, rowe_stats, ranges, invcov, mask):
        lp = self.logPrior(theta, ranges)
        if not np.isfinite(lp):
            return -np.inf
        return lp + (-0.5) * self.chi2(theta, tau_stats, rowe_stats, invcov, mask)

    def compute_all_tau(self, gra, gdec, g, gw, s, sra, sdec, e_meas, e_mod, de, T_f, star_type):
        """
        Compute tau0, tau2, tau5.
        All three needs to be computed at once due to covariance.

        gra    : RA of galaxies
        gdec   : DEC of galaxies
        g      : shear for observed galaxies np.array((e1, e2)
        gw     : weights

        s      : indices of stars to use in calculation
        sra    : RA of stars
        sdec   : DEC of stars

        e_meas : measured ellipticities of PSF from stars -- np.array((e1meas, e2meas))
        e_mod  : model ellipticities of PSF               -- np.array((e1mod, e2mod))
        de     : e_meas-e_mod                              -- np.array((de1, de2))
        T_f    : (T_meas - T_model)/T_meas                -- np.array(T_f)
        """

        import treecorr

        p = e_mod
        q = de
        w = e_meas * T_f

        sra, sdec = np.array((sra[star_type == s], sdec[star_type == s]))  # Get ra/dec for specific stars
        p = np.array(([p[0][star_type == s], p[1][star_type == s]]))  # Get p for specific stars
        q = np.array(([q[0][star_type == s], q[1][star_type == s]]))  # Get q for specific stars
        w = np.array(([w[0][star_type == s], w[1][star_type == s]]))  # Get w for specific stars

        print(f"Computing Tau 0,2,5 and the covariance")
        self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1))

        # Load all catalogs
        catg = treecorr.Catalog(
            ra=gra, dec=gdec, g1=g[0], g2=g[1], w=gw, ra_units="deg", dec_units="deg", npatch=self.config["npatch"]
        )  # galaxy shear
        catp = treecorr.Catalog(
            ra=sra, dec=sdec, g1=p[0], g2=p[1], ra_units="deg", dec_units="deg", patch_centers=catg.patch_centers
        )  # e_model
        catq = treecorr.Catalog(
            ra=sra, dec=sdec, g1=q[0], g2=q[1], ra_units="deg", dec_units="deg", patch_centers=catg.patch_centers
        )  # (e_* - e_model)
        catw = treecorr.Catalog(
            ra=sra, dec=sdec, g1=w[0], g2=w[1], ra_units="deg", dec_units="deg", patch_centers=catg.patch_centers
        )  # (e_*(T_* - T_model)/T_* )

        # Compute all corrleations
        print(f"Computing shear-e_model correlation {catg.nobj} x {catp.nobj}")
        corr0 = treecorr.GGCorrelation(self.config)
        corr0.process(catg, catp)
        print(f"Computing shear-(e_*-e_model) correlation {catg.nobj} x {catq.nobj}")
        corr2 = treecorr.GGCorrelation(self.config)
        corr2.process(catg, catq)
        print(f"Computing shear-(e_*(T_* - T_model)/T_*) correlation {catg.nobj} x {catw.nobj}")
        corr5 = treecorr.GGCorrelation(self.config)
        corr5.process(catg, catw)

        # Estimate covariance using bootstrap. The ordering is xip0,xim0,xip2,xim2,xip5,xim5.
        cov = treecorr.estimate_multi_cov([corr0, corr2, corr5], self.config.cov_method)
        if self.config["cov_method"] == "shot":
            cov = np.diag(cov)

        return corr0.meanr, corr0.xip, corr0.xim, corr2.xip, corr2.xim, corr5.xip, corr5.xim, cov

    def save_tau_stats(self, tau_stats, p_bestfits):
        """
        tau_stats: (dict) dictionary containing theta,tau0,tau2,tau5 and cov
        """
        f = self.open_output("tau_stats")
        g = f.create_group("tau_statistics")

        for s in STAR_TYPES:
            if STAR_TYPE_NAMES[s] != self.config.star_type:
                continue
            if self.config["tomographic"]:
                with self.open_input("binned_shear_catalog") as h:
                    nbin = list(range(h["shear"].attrs["nbin_source"])) + ["all"]
            else:
                nbin = ["all"]
            for n in nbin:
                theta, tau0p, tau0m, tau2p, tau2m, tau5p, tau5m, cov = tau_stats[s][f"bin_{n}"]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f"tau_{name}/bin_{n}/")
                h.create_dataset("theta", data=theta)
                h.create_dataset("tau0p", data=tau0p)
                h.create_dataset("tau0m", data=tau0m)
                h.create_dataset("tau2p", data=tau2p)
                h.create_dataset("tau2m", data=tau2m)
                h.create_dataset("tau5p", data=tau5p)
                h.create_dataset("tau5m", data=tau5m)
                h.create_dataset("cov", data=cov)

                # Also save best-fit values
                h = g.create_group(f"bestfits_{name}/bin_{n}")
                alpha_err = max(p_bestfits[s][f"bin_{n}"]["alpha"]["lerr"], p_bestfits[s][f"bin_{n}"]["alpha"]["rerr"])
                beta_err = max(p_bestfits[s][f"bin_{n}"]["beta"]["lerr"], p_bestfits[s][f"bin_{n}"]["beta"]["rerr"])
                eta_err = max(p_bestfits[s][f"bin_{n}"]["eta"]["lerr"], p_bestfits[s][f"bin_{n}"]["eta"]["rerr"])
                h.create_dataset("alpha", data=p_bestfits[s][f"bin_{n}"]["alpha"]["median"])
                h.create_dataset("alpha_err", data=alpha_err)
                h.create_dataset("beta", data=p_bestfits[s][f"bin_{n}"]["beta"]["median"])
                h.create_dataset("beta_err", data=beta_err)
                h.create_dataset("eta", data=p_bestfits[s][f"bin_{n}"]["eta"]["median"])
                h.create_dataset("eta_err", data=eta_err)

        f.close()

    def load_stars(self):
        with self.open_input("star_catalog") as f:
            g = f["stars"]
            ra = g["ra"][:]
            dec = g["dec"][:]
            e1meas = g["measured_e1"][:]
            e2meas = g["measured_e2"][:]
            e1mod = g["model_e1"][:]
            e2mod = g["model_e2"][:]
            if self.config["flip_g2"]:
                e2meas *= -1
                e2mod *= -1
            de1 = e1meas - e1mod
            de2 = e2meas - e2mod

            if self.config["psf_size_units"] == "Tmeas":
                T_frac = (g["measured_T"][:] - g["model_T"][:]) / g["measured_T"][:]
            elif self.config["psf_size_units"] == "Tmodel":
                T_frac = (g["measured_T"][:] - g["model_T"][:]) / g["model_T"][:]
            elif self.config["psf_size_units"] == "sigma":
                T_frac = (g["measured_T"][:] ** 2 - g["model_T"][:] ** 2) / g["measured_T"][:] ** 2
            else:
                raise ValueError("Need to specify measured_T: Tmeas/Tmodel/sigma")

            if self.config["subtract_mean"]:
                e_meas = np.array((e1meas - np.mean(e1meas), e2meas - np.mean(e2meas)))
                e_mod = np.array((e1mod - np.mean(e1mod), e2mod - np.mean(e2mod)))
                de = np.array((de1 - np.mean(de1), de2 - np.mean(de2)))

            else:
                e_meas = np.array((e1meas, e2meas))
                e_mod = np.array((e1mod, e2mod))
                de = np.array((de1, de2))

            star_type = load_star_type(g)

        return ra, dec, e_meas, e_mod, de, T_frac, star_type

    def load_galaxies(self, zbin):
        with self.open_input("binned_shear_catalog") as f:
            g = f[f"shear/bin_{zbin}/"]
            ra = g["ra"][:]
            dec = g["dec"][:]
            g1 = g["g1"][:]
            g2 = g["g2"][:]
            w = g["weight"][:]

        if self.config["subtract_mean"]:
            g1 = g1 - np.average(g1, weights=w)
            g2 = g2 - np.average(g1, weights=w)

        if self.config["flip_g2"]:
            g2 *= -1

        return ra, dec, g1, g2, w

    def tau_plots(self, tau_stats):
        # Plot non-tomographic stats 1,3,4
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        for s in STAR_TYPES:
            if STAR_TYPE_NAMES[s] != self.config.star_type:
                continue

            theta, tau0p, tau0m, tau2p, tau2m, tau5p, tau5m, cov = tau_stats[s]["bin_all"]
            nb = len(theta)
            taus = {"0p": tau0p, "0m": tau0m, "2p": tau2p, "2m": tau2m, "5p": tau5p, "5m": tau5m}
            errs = {
                "0p": np.diag(cov[0 * nb : 1 * nb, 0 * nb : 1 * nb]) ** 0.5,
                "0m": np.diag(cov[1 * nb : 2 * nb, 1 * nb : 2 * nb]) ** 0.5,
                "2p": np.diag(cov[2 * nb : 3 * nb, 2 * nb : 3 * nb]) ** 0.5,
                "2m": np.diag(cov[3 * nb : 4 * nb, 3 * nb : 4 * nb]) ** 0.5,
                "5p": np.diag(cov[4 * nb : 5 * nb, 4 * nb : 5 * nb]) ** 0.5,
                "5m": np.diag(cov[5 * nb : 6 * nb, 5 * nb : 6 * nb]) ** 0.5,
            }

            for j, i in enumerate([0, 2, 5]):
                f = self.open_output(f"tau{i}", wrapper=True, figsize=(10, 6 * len(STAR_TYPES)))
                ax = plt.subplot(len(STAR_TYPES), 1, s + 1)

                tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * (j - 1), 0, units="inches")
                plt.errorbar(
                    theta,
                    taus[f"{i}p"],
                    errs[f"{i}p"],
                    fmt=".",
                    label=rf"$\tau_{i}+$",
                    capsize=3,
                    color="blue",
                    transform=tr,
                )
                plt.errorbar(
                    theta,
                    taus[f"{i}m"],
                    errs[f"{i}m"],
                    fmt=".",
                    label=rf"$\tau_{i}-$",
                    capsize=3,
                    color="red",
                    transform=tr,
                )

                plt.xscale("log")
                plt.yscale("symlog")
                plt.xlabel(r"$\theta$")
                plt.ylabel(rf"$\tau_{i}(\theta)$")
                plt.legend()
                plt.title("Non-tomographic" + STAR_TYPE_NAMES[s])

                f.close()




[docs]
class TXRoweStatistics(PipelineStage):
    """
    Compute and plot PSF Rowe statistics

    People sometimes think that these statistics are called the Rho statistics,
    because we usually use that letter for them.  Not so.  They are named after
    the wonderful Barney Rowe, now sadly lost to high finance,
    who presented the first two of them in MNRAS 404, 350 (2010).
    """

    name = "TXRoweStatistics"
    parallel = False
    inputs = [("star_catalog", HDFFile), ("patch_centers", TextFile)]
    outputs = [
        ("rowe134", PNGFile),
        ("rowe25", PNGFile),
        ("rowe0", PNGFile),
        ("rowe_stats", HDFFile),
    ]

    config_options = {
        "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."),
        "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."),
        "nbins": StageParameter(int, default=20, msg="Number of bins."),
        "bin_slop": StageParameter(float, default=0.01, msg="Bin slop for treecorr."),
        "sep_units": StageParameter(str, default="arcmin", msg="Separation units."),
        "psf_size_units": StageParameter(str, default="sigma", msg="Units for PSF size."),
        "definition": StageParameter(str, default="des-y1", msg="Definition for Rowe statistics."),
        "subtract_mean": StageParameter(bool, default=False, msg="Subtract mean from data."),
        "star_type": StageParameter(str, default="PSF-reserved", msg="Star type to use."),
        "var_method": StageParameter(str, default="bootstrap", msg="Variance method."),
        "flip_g2": StageParameter(bool, default=False, msg="Flip g2 sign."),
    }

    def run(self):
        import treecorr
        import h5py
        import matplotlib

        matplotlib.use("agg")
        self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1))
        ra, dec, e_meas, e_mod, de, T_f, star_type = self.load_stars()
        rowe_stats = {}
        for t in STAR_TYPES:
            s = np.where(star_type == t)[0]
            if len(s) == 0:
                continue
            if self.config["definition"] == "des-y1" or self.config["definition"] == "des-y3":
                print("Using DES's definition of Rowes")
                rowe_stats[0, t] = self.compute_rowe(0, s, ra, dec, e_mod, e_mod)
                rowe_stats[1, t] = self.compute_rowe(1, s, ra, dec, de, de)
                rowe_stats[2, t] = self.compute_rowe(2, s, ra, dec, de, e_mod)
                rowe_stats[3, t] = self.compute_rowe(3, s, ra, dec, e_meas * T_f, e_meas * T_f)
                rowe_stats[4, t] = self.compute_rowe(4, s, ra, dec, de, e_meas * T_f)
                rowe_stats[5, t] = self.compute_rowe(5, s, ra, dec, e_mod, e_meas * T_f)
            elif self.config["definition"] == "hsc-y1" or self.config["definition"] == "hsc-y3":
                print("Using HSC's definition of Rowes")
                # de =  g_meas - g_model
                # dT = (T_meas - T_model)/Tmodel
                # rho1 = de x de
                # rho2 = e_mod x de
                # rho3 = e_mod(dT/T_mod) x e_mod(dT/T_mod)
                # rho4 = de(e_mod*dT/T_mod)
                # rho5 = e_mod*(e_mod*dT/T_mod)
                rowe_stats[0, t] = self.compute_rowe(0, s, ra, dec, e_mod, e_mod)
                rowe_stats[1, t] = self.compute_rowe(1, s, ra, dec, de, de)
                rowe_stats[2, t] = self.compute_rowe(2, s, ra, dec, de, e_mod)
                rowe_stats[3, t] = self.compute_rowe(3, s, ra, dec, e_mod * T_f, e_mod * T_f)
                rowe_stats[4, t] = self.compute_rowe(4, s, ra, dec, de, e_mod * T_f)
                rowe_stats[5, t] = self.compute_rowe(5, s, ra, dec, e_mod, e_meas * T_f)
        self.save_stats(rowe_stats)
        self.rowe_plots(rowe_stats)

    def load_stars(self):
        with self.open_input("star_catalog") as f:
            g = f["stars"]
            ra = g["ra"][:]
            dec = g["dec"][:]
            e1meas = g["measured_e1"][:]
            e2meas = g["measured_e2"][:]
            e1mod = g["model_e1"][:]
            e2mod = g["model_e2"][:]
            if self.config["flip_g2"]:
                e2meas *= -1
                e2mod *= -1
            de1 = e1meas - e1mod
            de2 = e2meas - e2mod
            if self.config["psf_size_units"] == "Tmeas":
                T_frac = (g["measured_T"][:] - g["model_T"][:]) / g["measured_T"][:]
            elif self.config["psf_size_units"] == "Tmodel":
                T_frac = (g["measured_T"][:] - g["model_T"][:]) / g["model_T"][:]
            elif self.config["psf_size_units"] == "sigma":
                T_frac = (g["measured_T"][:] ** 2 - g["model_T"][:] ** 2) / g["measured_T"][:] ** 2
            else:
                sys.exit("Need to specify measured_T: Tmeas/Tmodel/sigma")

            if self.config["subtract_mean"]:
                e_meas = np.array((e1meas - np.mean(e1meas), e2meas - np.mean(e2meas)))
                e_mod = np.array((e1mod - np.mean(e1mod), e2mod - np.mean(e2mod)))
                de = np.array((de1 - np.mean(de1), de2 - np.mean(de2)))

            else:
                e_meas = np.array((e1meas, e2meas))
                e_mod = np.array((e1mod, e2mod))
                de = np.array((de1, de2))

            star_type = load_star_type(g)

        return ra, dec, e_meas, e_mod, de, T_frac, star_type

    def compute_rowe(self, i, s, ra, dec, q1, q2):
        # select a subset of the stars
        ra = ra[s]
        dec = dec[s]
        q1 = q1[:, s]
        q2 = q2[:, s]
        n = len(ra)
        print(f"Computing Rowe statistic rho_{i} from {n} objects")
        import treecorr

        corr = treecorr.GGCorrelation(self.config)
        cat1 = treecorr.Catalog(
            ra=ra,
            dec=dec,
            g1=q1[0],
            g2=q1[1],
            ra_units="deg",
            dec_units="deg",
            patch_centers=self.get_input("patch_centers"),
        )
        cat2 = treecorr.Catalog(
            ra=ra,
            dec=dec,
            g1=q2[0],
            g2=q2[1],
            ra_units="deg",
            dec_units="deg",
            patch_centers=self.get_input("patch_centers"),
        )
        corr.process(cat1, cat2)
        return corr.meanr, corr.xip, corr.xim, corr.varxip**0.5, corr.varxim**0.5

    def rowe_plots(self, rowe_stats):
        # First plot - stats 1,3,4
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        f = self.open_output("rowe0", wrapper=True, figsize=(10, 6 * len(STAR_TYPES)))
        for s in STAR_TYPES:
            if STAR_TYPE_NAMES[s] != self.config.star_type:
                continue
            ax = plt.subplot(len(STAR_TYPES), 1, s + 1)

            for j, i in enumerate([0]):
                theta, xip, xim, xip_err, xim_err = rowe_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * (j - 1), 0, units="inches")
                plt.errorbar(
                    theta,
                    xip,
                    xip_err,
                    fmt=".",
                    label=rf"$\rho_{i}+$",
                    capsize=3,
                    color="blue",
                    transform=tr,
                )
                plt.errorbar(
                    theta,
                    xim,
                    xim_err,
                    fmt=".",
                    label=rf"$\rho_{i}-$",
                    capsize=3,
                    color="blue",
                    transform=tr,
                )
            plt.xscale("log")
            plt.yscale("symlog")
            plt.xlabel(r"$\theta$")
            plt.ylabel(r"$\xi(\theta)$")
            plt.legend()
            plt.title(STAR_TYPE_NAMES[s])
        f.close()

        f = self.open_output("rowe134", wrapper=True, figsize=(10, 6 * len(STAR_TYPES)))
        for s in STAR_TYPES:
            if STAR_TYPE_NAMES[s] != self.config.star_type:
                continue
            ax = plt.subplot(len(STAR_TYPES), 1, s + 1)
            mkr = ["o", "s", "D"]
            for j, i in enumerate([1, 3, 4]):
                theta, xip, xim, xip_err, xim_err = rowe_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * (j - 1), 0, units="inches")
                plt.errorbar(
                    theta,
                    xip,
                    xip_err,
                    marker=mkr[j],
                    label=rf"$\rho_{i}+$",
                    capsize=3,
                    color="blue",
                    transform=tr,
                )
                plt.errorbar(
                    theta,
                    xim,
                    xim_err,
                    marker=mkr[j],
                    label=rf"$\rho_{i}-$",
                    capsize=3,
                    color="red",
                    transform=tr,
                )
            plt.xscale("log")
            plt.yscale("symlog")
            plt.xlabel(r"$\theta$")
            plt.ylabel(r"$\xi(\theta)$")
            plt.legend()
            plt.title(STAR_TYPE_NAMES[s])
        f.close()

        f = self.open_output("rowe25", wrapper=True, figsize=(10, 6 * len(STAR_TYPES)))
        for s in STAR_TYPES:
            if STAR_TYPE_NAMES[s] != self.config.star_type:
                continue
            ax = plt.subplot(len(STAR_TYPES), 1, s + 1)
            mkr = ["o", "s"]
            for j, i in enumerate([2, 5]):
                theta, xip, xim, xip_err, xim_err = rowe_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * j - 0.025, 0, units="inches")
                plt.errorbar(
                    theta,
                    xip,
                    xip_err,
                    fmt=mkr[j],
                    label=rf"$\rho_{i}+$",
                    capsize=3,
                    color="blue",
                    transform=tr,
                )
                plt.errorbar(
                    theta,
                    xim,
                    xim_err,
                    fmt=mkr[j],
                    label=rf"$\rho_{i}-$",
                    capsize=3,
                    color="red",
                    transform=tr,
                )
                plt.title(STAR_TYPE_NAMES[s])
                plt.xscale("log")
                plt.yscale("symlog")
                plt.xlabel(r"$\theta$")
                plt.ylabel(r"$\xi(\theta)$")
                plt.legend()
        f.close()

    def save_stats(self, rowe_stats):
        f = self.open_output("rowe_stats")
        g = f.create_group("rowe_statistics")
        for i in 0, 1, 2, 3, 4, 5:
            for s in STAR_TYPES:
                if STAR_TYPE_NAMES[s] != self.config.star_type:
                    continue
                theta, xip, xim, xip_err, xim_err = rowe_stats[i, s]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f"rowe_{i}_{name}")
                h.create_dataset("theta", data=theta)
                h.create_dataset("xi_plus", data=xip)
                h.create_dataset("xi_minus", data=xim)
                h.create_dataset("xip_err", data=xip_err)
                h.create_dataset("xim_err", data=xim_err)
        f.close()




[docs]
class TXGalaxyStarShear(PipelineStage):
    """
    Compute and plot star x galaxy and star x star correlations.

    These are shape correlations; they differ from the Rowe stats slightly
    because they measure star values not interpolated PSF values.
    """

    name = "TXGalaxyStarShear"
    parallel = False

    inputs = [
        ("shear_catalog", ShearCatalog),
        ("star_catalog", HDFFile),
        ("shear_tomography_catalog", TomographyCatalog),
    ]
    outputs = [
        ("star_shear_test", PNGFile),
        ("star_star_test", PNGFile),
        ("star_shear_stats", HDFFile),
    ]

    config_options = {
        "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."),
        "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."),
        "nbins": StageParameter(int, default=20, msg="Number of bins."),
        "bin_slop": StageParameter(float, default=0.1, msg="Bin slop for treecorr."),
        "sep_units": StageParameter(str, default="arcmin", msg="Separation units."),
        "psf_size_units": StageParameter(str, default="sigma", msg="Units for PSF size."),
        "shear_catalog_type": StageParameter(str, default="metacal", msg="Shear catalog type."),
        "star_type": StageParameter(str, default="PSF-reserved", msg="Star type to use."),
        "flip_g2": StageParameter(bool, default=False, msg="Flip g2 sign."),
    }

    def run(self):
        import treecorr
        import h5py
        import matplotlib

        matplotlib.use("agg")
        self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1))
        ra, dec, e_psf, de_psf, star_type = self.load_stars()
        ra_gal, dec_gal, g1, g2, weight = self.load_galaxies()

        # only use reserved stars for this statistics
        galaxy_star_stats = {}
        star_star_stats = {}
        for t in STAR_TYPES:
            s = star_type == t
            if STAR_TYPE_NAMES[t] != self.config.star_type:
                continue
            galaxy_star_stats[1, t] = self.compute_galaxy_star(ra, dec, e_psf, s, ra_gal, dec_gal, g1, g2, weight)
            galaxy_star_stats[2, t] = self.compute_galaxy_star(ra, dec, de_psf, s, ra_gal, dec_gal, g1, g2, weight)
            star_star_stats[1, t] = self.compute_star_star(ra, dec, e_psf, s, ra_gal, dec_gal, e_psf, weight)
            star_star_stats[2, t] = self.compute_star_star(ra, dec, de_psf, s, ra_gal, dec_gal, de_psf, weight)

        self.save_stats(galaxy_star_stats, star_star_stats)
        self.galaxy_star_plots(galaxy_star_stats)
        self.star_star_plots(star_star_stats)

    def load_stars(self):
        with self.open_input("star_catalog") as f:
            g = f["stars"]
            ra = g["ra"][:]
            dec = g["dec"][:]
            e1 = g["measured_e1"][:]
            e2 = g["measured_e2"][:]
            de1 = e1 - g["model_e1"][:]
            de2 = e2 - g["model_e2"][:]
            e_psf = np.array((e1, e2))
            de_psf = np.array((de1, de2))

            star_type = load_star_type(g)

        return ra, dec, e_psf, de_psf, star_type

    def load_galaxies(self):
        # Columns we need from the shear catalog
        # TODO: not sure of an application where we would want to use true shear but can be added
        cat_type = read_shear_catalog_type(self)
        _, cal = Calibrator.load(self.get_input("shear_tomography_catalog"))

        # load tomography data
        with self.open_input("shear_tomography_catalog") as f:
            source_bin = f["tomography/bin"][:]
            mask = source_bin != -1  # Only use the sources that pass the fiducial cuts
            if cat_type == "metacal":
                R_total_2d = f["response/R_S_2d"][:] + f["response/R_gamma_mean_2d"][:]
            elif cat_type == "metadetect":
                R_total_2d = f["response/R_2d"][:]

        with self.open_input("shear_catalog") as f:
            g = f["shear"]

            # Get the base catalog for metadetect
            if cat_type == "metadetect":
                g = g["00"]

            ra = g["ra"][:][mask]
            dec = g["dec"][:][mask]

            if cat_type == "metacal":
                g1 = g["g1"][:][mask]
                g2 = g["g2"][:][mask]
                weight = g["weight"][:][mask]

            elif cat_type == "metadetect":
                g1 = g["g1"][:][mask]
                g2 = g["g2"][:][mask]
                weight = g["weight"][:][mask]

            else:
                g1 = g["g1"][:][mask]
                g2 = g["g2"][:][mask]
                weight = g["weight"][:][mask]
                sigma_e = g["sigma_e"][:][mask]
                m = g["m"][:][mask]

        if self.config["flip_g2"]:
            g2 *= -1

        if cat_type == "metacal" or cat_type == "metadetect":
            # We use S=0 here because we have already included it in R_total
            g1, g2 = cal.apply(g1, g2)

        elif cat_type == "lensfit":
            # In KiDS, the additive bias is calculated and removed per North and South field
            # therefore, we add dec to split data into these fields.
            # You can choose not to by setting dec_cut = 90 in the config, for example.
            g1, g2 = cal.apply(g1, g2, dec)
        else:
            print("Shear calibration type not recognized.")

        return ra, dec, g1, g2, weight

    def compute_galaxy_star(self, ra, dec, q, s, ra_gal, dec_gal, g1, g2, weight):
        # select the star type
        ra = ra[s]
        dec = dec[s]
        q = q[:, s]  # PSF quantity, either ellipticity or residual
        n = len(ra)
        i = len(ra_gal)
        print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies")
        import treecorr

        corr = treecorr.GGCorrelation(self.config)
        cat1 = treecorr.Catalog(ra=ra, dec=dec, g1=q[0], g2=q[1], ra_units="deg", dec_units="deg")
        cat2 = treecorr.Catalog(
            ra=ra_gal,
            dec=dec_gal,
            g1=g1,
            g2=g2,
            ra_units="deg",
            dec_units="deg",
            w=weight,
        )
        corr.process(cat1, cat2)
        return corr.meanr, corr.xip, corr.varxip**0.5

    def compute_star_star(self, ra, dec, q1, s, ra_gal, dec_gal, q2, weight):
        # select the star type
        ra = ra[s]
        dec = dec[s]
        q1 = q1[:, s]
        q2 = q2[:, s]
        n = len(ra)
        i = len(ra_gal)
        print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies")
        import treecorr

        corr = treecorr.GGCorrelation(self.config)
        cat1 = treecorr.Catalog(ra=ra, dec=dec, g1=q1[0], g2=q1[1], ra_units="deg", dec_units="deg")
        cat2 = treecorr.Catalog(ra=ra, dec=dec, g1=q2[0], g2=q2[1], ra_units="deg", dec_units="deg")
        corr.process(cat1, cat2)
        return corr.meanr, corr.xip, corr.varxip**0.5

    def galaxy_star_plots(self, galaxy_star_stats):
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        f = self.open_output("star_shear_test", wrapper=True, figsize=(10, 6 * len(STAR_TYPES)))
        TEST_TYPES = ["shear", "residual"]
        for s in STAR_TYPES:
            if STAR_TYPE_NAMES[s] != self.config.star_type:
                continue
            ax = plt.subplot(len(STAR_TYPES), 1, s + 1)
            for j, i in enumerate([1, 2]):
                theta, xi, err = galaxy_star_stats[i, s]
                plt.errorbar(
                    theta,
                    abs(xi),
                    err,
                    fmt=".",
                    label=f"galaxy cross star {TEST_TYPES[i - 1]}",
                    capsize=3,
                )
                plt.title(STAR_TYPE_NAMES[s])
                plt.xscale("log")
                plt.yscale("log")
                plt.xlabel(r"$\theta$")
                plt.ylabel(r"$\xi_+(\theta)$")
                plt.legend()
        f.close()

    def star_star_plots(self, star_star_stats):
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        f = self.open_output("star_star_test", wrapper=True, figsize=(10, 6 * len(STAR_TYPES)))
        TEST_TYPES = ["shear", "residual"]
        for s in STAR_TYPES:
            if STAR_TYPE_NAMES[s] != self.config.star_type:
                continue
            ax = plt.subplot(len(STAR_TYPES), 1, s + 1)
            for j, i in enumerate([1, 2]):
                theta, xi, err = star_star_stats[i, s]
                plt.errorbar(
                    theta,
                    abs(xi),
                    err,
                    fmt=".",
                    label=f"star cross star {TEST_TYPES[i - 1]}",
                    capsize=3,
                )
                plt.title(STAR_TYPE_NAMES[s])
                plt.xscale("log")
                plt.yscale("log")
                plt.xlabel(r"$\theta$")
                plt.ylabel(r"$\xi_+(\theta)$")
                plt.legend()
        f.close()

    def save_stats(self, galaxy_star_stats, star_star_stats):
        f = self.open_output("star_shear_stats")
        g = f.create_group("star_cross_galaxy")
        for i in 1, 2:
            for s in STAR_TYPES:
                if STAR_TYPE_NAMES[s] != self.config.star_type:
                    continue
                theta, xi, err = galaxy_star_stats[i, s]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f"star_cross_galaxy_{i}_{name}")
                h.create_dataset("theta", data=theta)
                h.create_dataset("xi_plus", data=xi)
                h.create_dataset("xi_err", data=err)

        g = f.create_group("star_cross_star")
        for i in 1, 2:
            for s in STAR_TYPES:
                if STAR_TYPE_NAMES[s] != self.config.star_type:
                    continue
                theta, xi, err = star_star_stats[i, s]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f"star_cross_star_{i}_{name}")
                h.create_dataset("theta", data=theta)
                h.create_dataset("xi_plus", data=xi)
                h.create_dataset("xi_err", data=err)
        f.close()




[docs]
class TXGalaxyStarDensity(PipelineStage):
    """
    Compute and plot star x galaxy and star x star density correlations

    This version uses the source catalog, though a version with lens
    samples might also be useful.
    """

    name = "TXGalaxyStarDensity"
    parallel = False

    inputs = [
        ("shear_catalog", ShearCatalog),
        ("star_catalog", HDFFile),
        ("shear_tomography_catalog", TomographyCatalog),
        ("random_cats", RandomsCatalog),
    ]
    outputs = [
        ("star_density_test", PNGFile),
        ("star_density_stats", HDFFile),
    ]

    config_options = {
        "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."),
        "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."),
        "nbins": StageParameter(int, default=20, msg="Number of bins."),
        "bin_slop": StageParameter(float, default=0.1, msg="Bin slop for treecorr."),
        "sep_units": StageParameter(str, default="arcmin", msg="Separation units."),
        "psf_size_units": StageParameter(str, default="sigma", msg="Units for PSF size."),
        "star_type": StageParameter(str, default="PSF-reserved", msg="Star type to use."),
        "flip_g2": StageParameter(bool, default=False, msg="Flip g2 sign."),
    }

    def run(self):
        import treecorr
        import h5py
        import matplotlib

        matplotlib.use("agg")
        self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1))
        ra, dec, star_type = self.load_stars()
        ra_gal, dec_gal = self.load_galaxies()
        ra_random, dec_random = self.load_randoms()

        galaxy_star_stats = {}
        for t in STAR_TYPES:
            s = star_type == t
            if STAR_TYPE_NAMES[t] != self.config.star_type:
                continue
            galaxy_star_stats[1, t] = self.compute_galaxy_star(ra, dec, s, ra_gal, dec_gal, ra_random, dec_random)
            galaxy_star_stats[2, t] = self.compute_star_star(ra, dec, s, ra_gal, dec_gal, ra_random, dec_random)

        self.save_stats(galaxy_star_stats)
        self.galaxy_star_plots(galaxy_star_stats)

    def load_stars(self):
        with self.open_input("star_catalog") as f:
            g = f["stars"]
            ra = g["ra"][:]
            dec = g["dec"][:]

            star_type = load_star_type(g)

        return ra, dec, star_type

    def load_randoms(self):
        with self.open_input("random_cats") as f:
            group = f["randoms"]
            ra_random = group["ra"][:]
            dec_random = group["dec"][:]
        return ra_random, dec_random

    def load_galaxies(self):
        # Columns we need from the shear catalog
        # TODO: not sure of an application where we would want to use true shear but can be added
        read_shear_catalog_type(self)

        # load tomography data
        with self.open_input("shear_tomography_catalog") as f:
            source_bin = f["tomography/bin"][:]
            mask = source_bin != -1  # Only use the sources that pass the fiducial cuts

        with self.open_input("shear_catalog", wrapper=True) as f:
            g = f.file["shear"]
            if f.catalog_type == "metadetect":
                g = g["00"]
            ra = g["ra"][:][mask]
            dec = g["dec"][:][mask]

        return ra, dec

    def compute_galaxy_star(self, ra, dec, s, ra_gal, dec_gal, ra_random, dec_random):
        # select the star type
        ra = ra[s]
        dec = dec[s]
        n = len(ra)
        i = len(ra_gal)
        print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies")

        import treecorr

        rancat = treecorr.Catalog(ra=ra_random, dec=dec_random, ra_units="degree", dec_units="degree")

        cat1 = treecorr.Catalog(ra=ra, dec=dec, ra_units="deg", dec_units="deg")
        cat2 = treecorr.Catalog(ra=ra_gal, dec=dec_gal, ra_units="deg", dec_units="deg")

        nn = treecorr.NNCorrelation(self.config)
        rn = treecorr.NNCorrelation(self.config)
        nr = treecorr.NNCorrelation(self.config)
        rr = treecorr.NNCorrelation(self.config)

        nn.process(cat1, cat2)
        nr.process(cat1, rancat)
        rn.process(rancat, cat2)
        rr.process(rancat, rancat)

        nn.calculateXi(rr=rr, dr=nr, rd=rn)
        return nn.meanr, nn.xi, nn.varxi**0.5

    def compute_star_star(self, ra, dec, s, ra_gal, dec_gal, ra_random, dec_random):
        # select the star type
        ra = ra[s]
        dec = dec[s]
        n = len(ra)
        i = len(ra_gal)
        print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies")

        import treecorr

        rancat = treecorr.Catalog(ra=ra_random, dec=dec_random, ra_units="degree", dec_units="degree")

        cat1 = treecorr.Catalog(ra=ra, dec=dec, ra_units="deg", dec_units="deg")
        cat2 = treecorr.Catalog(ra=ra_gal, dec=dec_gal, ra_units="deg", dec_units="deg")

        nn = treecorr.NNCorrelation(self.config)
        rn = treecorr.NNCorrelation(self.config)
        nr = treecorr.NNCorrelation(self.config)
        rr = treecorr.NNCorrelation(self.config)

        nn.process(cat1, cat1)
        nr.process(cat1, rancat)
        rn.process(rancat, cat1)
        rr.process(rancat, rancat)

        nn.calculateXi(rr=rr, dr=nr, rd=rn)
        return nn.meanr, nn.xi, nn.varxi**0.5

    def galaxy_star_plots(self, galaxy_star_stats):
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        f = self.open_output("star_density_test", wrapper=True, figsize=(10, 6 * len(STAR_TYPES)))
        TEST_TYPES = ["star cross galaxy", "star cross star"]
        for s in STAR_TYPES:
            if STAR_TYPE_NAMES[s] != self.config.star_type:
                continue
            ax = plt.subplot(len(STAR_TYPES), 1, s + 1)
            for j, i in enumerate([1, 2]):
                theta, xi, err = galaxy_star_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * j - 0.025, 0, units="inches")
                plt.errorbar(
                    theta,
                    abs(xi),
                    err,
                    fmt=".",
                    label=f"{TEST_TYPES[i - 1]} density stats",
                    capsize=3,
                    # transform=tr,
                )
                plt.title(STAR_TYPE_NAMES[s])
                plt.xscale("log")
                plt.yscale("log")
                plt.xlabel(r"$\theta$")
                plt.ylabel(r"$\xi_+(\theta)$")
                plt.legend()
        f.close()

    def save_stats(self, galaxy_star_stats):
        f = self.open_output("star_density_stats")
        g = f.create_group("star_density")
        for i in 1, 2:
            for s in STAR_TYPES:
                if STAR_TYPE_NAMES[s] != self.config.star_type:
                    continue
                theta, xi, err = galaxy_star_stats[i, s]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f"star_density_{i}_{name}")
                h.create_dataset("theta", data=theta)
                h.create_dataset("xi_plus", data=xi)
                h.create_dataset("xi_err", data=err)
        f.close()




[docs]
class TXBrighterFatterPlot(PipelineStage):
    """
    Compute and plot a diagnostic of the brighter-fatter effect

    This plots the mean e1, e2, and size difference between that interpolated
    at star locations and the star values themselves, as a function of magnitude.
    """

    name = "TXBrighterFatterPlot"
    parallel = False

    inputs = [("star_catalog", HDFFile)]

    outputs = [
        ("brighter_fatter_plot", PNGFile),
        ("brighter_fatter_data", HDFFile),
    ]

    config_options = {
        "band": StageParameter(str, default="r", msg="Band to use for magnitude."),
        "nbin": StageParameter(int, default=20, msg="Number of magnitude bins."),
        "mmin": StageParameter(float, default=18.5, msg="Minimum magnitude."),
        "mmax": StageParameter(float, default=23.5, msg="Maximum magnitude."),
    }

    def run(self):
        import h5py
        import matplotlib

        matplotlib.use("agg")

        data = self.load_stars()
        results = {}
        for s in STAR_TYPES:
            w = data["star_type"] == s
            data_s = {k: v[w] for k, v in data.items()}
            results[s] = self.compute_binned_stats(data_s)

        self.save_stats(results)
        self.save_plots(results)

    def load_stars(self):
        with self.open_input("star_catalog") as f:
            g = f["stars"]

            band = self.config["band"]
            data = {}
            data["mag"] = g[f"mag_{band}"][:]
            data["delta_e1"] = g["measured_e1"][:] - g["model_e1"][:]
            data["delta_e2"] = g["measured_e2"][:] - g["model_e2"][:]
            data["delta_T"] = g["measured_T"][:] - g["model_T"][:]
            data["star_type"] = load_star_type(g)

        return data

    def compute_binned_stats(self, data):
        # Which band this corresponds to depends on the
        # configuration option chosen
        mag = data["mag"]
        mmin = self.config["mmin"]
        mmax = self.config["mmax"]
        nbin = self.config["nbin"]
        # bin edges in magnitude
        edges = np.linspace(mmin, mmax, nbin + 1)
        index = np.digitize(mag, edges)

        # Space for all the output values to go in
        dT = np.zeros(nbin)
        errT = np.zeros(nbin)
        e1 = np.zeros(nbin)
        e2 = np.zeros(nbin)
        err1 = np.zeros(nbin)
        err2 = np.zeros(nbin)
        m = np.zeros(nbin)

        for i in range(nbin):
            # Select only objects where everything is finite, as well
            # as only thing in this tomographic bin
            w = np.where(
                (index == i + 1)
                & np.isfinite(data["delta_T"])
                & np.isfinite(data["delta_e1"])
                & np.isfinite(data["delta_e2"])
            )
            # x-value = mean mag
            m[i] = mag[w].mean()
            # y values
            dT_i = data["delta_T"][w]
            e1_i = data["delta_e1"][w]
            e2_i = data["delta_e2"][w]
            # Mean and error on mean of each of these quantities
            dT[i] = dT_i.mean()
            errT[i] = dT_i.std() / np.sqrt(dT_i.size)
            e1[i] = e1_i.mean()
            err1[i] = e1_i.std() / np.sqrt(e1_i.size)
            e2[i] = e2_i.mean()
            err2[i] = e2_i.std() / np.sqrt(e2_i.size)

        return [m, dT, errT, e1, err1, e2, err2]

    def save_plots(self, results):
        import matplotlib.pyplot as plt

        band = self.config["band"]
        n = len(results)
        width = n * 6
        f = self.open_output("brighter_fatter_plot", wrapper=True, figsize=(width, 8))
        for s, res in results.items():
            m, dT, errT, e1, err1, e2, err2 = res

            # Top plot - classic BF size plot, the size residual as a function of
            # magnitude
            ax = plt.subplot(2, n, 2 * s + 1)
            plt.title(STAR_TYPE_NAMES[s])
            plt.errorbar(m, dT, errT, fmt=".")
            plt.xlabel(f"{band}-band magnitude")
            plt.ylabel(r"$T_\mathrm{PSF} - T_\mathrm{model}$ ($\mathrm{arcsec}^2$)")
            plt.ylim(-0.025, 0.1)
            # Lower plot - the e1 and e2 residuals as a function of mag
            plt.subplot(2, n, 2 * s + 2, sharex=ax)
            plt.title(STAR_TYPE_NAMES[s])
            plt.errorbar(m, e1, err1, label="$e_1$", fmt=".")
            plt.errorbar(m, e2, err2, label="$e_2$", fmt=".")
            plt.ylabel(r"$e_\mathrm{PSF} - e_\mathrm{model}$")
            plt.xlabel(f"{band}-band magnitude")
            # May need to adjust this range
            plt.ylim(-0.02, 0.02)
        plt.legend()
        plt.tight_layout()
        f.close()

    def save_stats(self, results):
        # Save all the stats in results for later plotting
        # Save to standard HDF5 format.
        f = self.open_output("brighter_fatter_data")
        g1 = f.create_group("brighter_fatter")
        g1.attrs["band"] = self.config["band"]
        for s, res in results.items():
            (m, dT, errT, e1, err1, e2, err2) = res
            g = g1.create_group(STAR_TYPE_NAMES[s])
            g.create_dataset("mag", data=m)
            g.create_dataset("delta_T", data=dT)
            g.create_dataset("delta_T_error", data=errT)
            g.create_dataset("delta_e1", data=e1)
            g.create_dataset("delta_e1_error", data=err1)
            g.create_dataset("delta_e2", data=e2)
            g.create_dataset("delta_e2_error", data=err2)
        f.close()



def load_star_type(data):
    used = data["calib_psf_used"][:].astype("int")
    reserved = data["calib_psf_reserved"][:].astype("int")

    star_type = np.full(used.size, -99, dtype=int)
    star_type[used == 1] = STAR_PSF_USED
    star_type[reserved == 1] = STAR_PSF_RESERVED

    return star_type