Source code for txpipe.psf_diagnostics

from ceci.config import StageParameter
from .base_stage import PipelineStage
from .data_types import Directory, ShearCatalog, HDFFile, PNGFile, TomographyCatalog, RandomsCatalog, YamlFile, TextFile
import numpy as np
import sys
import os
from .utils import read_shear_catalog_type
from .plotting import manual_step_histogram
from .shear_calibration import Calibrator


STAR_PSF_USED = 0
STAR_PSF_RESERVED = 1
STAR_TYPES = [STAR_PSF_USED, STAR_PSF_RESERVED]
STAR_TYPE_NAMES = ["PSF-used", "PSF-reserved"]
STAR_COLORS = ["blue", "orange", "green"]


[docs] class TXPSFDiagnostics(PipelineStage): """ Make histograms of PSF values This makes histograms of e1 and e2 residuals, and of the fractional PSF size excess. """ name = "TXPSFDiagnostics" inputs = [("star_catalog", HDFFile)] outputs = [ ("e1_psf_residual_hist", PNGFile), ("e2_psf_residual_hist", PNGFile), ("T_frac_psf_residual_hist", PNGFile), ("star_psf_stats", YamlFile), ] config_options = {} def run(self): # PSF tests import matplotlib matplotlib.use("agg") # Make plotters - in each case we supply the function to make # the thing we want to histogram, the name, label, and range plotters = [ self.plot_histogram( lambda d: (d["measured_e1"] - d["model_e1"]), "e1_psf_residual", "$e_{1}-e_{1,psf}$", np.linspace(-0.1, 0.1, 51), ), self.plot_histogram( lambda d: (d["measured_e2"] - d["model_e2"]), "e2_psf_residual", "$e_{2}-e_{2,psf}$", np.linspace(-0.1, 0.1, 51), ), self.plot_histogram( lambda d: (d["measured_T"] - d["model_T"]) / d["measured_T"], "T_frac_psf_residual", "$(T-T{psf})/T{psf}$", np.linspace(-0.1, 0.1, 51), ), ] # Start off each of the plotters. This will make them all run up to the # first yield statement, then pause and wait for the first chunk of data for plotter in plotters: print(plotter) plotter.send(None) # Create an iterator for reading through the input data. # This method automatically splits up data among the processes, # so the plotters should handle this. chunk_rows = 10000 star_cols = [ "measured_e1", "model_e1", "measured_e2", "model_e2", "measured_T", "model_T", "calib_psf_used", "calib_psf_reserved", ] iter_star = self.iterate_hdf("star_catalog", "stars", star_cols, chunk_rows) # Now loop through each chunk of input data, one at a time. # Each time we get a new segment of data, which goes to all the plotters for ((start, end, data),) in zip(iter_star): print(f"Read data {start} - {end}") data["star_type"] = load_star_type(data) # This causes each data = yield statement in each plotter to # be given this data chunk as the variable data. # data.update(data2) # data.update(data3) for plotter in plotters: plotter.send(data) # Tell all the plotters to finish, collect together results from the different # processors, and make their final plots. Plotters need to respond # to the None input and results = {} for plotter in plotters: try: plotter.send(None) except StopIteration as err: # This feels like some cruel abuse because I don't # really understand how co-routines should be used. results.update(err.value) # save all counts, means, and sigmas to yaml file with self.open_output("star_psf_stats", wrapper=True) as f: f.write(results) def plot_histogram(self, function, output_name, xlabel, edges): import matplotlib.pyplot as plt from parallel_statistics import ParallelHistogram, ParallelMeanVariance print(f"Plotting {output_name}") counters = {s: ParallelHistogram(edges) for s in STAR_TYPES} stats = ParallelMeanVariance(len(STAR_TYPES)) while True: data = yield if data is None: break # Get the specific quantity to plot # from the full data set value = function(data) for s in STAR_TYPES: r = data["star_type"] == s # build up histogram d = value[r] counters[s].add_data(d) # Also record data for mean and variance calculation # There are some NaNs in here which are presumably # not propagated to PSF estimation anyway stats.add_data(s, d[np.isfinite(d)]) counts = {} for s in STAR_TYPES: counts[s] = counters[s].collect(self.comm) fig = self.open_output(output_name + "_hist", wrapper=True, figsize=(6, 15)) for s in STAR_TYPES: plt.subplot(len(STAR_TYPES), 1, s + 1) manual_step_histogram(edges, counts[s], color=STAR_COLORS[s], label=STAR_TYPE_NAMES[s]) plt.title(STAR_TYPE_NAMES[s]) plt.xlabel(xlabel) plt.ylabel(r"$N_{stars}$") fig.close() n, mu, sigma2 = stats.collect(self.comm) results = {} for s in STAR_TYPES: name = STAR_TYPE_NAMES[s] results[f"{output_name}_{name}_n"] = int(n[s]) results[f"{output_name}_{name}_mu"] = float(mu[s]) results[f"{output_name}_{name}_std"] = float(sigma2[s] ** 0.5) return results
[docs] class TXPSFMomentCorr(PipelineStage): """ Compute PSF Moments """ name = "TXPSFMomentCorr" parallel = False inputs = [("star_catalog", HDFFile)] outputs = [("moments_stats", HDFFile)] config_options = { "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."), "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."), "nbins": StageParameter(int, default=20, msg="Number of bins."), "bin_slop": StageParameter(float, default=0.01, msg="Bin slop for treecorr."), "sep_units": StageParameter(str, default="arcmin", msg="Separation units."), "subtract_mean": StageParameter(bool, default=False, msg="Subtract mean from data."), } def run(self): import treecorr import h5py import matplotlib self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1)) matplotlib.use("agg") # Load the star catalog ra, dec, e_meas, e_mod, de, moment4_meas, moment4_mod, dmoment4, star_type = self.load_stars() moments_stats = {} for t in STAR_TYPES: s = np.where(star_type == t)[0] moments_stats[0, t] = self.compute_momentcorr(0, s, ra, dec, e_mod, e_mod) moments_stats[1, t] = self.compute_momentcorr(1, s, ra, dec, moment4_mod, e_mod) moments_stats[2, t] = self.compute_momentcorr(2, s, ra, dec, moment4_mod, moment4_mod) moments_stats[3, t] = self.compute_momentcorr(3, s, ra, dec, de, e_mod) moments_stats[4, t] = self.compute_momentcorr(4, s, ra, dec, de, moment4_mod) moments_stats[5, t] = self.compute_momentcorr(5, s, ra, dec, dmoment4, moment4_mod) moments_stats[6, t] = self.compute_momentcorr(6, s, ra, dec, e_mod, dmoment4) moments_stats[7, t] = self.compute_momentcorr(7, s, ra, dec, de, de) moments_stats[8, t] = self.compute_momentcorr(8, s, ra, dec, de, dmoment4) moments_stats[9, t] = self.compute_momentcorr(9, s, ra, dec, dmoment4, dmoment4) self.save_stats(moments_stats) def load_stars(self): with self.open_input("star_catalog") as f: g = f["stars"] ra = g["ra"][:] dec = g["dec"][:] e1meas = g["measured_e1"][:] e2meas = g["measured_e2"][:] e1mod = g["model_e1"][:] e2mod = g["model_e2"][:] e1meas_moment4 = g["measured_moment4_e1"][:] e2meas_moment4 = g["measured_moment4_e2"][:] e1mod_moment4 = g["model_moment4_e1"][:] e2mod_moment4 = g["model_moment4_e2"][:] # Note: definition are flipped for this paper de1 = e1mod - e1meas de2 = e2mod - e2meas de1_moment4 = e1mod_moment4 - e1meas_moment4 de2_moment4 = e2mod_moment4 - e2meas_moment4 if self.config["subtract_mean"]: e_meas = np.array((e1meas - np.mean(e1meas), e2meas - np.mean(e2meas))) e_mod = np.array((e1mod - np.mean(e1mod), e2mod - np.mean(e2mod))) de = np.array((de1 - np.mean(de1), de2 - np.mean(de2))) moment4_meas = np.array( (e1meas_moment4 - np.mean(e1meas_moment4), e2meas_moment4 - np.mean(e2meas_moment4)) ) moment4_mod = np.array((e1mod_moment4 - np.mean(e1mod_moment4), e2mod_moment4 - np.mean(e2mod_moment4))) dmoment4 = np.array((de1_moment4 - np.mean(de1_moment4), de2_moment4 - np.mean(de2_moment4))) else: e_meas = np.array((e1meas, e2meas)) e_mod = np.array((e1mod, e2mod)) de = np.array((de1, de2)) moment4_meas = np.array((e1meas_moment4, e2meas_moment4)) moment4_mod = np.array((e1mod_moment4, e2mod_moment4)) dmoment4 = np.array((de1_moment4, de2_moment4)) star_type = load_star_type(g) return ra, dec, e_meas, e_mod, de, moment4_meas, moment4_mod, dmoment4, star_type def compute_momentcorr(self, i, s, ra, dec, q1, q2): # select a subset of the stars ra = ra[s] dec = dec[s] q1 = q1[:, s] q2 = q2[:, s] n = len(ra) print(f"Computing Rowe statistic rho_{i} from {n} objects") import treecorr corr = treecorr.GGCorrelation(self.config) cat1 = treecorr.Catalog(ra=ra, dec=dec, g1=q1[0], g2=q1[1], ra_units="deg", dec_units="deg") cat2 = treecorr.Catalog(ra=ra, dec=dec, g1=q2[0], g2=q2[1], ra_units="deg", dec_units="deg") corr.process(cat1, cat2) return corr.meanr, corr.xip, corr.varxip**0.5 def moment_plots(self, rowe_stats): # First plot - stats 1,3,4 import matplotlib.pyplot as plt import matplotlib.transforms as mtrans f = self.open_output("moments", wrapper=True, figsize=(10, 6 * len(STAR_TYPES))) for s in STAR_TYPES: ax = plt.subplot(len(STAR_TYPES), 1, s + 1) for j, i in enumerate([0]): theta, xi, err = rowe_stats[i, s] tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * (j - 1), 0, units="inches") plt.errorbar( theta, abs(xi), err, fmt=".", label=rf"$\rho_{i}$", capsize=3, # transform=tr, ) plt.xscale("log") plt.yscale("log") plt.xlabel(r"$\theta$") plt.ylabel(r"$\xi_+(\theta)$") plt.legend() plt.title(STAR_TYPE_NAMES[s]) f.close() def save_stats(self, moments_stats): f = self.open_output("moments_stats") g = f.create_group("moment_statistics") for i in range(0, 10): for s in STAR_TYPES: theta, xi, err = moments_stats[i, s] name = STAR_TYPE_NAMES[s] h = g.create_group(f"moment4_{i}_{name}") h.create_dataset("theta", data=theta) h.create_dataset("xi_plus", data=xi) h.create_dataset("xi_err", data=err) f.close()
[docs] class TXTauStatistics(PipelineStage): """ Compute and plot PSF Tau statistics where the definition of Tau stats are eq. 20-22 of Gatti et al 2023. """ name = "TXTauStatistics" parallel = False inputs = [ ("binned_shear_catalog", ShearCatalog), ("star_catalog", HDFFile), ("rowe_stats", HDFFile), ] outputs = [ ("tau0", PNGFile), ("tau2", PNGFile), ("tau5", PNGFile), ("tau_stats", HDFFile), ] config_options = { "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."), "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."), "nbins": StageParameter(int, default=20, msg="Number of bins."), "bin_slop": StageParameter(float, default=0.01, msg="Bin slop for treecorr."), "sep_units": StageParameter(str, default="arcmin", msg="Separation units."), "npatch": StageParameter(int, default=150, msg="Number of patches for variance calculation and processing."), "psf_size_units": StageParameter(str, default="sigma", msg="Units for PSF size."), "subtract_mean": StageParameter(bool, default=False, msg="Subtract mean from data."), "dec_cut": StageParameter(bool, default=True, msg="Affects KiDS-1000 only."), "star_type": StageParameter(str, default="PSF-reserved", msg="Star type to use."), "cov_method": StageParameter(str, default="bootstrap", msg="Covariance method."), "flip_g2": StageParameter(bool, default=False, msg="Flip g2 sign."), "tomographic": StageParameter(bool, default=True, msg="Use tomographic bins."), } def run(self): import treecorr import h5py import matplotlib import emcee matplotlib.use("agg") tau_stats = {} p_bestfits = {} # Load star properties ra, dec, e_psf, e_mod, de_psf, T_f, star_type = self.load_stars() for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue # Load precomputed Rowe stats rowe_stats = self.load_rowe(s) # set ranges for mcmc ranges = {} ranges["alpha"] = [-1000.00, 1000.00] ranges["beta"] = [-1000.00, 1000.00] ranges["eta"] = [-1000.00, 1000.00] tau_stats[s] = {} p_bestfits[s] = {} # Load galaxies if self.config["tomographic"]: with self.open_input("binned_shear_catalog") as f: nzbin = list(range(f["shear"].attrs["nbin_source"])) + ["all"] else: nzbin = ["all"] print(nzbin) for n in nzbin: gal_ra, gal_dec, gal_g1, gal_g2, gal_weight = self.load_galaxies(n) gal_g = np.array((gal_g1, gal_g2)) print(f"computing tau stats for bin {n}") # Joint tau 0-2-5 data vector and cov # tau_stats contains [ang, tau0, tau2, tau5, cov] tau_stats[s][f"bin_{n}"] = self.compute_all_tau( gal_ra, gal_dec, gal_g, gal_weight, s, ra, dec, e_psf, e_mod, de_psf, T_f, star_type ) # Run simple mcmc to find best-fit values for alpha,beta,eta p_bestfits[s][f"bin_{n}"] = self.sample(tau_stats[s][f"bin_{n}"], rowe_stats, ranges) # Save tau stats in a h5 file self.save_tau_stats(tau_stats, p_bestfits) # Save tau plots self.tau_plots(tau_stats) def load_rowe(self, s): f = self.open_input("rowe_stats") rowe_stats = {} for i in 0, 1, 2, 3, 4, 5: name = STAR_TYPE_NAMES[s] theta = f["rowe_statistics"][f"rowe_{i}_{name}"]["theta"][:] xi_plus = f["rowe_statistics"][f"rowe_{i}_{name}"]["xi_plus"][:] xi_minus = f["rowe_statistics"][f"rowe_{i}_{name}"]["xi_minus"][:] xip_err = f["rowe_statistics"][f"rowe_{i}_{name}"]["xip_err"][:] xim_err = f["rowe_statistics"][f"rowe_{i}_{name}"]["xim_err"][:] rowe_stats[i] = theta, xi_plus, xi_minus, xip_err, xim_err return rowe_stats def sample(self, tau_stats, rowe_stats, ranges, nwalkers=32, ndim=3): """ Run a simple mcmc chain to detemine the best-fit values for alpha, beta, eta """ import emcee import scipy.optimize as optimize _, _, _, _, _, _, _, cov = tau_stats mask = cov.diagonal() > 0 cov = cov[mask][:, mask] # We debias the covariance with two correction factors from Hartlap et. al 2006 eq. 17 and Dodelson & Schneider 2013 eq. 28 Njk = self.config["npatch"] Ndv = 6 * self.config["nbins"] if not Ndv < Njk - 2: raise ValueError("Hartlap correction only valid for N data vector elements < N jackknife patches - 2") f_H = 1.0 * (Njk - Ndv - 2) / (Njk - 1) f_DS = 1 / (1 + (Ndv - ndim) * (Njk - Ndv - 2) / (Njk - Ndv - 1) / (Njk - Ndv - 4)) cov = cov / f_H / f_DS _, tau0p, tau0m, tau2p, tau2m, tau5p, tau5m, _ = tau_stats invcov = np.linalg.inv(cov) initguess = [0, -1, 1] bestpars = optimize.minimize( self.chi2, initguess, args=(tau_stats, rowe_stats, invcov, mask), method="Nelder-Mead", tol=1e-6 ) initpos = [bestpars.x + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)] print("Computing best-fit alpha, beta, eta") sampler = emcee.EnsembleSampler( nwalkers, ndim, self.logProb, args=(tau_stats, rowe_stats, ranges, invcov, mask) ) sampler.run_mcmc(initpos, nsteps=4000, progress=False) flat_samples = sampler.get_chain(discard=2000, flat=True) ret = {} var = ["alpha", "beta", "eta"] for i, v in enumerate(var): mcmc = np.percentile(flat_samples[:, i], [16, 50, 84]) q = np.diff(mcmc) ret[v] = {"median": mcmc[1], "lerr": q[0], "rerr": q[1]} chi2 = self.chi2( [ret["alpha"]["median"], ret["beta"]["median"], ret["eta"]["median"]], tau_stats, rowe_stats, invcov, mask ) # degree of freedom = nbins * 6 (i.e. tau 0+/-, tau2+/-, tau5+/-) - ndim dof = (self.config["nbins"] * 6) - ndim print("Best-fit finished. Resulting chi^2/dof: ", chi2 / dof) return ret def logPrior(self, theta, ranges): """ If parameter in defined range return 0, otherwise -np.inf """ alpha, beta, eta = theta if ( (ranges["alpha"][0] < alpha < ranges["alpha"][1]) and (ranges["beta"][0] < beta < ranges["beta"][1]) and (ranges["eta"][0] < eta < ranges["eta"][1]) ): return 0.0 return -np.inf def chi2(self, theta, tau_stats, rowe_stats, invcov, mask): """ Compute likelihood theta : parameters tau_stats : Measured tau stats rowe_stats: Measured rowe stats to be used for Tau invcov : Inverse covariance matrix """ # Load parameters alpha, beta, eta = theta # Load rowe and tau _, rowe0p, rowe0m, _, _ = rowe_stats[0] _, rowe1p, rowe1m, _, _ = rowe_stats[1] _, rowe2p, rowe2m, _, _ = rowe_stats[2] _, rowe3p, rowe3m, _, _ = rowe_stats[3] _, rowe4p, rowe4m, _, _ = rowe_stats[4] _, rowe5p, rowe5m, _, _ = rowe_stats[5] _, tau0p, tau0m, tau2p, tau2m, tau5p, tau5m, _ = tau_stats # Create combined template T0p = alpha * rowe0p + beta * rowe2p + eta * rowe5p T0m = alpha * rowe0m + beta * rowe2m + eta * rowe5m T2p = alpha * rowe2p + beta * rowe1p + eta * rowe4p T2m = alpha * rowe2m + beta * rowe1m + eta * rowe4m T5p = alpha * rowe5p + beta * rowe4p + eta * rowe3p T5m = alpha * rowe5m + beta * rowe4m + eta * rowe3m # Create data and template vector Tall = np.concatenate([T0p, T0m, T2p, T2m, T5p, T5m])[mask] Xall = np.concatenate([tau0p, tau0m, tau2p, tau2m, tau5p, tau5m])[mask] return np.dot(Xall - Tall, np.dot(Xall - Tall, invcov)) def logProb(self, theta, tau_stats, rowe_stats, ranges, invcov, mask): lp = self.logPrior(theta, ranges) if not np.isfinite(lp): return -np.inf return lp + (-0.5) * self.chi2(theta, tau_stats, rowe_stats, invcov, mask) def compute_all_tau(self, gra, gdec, g, gw, s, sra, sdec, e_meas, e_mod, de, T_f, star_type): """ Compute tau0, tau2, tau5. All three needs to be computed at once due to covariance. gra : RA of galaxies gdec : DEC of galaxies g : shear for observed galaxies np.array((e1, e2) gw : weights s : indices of stars to use in calculation sra : RA of stars sdec : DEC of stars e_meas : measured ellipticities of PSF from stars -- np.array((e1meas, e2meas)) e_mod : model ellipticities of PSF -- np.array((e1mod, e2mod)) de : e_meas-e_mod -- np.array((de1, de2)) T_f : (T_meas - T_model)/T_meas -- np.array(T_f) """ import treecorr p = e_mod q = de w = e_meas * T_f sra, sdec = np.array((sra[star_type == s], sdec[star_type == s])) # Get ra/dec for specific stars p = np.array(([p[0][star_type == s], p[1][star_type == s]])) # Get p for specific stars q = np.array(([q[0][star_type == s], q[1][star_type == s]])) # Get q for specific stars w = np.array(([w[0][star_type == s], w[1][star_type == s]])) # Get w for specific stars print(f"Computing Tau 0,2,5 and the covariance") self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1)) # Load all catalogs catg = treecorr.Catalog( ra=gra, dec=gdec, g1=g[0], g2=g[1], w=gw, ra_units="deg", dec_units="deg", npatch=self.config["npatch"] ) # galaxy shear catp = treecorr.Catalog( ra=sra, dec=sdec, g1=p[0], g2=p[1], ra_units="deg", dec_units="deg", patch_centers=catg.patch_centers ) # e_model catq = treecorr.Catalog( ra=sra, dec=sdec, g1=q[0], g2=q[1], ra_units="deg", dec_units="deg", patch_centers=catg.patch_centers ) # (e_* - e_model) catw = treecorr.Catalog( ra=sra, dec=sdec, g1=w[0], g2=w[1], ra_units="deg", dec_units="deg", patch_centers=catg.patch_centers ) # (e_*(T_* - T_model)/T_* ) # Compute all corrleations print(f"Computing shear-e_model correlation {catg.nobj} x {catp.nobj}") corr0 = treecorr.GGCorrelation(self.config) corr0.process(catg, catp) print(f"Computing shear-(e_*-e_model) correlation {catg.nobj} x {catq.nobj}") corr2 = treecorr.GGCorrelation(self.config) corr2.process(catg, catq) print(f"Computing shear-(e_*(T_* - T_model)/T_*) correlation {catg.nobj} x {catw.nobj}") corr5 = treecorr.GGCorrelation(self.config) corr5.process(catg, catw) # Estimate covariance using bootstrap. The ordering is xip0,xim0,xip2,xim2,xip5,xim5. cov = treecorr.estimate_multi_cov([corr0, corr2, corr5], self.config.cov_method) if self.config["cov_method"] == "shot": cov = np.diag(cov) return corr0.meanr, corr0.xip, corr0.xim, corr2.xip, corr2.xim, corr5.xip, corr5.xim, cov def save_tau_stats(self, tau_stats, p_bestfits): """ tau_stats: (dict) dictionary containing theta,tau0,tau2,tau5 and cov """ f = self.open_output("tau_stats") g = f.create_group("tau_statistics") for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue if self.config["tomographic"]: with self.open_input("binned_shear_catalog") as h: nbin = list(range(h["shear"].attrs["nbin_source"])) + ["all"] else: nbin = ["all"] for n in nbin: theta, tau0p, tau0m, tau2p, tau2m, tau5p, tau5m, cov = tau_stats[s][f"bin_{n}"] name = STAR_TYPE_NAMES[s] h = g.create_group(f"tau_{name}/bin_{n}/") h.create_dataset("theta", data=theta) h.create_dataset("tau0p", data=tau0p) h.create_dataset("tau0m", data=tau0m) h.create_dataset("tau2p", data=tau2p) h.create_dataset("tau2m", data=tau2m) h.create_dataset("tau5p", data=tau5p) h.create_dataset("tau5m", data=tau5m) h.create_dataset("cov", data=cov) # Also save best-fit values h = g.create_group(f"bestfits_{name}/bin_{n}") alpha_err = max(p_bestfits[s][f"bin_{n}"]["alpha"]["lerr"], p_bestfits[s][f"bin_{n}"]["alpha"]["rerr"]) beta_err = max(p_bestfits[s][f"bin_{n}"]["beta"]["lerr"], p_bestfits[s][f"bin_{n}"]["beta"]["rerr"]) eta_err = max(p_bestfits[s][f"bin_{n}"]["eta"]["lerr"], p_bestfits[s][f"bin_{n}"]["eta"]["rerr"]) h.create_dataset("alpha", data=p_bestfits[s][f"bin_{n}"]["alpha"]["median"]) h.create_dataset("alpha_err", data=alpha_err) h.create_dataset("beta", data=p_bestfits[s][f"bin_{n}"]["beta"]["median"]) h.create_dataset("beta_err", data=beta_err) h.create_dataset("eta", data=p_bestfits[s][f"bin_{n}"]["eta"]["median"]) h.create_dataset("eta_err", data=eta_err) f.close() def load_stars(self): with self.open_input("star_catalog") as f: g = f["stars"] ra = g["ra"][:] dec = g["dec"][:] e1meas = g["measured_e1"][:] e2meas = g["measured_e2"][:] e1mod = g["model_e1"][:] e2mod = g["model_e2"][:] if self.config["flip_g2"]: e2meas *= -1 e2mod *= -1 de1 = e1meas - e1mod de2 = e2meas - e2mod if self.config["psf_size_units"] == "Tmeas": T_frac = (g["measured_T"][:] - g["model_T"][:]) / g["measured_T"][:] elif self.config["psf_size_units"] == "Tmodel": T_frac = (g["measured_T"][:] - g["model_T"][:]) / g["model_T"][:] elif self.config["psf_size_units"] == "sigma": T_frac = (g["measured_T"][:] ** 2 - g["model_T"][:] ** 2) / g["measured_T"][:] ** 2 else: raise ValueError("Need to specify measured_T: Tmeas/Tmodel/sigma") if self.config["subtract_mean"]: e_meas = np.array((e1meas - np.mean(e1meas), e2meas - np.mean(e2meas))) e_mod = np.array((e1mod - np.mean(e1mod), e2mod - np.mean(e2mod))) de = np.array((de1 - np.mean(de1), de2 - np.mean(de2))) else: e_meas = np.array((e1meas, e2meas)) e_mod = np.array((e1mod, e2mod)) de = np.array((de1, de2)) star_type = load_star_type(g) return ra, dec, e_meas, e_mod, de, T_frac, star_type def load_galaxies(self, zbin): with self.open_input("binned_shear_catalog") as f: g = f[f"shear/bin_{zbin}/"] ra = g["ra"][:] dec = g["dec"][:] g1 = g["g1"][:] g2 = g["g2"][:] w = g["weight"][:] if self.config["subtract_mean"]: g1 = g1 - np.average(g1, weights=w) g2 = g2 - np.average(g1, weights=w) if self.config["flip_g2"]: g2 *= -1 return ra, dec, g1, g2, w def tau_plots(self, tau_stats): # Plot non-tomographic stats 1,3,4 import matplotlib.pyplot as plt import matplotlib.transforms as mtrans for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue theta, tau0p, tau0m, tau2p, tau2m, tau5p, tau5m, cov = tau_stats[s]["bin_all"] nb = len(theta) taus = {"0p": tau0p, "0m": tau0m, "2p": tau2p, "2m": tau2m, "5p": tau5p, "5m": tau5m} errs = { "0p": np.diag(cov[0 * nb : 1 * nb, 0 * nb : 1 * nb]) ** 0.5, "0m": np.diag(cov[1 * nb : 2 * nb, 1 * nb : 2 * nb]) ** 0.5, "2p": np.diag(cov[2 * nb : 3 * nb, 2 * nb : 3 * nb]) ** 0.5, "2m": np.diag(cov[3 * nb : 4 * nb, 3 * nb : 4 * nb]) ** 0.5, "5p": np.diag(cov[4 * nb : 5 * nb, 4 * nb : 5 * nb]) ** 0.5, "5m": np.diag(cov[5 * nb : 6 * nb, 5 * nb : 6 * nb]) ** 0.5, } for j, i in enumerate([0, 2, 5]): f = self.open_output(f"tau{i}", wrapper=True, figsize=(10, 6 * len(STAR_TYPES))) ax = plt.subplot(len(STAR_TYPES), 1, s + 1) tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * (j - 1), 0, units="inches") plt.errorbar( theta, taus[f"{i}p"], errs[f"{i}p"], fmt=".", label=rf"$\tau_{i}+$", capsize=3, color="blue", transform=tr, ) plt.errorbar( theta, taus[f"{i}m"], errs[f"{i}m"], fmt=".", label=rf"$\tau_{i}-$", capsize=3, color="red", transform=tr, ) plt.xscale("log") plt.yscale("symlog") plt.xlabel(r"$\theta$") plt.ylabel(rf"$\tau_{i}(\theta)$") plt.legend() plt.title("Non-tomographic" + STAR_TYPE_NAMES[s]) f.close()
[docs] class TXRoweStatistics(PipelineStage): """ Compute and plot PSF Rowe statistics People sometimes think that these statistics are called the Rho statistics, because we usually use that letter for them. Not so. They are named after the wonderful Barney Rowe, now sadly lost to high finance, who presented the first two of them in MNRAS 404, 350 (2010). """ name = "TXRoweStatistics" parallel = False inputs = [("star_catalog", HDFFile), ("patch_centers", TextFile)] outputs = [ ("rowe134", PNGFile), ("rowe25", PNGFile), ("rowe0", PNGFile), ("rowe_stats", HDFFile), ] config_options = { "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."), "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."), "nbins": StageParameter(int, default=20, msg="Number of bins."), "bin_slop": StageParameter(float, default=0.01, msg="Bin slop for treecorr."), "sep_units": StageParameter(str, default="arcmin", msg="Separation units."), "psf_size_units": StageParameter(str, default="sigma", msg="Units for PSF size."), "definition": StageParameter(str, default="des-y1", msg="Definition for Rowe statistics."), "subtract_mean": StageParameter(bool, default=False, msg="Subtract mean from data."), "star_type": StageParameter(str, default="PSF-reserved", msg="Star type to use."), "var_method": StageParameter(str, default="bootstrap", msg="Variance method."), "flip_g2": StageParameter(bool, default=False, msg="Flip g2 sign."), } def run(self): import treecorr import h5py import matplotlib matplotlib.use("agg") self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1)) ra, dec, e_meas, e_mod, de, T_f, star_type = self.load_stars() rowe_stats = {} for t in STAR_TYPES: s = np.where(star_type == t)[0] if len(s) == 0: continue if self.config["definition"] == "des-y1" or self.config["definition"] == "des-y3": print("Using DES's definition of Rowes") rowe_stats[0, t] = self.compute_rowe(0, s, ra, dec, e_mod, e_mod) rowe_stats[1, t] = self.compute_rowe(1, s, ra, dec, de, de) rowe_stats[2, t] = self.compute_rowe(2, s, ra, dec, de, e_mod) rowe_stats[3, t] = self.compute_rowe(3, s, ra, dec, e_meas * T_f, e_meas * T_f) rowe_stats[4, t] = self.compute_rowe(4, s, ra, dec, de, e_meas * T_f) rowe_stats[5, t] = self.compute_rowe(5, s, ra, dec, e_mod, e_meas * T_f) elif self.config["definition"] == "hsc-y1" or self.config["definition"] == "hsc-y3": print("Using HSC's definition of Rowes") # de = g_meas - g_model # dT = (T_meas - T_model)/Tmodel # rho1 = de x de # rho2 = e_mod x de # rho3 = e_mod(dT/T_mod) x e_mod(dT/T_mod) # rho4 = de(e_mod*dT/T_mod) # rho5 = e_mod*(e_mod*dT/T_mod) rowe_stats[0, t] = self.compute_rowe(0, s, ra, dec, e_mod, e_mod) rowe_stats[1, t] = self.compute_rowe(1, s, ra, dec, de, de) rowe_stats[2, t] = self.compute_rowe(2, s, ra, dec, de, e_mod) rowe_stats[3, t] = self.compute_rowe(3, s, ra, dec, e_mod * T_f, e_mod * T_f) rowe_stats[4, t] = self.compute_rowe(4, s, ra, dec, de, e_mod * T_f) rowe_stats[5, t] = self.compute_rowe(5, s, ra, dec, e_mod, e_meas * T_f) self.save_stats(rowe_stats) self.rowe_plots(rowe_stats) def load_stars(self): with self.open_input("star_catalog") as f: g = f["stars"] ra = g["ra"][:] dec = g["dec"][:] e1meas = g["measured_e1"][:] e2meas = g["measured_e2"][:] e1mod = g["model_e1"][:] e2mod = g["model_e2"][:] if self.config["flip_g2"]: e2meas *= -1 e2mod *= -1 de1 = e1meas - e1mod de2 = e2meas - e2mod if self.config["psf_size_units"] == "Tmeas": T_frac = (g["measured_T"][:] - g["model_T"][:]) / g["measured_T"][:] elif self.config["psf_size_units"] == "Tmodel": T_frac = (g["measured_T"][:] - g["model_T"][:]) / g["model_T"][:] elif self.config["psf_size_units"] == "sigma": T_frac = (g["measured_T"][:] ** 2 - g["model_T"][:] ** 2) / g["measured_T"][:] ** 2 else: sys.exit("Need to specify measured_T: Tmeas/Tmodel/sigma") if self.config["subtract_mean"]: e_meas = np.array((e1meas - np.mean(e1meas), e2meas - np.mean(e2meas))) e_mod = np.array((e1mod - np.mean(e1mod), e2mod - np.mean(e2mod))) de = np.array((de1 - np.mean(de1), de2 - np.mean(de2))) else: e_meas = np.array((e1meas, e2meas)) e_mod = np.array((e1mod, e2mod)) de = np.array((de1, de2)) star_type = load_star_type(g) return ra, dec, e_meas, e_mod, de, T_frac, star_type def compute_rowe(self, i, s, ra, dec, q1, q2): # select a subset of the stars ra = ra[s] dec = dec[s] q1 = q1[:, s] q2 = q2[:, s] n = len(ra) print(f"Computing Rowe statistic rho_{i} from {n} objects") import treecorr corr = treecorr.GGCorrelation(self.config) cat1 = treecorr.Catalog( ra=ra, dec=dec, g1=q1[0], g2=q1[1], ra_units="deg", dec_units="deg", patch_centers=self.get_input("patch_centers"), ) cat2 = treecorr.Catalog( ra=ra, dec=dec, g1=q2[0], g2=q2[1], ra_units="deg", dec_units="deg", patch_centers=self.get_input("patch_centers"), ) corr.process(cat1, cat2) return corr.meanr, corr.xip, corr.xim, corr.varxip**0.5, corr.varxim**0.5 def rowe_plots(self, rowe_stats): # First plot - stats 1,3,4 import matplotlib.pyplot as plt import matplotlib.transforms as mtrans f = self.open_output("rowe0", wrapper=True, figsize=(10, 6 * len(STAR_TYPES))) for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue ax = plt.subplot(len(STAR_TYPES), 1, s + 1) for j, i in enumerate([0]): theta, xip, xim, xip_err, xim_err = rowe_stats[i, s] tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * (j - 1), 0, units="inches") plt.errorbar( theta, xip, xip_err, fmt=".", label=rf"$\rho_{i}+$", capsize=3, color="blue", transform=tr, ) plt.errorbar( theta, xim, xim_err, fmt=".", label=rf"$\rho_{i}-$", capsize=3, color="blue", transform=tr, ) plt.xscale("log") plt.yscale("symlog") plt.xlabel(r"$\theta$") plt.ylabel(r"$\xi(\theta)$") plt.legend() plt.title(STAR_TYPE_NAMES[s]) f.close() f = self.open_output("rowe134", wrapper=True, figsize=(10, 6 * len(STAR_TYPES))) for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue ax = plt.subplot(len(STAR_TYPES), 1, s + 1) mkr = ["o", "s", "D"] for j, i in enumerate([1, 3, 4]): theta, xip, xim, xip_err, xim_err = rowe_stats[i, s] tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * (j - 1), 0, units="inches") plt.errorbar( theta, xip, xip_err, marker=mkr[j], label=rf"$\rho_{i}+$", capsize=3, color="blue", transform=tr, ) plt.errorbar( theta, xim, xim_err, marker=mkr[j], label=rf"$\rho_{i}-$", capsize=3, color="red", transform=tr, ) plt.xscale("log") plt.yscale("symlog") plt.xlabel(r"$\theta$") plt.ylabel(r"$\xi(\theta)$") plt.legend() plt.title(STAR_TYPE_NAMES[s]) f.close() f = self.open_output("rowe25", wrapper=True, figsize=(10, 6 * len(STAR_TYPES))) for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue ax = plt.subplot(len(STAR_TYPES), 1, s + 1) mkr = ["o", "s"] for j, i in enumerate([2, 5]): theta, xip, xim, xip_err, xim_err = rowe_stats[i, s] tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * j - 0.025, 0, units="inches") plt.errorbar( theta, xip, xip_err, fmt=mkr[j], label=rf"$\rho_{i}+$", capsize=3, color="blue", transform=tr, ) plt.errorbar( theta, xim, xim_err, fmt=mkr[j], label=rf"$\rho_{i}-$", capsize=3, color="red", transform=tr, ) plt.title(STAR_TYPE_NAMES[s]) plt.xscale("log") plt.yscale("symlog") plt.xlabel(r"$\theta$") plt.ylabel(r"$\xi(\theta)$") plt.legend() f.close() def save_stats(self, rowe_stats): f = self.open_output("rowe_stats") g = f.create_group("rowe_statistics") for i in 0, 1, 2, 3, 4, 5: for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue theta, xip, xim, xip_err, xim_err = rowe_stats[i, s] name = STAR_TYPE_NAMES[s] h = g.create_group(f"rowe_{i}_{name}") h.create_dataset("theta", data=theta) h.create_dataset("xi_plus", data=xip) h.create_dataset("xi_minus", data=xim) h.create_dataset("xip_err", data=xip_err) h.create_dataset("xim_err", data=xim_err) f.close()
[docs] class TXGalaxyStarShear(PipelineStage): """ Compute and plot star x galaxy and star x star correlations. These are shape correlations; they differ from the Rowe stats slightly because they measure star values not interpolated PSF values. """ name = "TXGalaxyStarShear" parallel = False inputs = [ ("shear_catalog", ShearCatalog), ("star_catalog", HDFFile), ("shear_tomography_catalog", TomographyCatalog), ] outputs = [ ("star_shear_test", PNGFile), ("star_star_test", PNGFile), ("star_shear_stats", HDFFile), ] config_options = { "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."), "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."), "nbins": StageParameter(int, default=20, msg="Number of bins."), "bin_slop": StageParameter(float, default=0.1, msg="Bin slop for treecorr."), "sep_units": StageParameter(str, default="arcmin", msg="Separation units."), "psf_size_units": StageParameter(str, default="sigma", msg="Units for PSF size."), "shear_catalog_type": StageParameter(str, default="metacal", msg="Shear catalog type."), "star_type": StageParameter(str, default="PSF-reserved", msg="Star type to use."), "flip_g2": StageParameter(bool, default=False, msg="Flip g2 sign."), } def run(self): import treecorr import h5py import matplotlib matplotlib.use("agg") self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1)) ra, dec, e_psf, de_psf, star_type = self.load_stars() ra_gal, dec_gal, g1, g2, weight = self.load_galaxies() # only use reserved stars for this statistics galaxy_star_stats = {} star_star_stats = {} for t in STAR_TYPES: s = star_type == t if STAR_TYPE_NAMES[t] != self.config.star_type: continue galaxy_star_stats[1, t] = self.compute_galaxy_star(ra, dec, e_psf, s, ra_gal, dec_gal, g1, g2, weight) galaxy_star_stats[2, t] = self.compute_galaxy_star(ra, dec, de_psf, s, ra_gal, dec_gal, g1, g2, weight) star_star_stats[1, t] = self.compute_star_star(ra, dec, e_psf, s, ra_gal, dec_gal, e_psf, weight) star_star_stats[2, t] = self.compute_star_star(ra, dec, de_psf, s, ra_gal, dec_gal, de_psf, weight) self.save_stats(galaxy_star_stats, star_star_stats) self.galaxy_star_plots(galaxy_star_stats) self.star_star_plots(star_star_stats) def load_stars(self): with self.open_input("star_catalog") as f: g = f["stars"] ra = g["ra"][:] dec = g["dec"][:] e1 = g["measured_e1"][:] e2 = g["measured_e2"][:] de1 = e1 - g["model_e1"][:] de2 = e2 - g["model_e2"][:] e_psf = np.array((e1, e2)) de_psf = np.array((de1, de2)) star_type = load_star_type(g) return ra, dec, e_psf, de_psf, star_type def load_galaxies(self): # Columns we need from the shear catalog # TODO: not sure of an application where we would want to use true shear but can be added cat_type = read_shear_catalog_type(self) _, cal = Calibrator.load(self.get_input("shear_tomography_catalog")) # load tomography data with self.open_input("shear_tomography_catalog") as f: source_bin = f["tomography/bin"][:] mask = source_bin != -1 # Only use the sources that pass the fiducial cuts if cat_type == "metacal": R_total_2d = f["response/R_S_2d"][:] + f["response/R_gamma_mean_2d"][:] elif cat_type == "metadetect": R_total_2d = f["response/R_2d"][:] with self.open_input("shear_catalog") as f: g = f["shear"] # Get the base catalog for metadetect if cat_type == "metadetect": g = g["00"] ra = g["ra"][:][mask] dec = g["dec"][:][mask] if cat_type == "metacal": g1 = g["g1"][:][mask] g2 = g["g2"][:][mask] weight = g["weight"][:][mask] elif cat_type == "metadetect": g1 = g["g1"][:][mask] g2 = g["g2"][:][mask] weight = g["weight"][:][mask] else: g1 = g["g1"][:][mask] g2 = g["g2"][:][mask] weight = g["weight"][:][mask] sigma_e = g["sigma_e"][:][mask] m = g["m"][:][mask] if self.config["flip_g2"]: g2 *= -1 if cat_type == "metacal" or cat_type == "metadetect": # We use S=0 here because we have already included it in R_total g1, g2 = cal.apply(g1, g2) elif cat_type == "lensfit": # In KiDS, the additive bias is calculated and removed per North and South field # therefore, we add dec to split data into these fields. # You can choose not to by setting dec_cut = 90 in the config, for example. g1, g2 = cal.apply(g1, g2, dec) else: print("Shear calibration type not recognized.") return ra, dec, g1, g2, weight def compute_galaxy_star(self, ra, dec, q, s, ra_gal, dec_gal, g1, g2, weight): # select the star type ra = ra[s] dec = dec[s] q = q[:, s] # PSF quantity, either ellipticity or residual n = len(ra) i = len(ra_gal) print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies") import treecorr corr = treecorr.GGCorrelation(self.config) cat1 = treecorr.Catalog(ra=ra, dec=dec, g1=q[0], g2=q[1], ra_units="deg", dec_units="deg") cat2 = treecorr.Catalog( ra=ra_gal, dec=dec_gal, g1=g1, g2=g2, ra_units="deg", dec_units="deg", w=weight, ) corr.process(cat1, cat2) return corr.meanr, corr.xip, corr.varxip**0.5 def compute_star_star(self, ra, dec, q1, s, ra_gal, dec_gal, q2, weight): # select the star type ra = ra[s] dec = dec[s] q1 = q1[:, s] q2 = q2[:, s] n = len(ra) i = len(ra_gal) print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies") import treecorr corr = treecorr.GGCorrelation(self.config) cat1 = treecorr.Catalog(ra=ra, dec=dec, g1=q1[0], g2=q1[1], ra_units="deg", dec_units="deg") cat2 = treecorr.Catalog(ra=ra, dec=dec, g1=q2[0], g2=q2[1], ra_units="deg", dec_units="deg") corr.process(cat1, cat2) return corr.meanr, corr.xip, corr.varxip**0.5 def galaxy_star_plots(self, galaxy_star_stats): import matplotlib.pyplot as plt import matplotlib.transforms as mtrans f = self.open_output("star_shear_test", wrapper=True, figsize=(10, 6 * len(STAR_TYPES))) TEST_TYPES = ["shear", "residual"] for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue ax = plt.subplot(len(STAR_TYPES), 1, s + 1) for j, i in enumerate([1, 2]): theta, xi, err = galaxy_star_stats[i, s] plt.errorbar( theta, abs(xi), err, fmt=".", label=f"galaxy cross star {TEST_TYPES[i - 1]}", capsize=3, ) plt.title(STAR_TYPE_NAMES[s]) plt.xscale("log") plt.yscale("log") plt.xlabel(r"$\theta$") plt.ylabel(r"$\xi_+(\theta)$") plt.legend() f.close() def star_star_plots(self, star_star_stats): import matplotlib.pyplot as plt import matplotlib.transforms as mtrans f = self.open_output("star_star_test", wrapper=True, figsize=(10, 6 * len(STAR_TYPES))) TEST_TYPES = ["shear", "residual"] for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue ax = plt.subplot(len(STAR_TYPES), 1, s + 1) for j, i in enumerate([1, 2]): theta, xi, err = star_star_stats[i, s] plt.errorbar( theta, abs(xi), err, fmt=".", label=f"star cross star {TEST_TYPES[i - 1]}", capsize=3, ) plt.title(STAR_TYPE_NAMES[s]) plt.xscale("log") plt.yscale("log") plt.xlabel(r"$\theta$") plt.ylabel(r"$\xi_+(\theta)$") plt.legend() f.close() def save_stats(self, galaxy_star_stats, star_star_stats): f = self.open_output("star_shear_stats") g = f.create_group("star_cross_galaxy") for i in 1, 2: for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue theta, xi, err = galaxy_star_stats[i, s] name = STAR_TYPE_NAMES[s] h = g.create_group(f"star_cross_galaxy_{i}_{name}") h.create_dataset("theta", data=theta) h.create_dataset("xi_plus", data=xi) h.create_dataset("xi_err", data=err) g = f.create_group("star_cross_star") for i in 1, 2: for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue theta, xi, err = star_star_stats[i, s] name = STAR_TYPE_NAMES[s] h = g.create_group(f"star_cross_star_{i}_{name}") h.create_dataset("theta", data=theta) h.create_dataset("xi_plus", data=xi) h.create_dataset("xi_err", data=err) f.close()
[docs] class TXGalaxyStarDensity(PipelineStage): """ Compute and plot star x galaxy and star x star density correlations This version uses the source catalog, though a version with lens samples might also be useful. """ name = "TXGalaxyStarDensity" parallel = False inputs = [ ("shear_catalog", ShearCatalog), ("star_catalog", HDFFile), ("shear_tomography_catalog", TomographyCatalog), ("random_cats", RandomsCatalog), ] outputs = [ ("star_density_test", PNGFile), ("star_density_stats", HDFFile), ] config_options = { "min_sep": StageParameter(float, default=0.5, msg="Minimum separation in arcmin."), "max_sep": StageParameter(float, default=250.0, msg="Maximum separation in arcmin."), "nbins": StageParameter(int, default=20, msg="Number of bins."), "bin_slop": StageParameter(float, default=0.1, msg="Bin slop for treecorr."), "sep_units": StageParameter(str, default="arcmin", msg="Separation units."), "psf_size_units": StageParameter(str, default="sigma", msg="Units for PSF size."), "star_type": StageParameter(str, default="PSF-reserved", msg="Star type to use."), "flip_g2": StageParameter(bool, default=False, msg="Flip g2 sign."), } def run(self): import treecorr import h5py import matplotlib matplotlib.use("agg") self.config["num_threads"] = int(os.environ.get("OMP_NUM_THREADS", 1)) ra, dec, star_type = self.load_stars() ra_gal, dec_gal = self.load_galaxies() ra_random, dec_random = self.load_randoms() galaxy_star_stats = {} for t in STAR_TYPES: s = star_type == t if STAR_TYPE_NAMES[t] != self.config.star_type: continue galaxy_star_stats[1, t] = self.compute_galaxy_star(ra, dec, s, ra_gal, dec_gal, ra_random, dec_random) galaxy_star_stats[2, t] = self.compute_star_star(ra, dec, s, ra_gal, dec_gal, ra_random, dec_random) self.save_stats(galaxy_star_stats) self.galaxy_star_plots(galaxy_star_stats) def load_stars(self): with self.open_input("star_catalog") as f: g = f["stars"] ra = g["ra"][:] dec = g["dec"][:] star_type = load_star_type(g) return ra, dec, star_type def load_randoms(self): with self.open_input("random_cats") as f: group = f["randoms"] ra_random = group["ra"][:] dec_random = group["dec"][:] return ra_random, dec_random def load_galaxies(self): # Columns we need from the shear catalog # TODO: not sure of an application where we would want to use true shear but can be added read_shear_catalog_type(self) # load tomography data with self.open_input("shear_tomography_catalog") as f: source_bin = f["tomography/bin"][:] mask = source_bin != -1 # Only use the sources that pass the fiducial cuts with self.open_input("shear_catalog", wrapper=True) as f: g = f.file["shear"] if f.catalog_type == "metadetect": g = g["00"] ra = g["ra"][:][mask] dec = g["dec"][:][mask] return ra, dec def compute_galaxy_star(self, ra, dec, s, ra_gal, dec_gal, ra_random, dec_random): # select the star type ra = ra[s] dec = dec[s] n = len(ra) i = len(ra_gal) print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies") import treecorr rancat = treecorr.Catalog(ra=ra_random, dec=dec_random, ra_units="degree", dec_units="degree") cat1 = treecorr.Catalog(ra=ra, dec=dec, ra_units="deg", dec_units="deg") cat2 = treecorr.Catalog(ra=ra_gal, dec=dec_gal, ra_units="deg", dec_units="deg") nn = treecorr.NNCorrelation(self.config) rn = treecorr.NNCorrelation(self.config) nr = treecorr.NNCorrelation(self.config) rr = treecorr.NNCorrelation(self.config) nn.process(cat1, cat2) nr.process(cat1, rancat) rn.process(rancat, cat2) rr.process(rancat, rancat) nn.calculateXi(rr=rr, dr=nr, rd=rn) return nn.meanr, nn.xi, nn.varxi**0.5 def compute_star_star(self, ra, dec, s, ra_gal, dec_gal, ra_random, dec_random): # select the star type ra = ra[s] dec = dec[s] n = len(ra) i = len(ra_gal) print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies") import treecorr rancat = treecorr.Catalog(ra=ra_random, dec=dec_random, ra_units="degree", dec_units="degree") cat1 = treecorr.Catalog(ra=ra, dec=dec, ra_units="deg", dec_units="deg") cat2 = treecorr.Catalog(ra=ra_gal, dec=dec_gal, ra_units="deg", dec_units="deg") nn = treecorr.NNCorrelation(self.config) rn = treecorr.NNCorrelation(self.config) nr = treecorr.NNCorrelation(self.config) rr = treecorr.NNCorrelation(self.config) nn.process(cat1, cat1) nr.process(cat1, rancat) rn.process(rancat, cat1) rr.process(rancat, rancat) nn.calculateXi(rr=rr, dr=nr, rd=rn) return nn.meanr, nn.xi, nn.varxi**0.5 def galaxy_star_plots(self, galaxy_star_stats): import matplotlib.pyplot as plt import matplotlib.transforms as mtrans f = self.open_output("star_density_test", wrapper=True, figsize=(10, 6 * len(STAR_TYPES))) TEST_TYPES = ["star cross galaxy", "star cross star"] for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue ax = plt.subplot(len(STAR_TYPES), 1, s + 1) for j, i in enumerate([1, 2]): theta, xi, err = galaxy_star_stats[i, s] tr = mtrans.offset_copy(ax.transData, f.file, 0.05 * j - 0.025, 0, units="inches") plt.errorbar( theta, abs(xi), err, fmt=".", label=f"{TEST_TYPES[i - 1]} density stats", capsize=3, # transform=tr, ) plt.title(STAR_TYPE_NAMES[s]) plt.xscale("log") plt.yscale("log") plt.xlabel(r"$\theta$") plt.ylabel(r"$\xi_+(\theta)$") plt.legend() f.close() def save_stats(self, galaxy_star_stats): f = self.open_output("star_density_stats") g = f.create_group("star_density") for i in 1, 2: for s in STAR_TYPES: if STAR_TYPE_NAMES[s] != self.config.star_type: continue theta, xi, err = galaxy_star_stats[i, s] name = STAR_TYPE_NAMES[s] h = g.create_group(f"star_density_{i}_{name}") h.create_dataset("theta", data=theta) h.create_dataset("xi_plus", data=xi) h.create_dataset("xi_err", data=err) f.close()
[docs] class TXBrighterFatterPlot(PipelineStage): """ Compute and plot a diagnostic of the brighter-fatter effect This plots the mean e1, e2, and size difference between that interpolated at star locations and the star values themselves, as a function of magnitude. """ name = "TXBrighterFatterPlot" parallel = False inputs = [("star_catalog", HDFFile)] outputs = [ ("brighter_fatter_plot", PNGFile), ("brighter_fatter_data", HDFFile), ] config_options = { "band": StageParameter(str, default="r", msg="Band to use for magnitude."), "nbin": StageParameter(int, default=20, msg="Number of magnitude bins."), "mmin": StageParameter(float, default=18.5, msg="Minimum magnitude."), "mmax": StageParameter(float, default=23.5, msg="Maximum magnitude."), } def run(self): import h5py import matplotlib matplotlib.use("agg") data = self.load_stars() results = {} for s in STAR_TYPES: w = data["star_type"] == s data_s = {k: v[w] for k, v in data.items()} results[s] = self.compute_binned_stats(data_s) self.save_stats(results) self.save_plots(results) def load_stars(self): with self.open_input("star_catalog") as f: g = f["stars"] band = self.config["band"] data = {} data["mag"] = g[f"mag_{band}"][:] data["delta_e1"] = g["measured_e1"][:] - g["model_e1"][:] data["delta_e2"] = g["measured_e2"][:] - g["model_e2"][:] data["delta_T"] = g["measured_T"][:] - g["model_T"][:] data["star_type"] = load_star_type(g) return data def compute_binned_stats(self, data): # Which band this corresponds to depends on the # configuration option chosen mag = data["mag"] mmin = self.config["mmin"] mmax = self.config["mmax"] nbin = self.config["nbin"] # bin edges in magnitude edges = np.linspace(mmin, mmax, nbin + 1) index = np.digitize(mag, edges) # Space for all the output values to go in dT = np.zeros(nbin) errT = np.zeros(nbin) e1 = np.zeros(nbin) e2 = np.zeros(nbin) err1 = np.zeros(nbin) err2 = np.zeros(nbin) m = np.zeros(nbin) for i in range(nbin): # Select only objects where everything is finite, as well # as only thing in this tomographic bin w = np.where( (index == i + 1) & np.isfinite(data["delta_T"]) & np.isfinite(data["delta_e1"]) & np.isfinite(data["delta_e2"]) ) # x-value = mean mag m[i] = mag[w].mean() # y values dT_i = data["delta_T"][w] e1_i = data["delta_e1"][w] e2_i = data["delta_e2"][w] # Mean and error on mean of each of these quantities dT[i] = dT_i.mean() errT[i] = dT_i.std() / np.sqrt(dT_i.size) e1[i] = e1_i.mean() err1[i] = e1_i.std() / np.sqrt(e1_i.size) e2[i] = e2_i.mean() err2[i] = e2_i.std() / np.sqrt(e2_i.size) return [m, dT, errT, e1, err1, e2, err2] def save_plots(self, results): import matplotlib.pyplot as plt band = self.config["band"] n = len(results) width = n * 6 f = self.open_output("brighter_fatter_plot", wrapper=True, figsize=(width, 8)) for s, res in results.items(): m, dT, errT, e1, err1, e2, err2 = res # Top plot - classic BF size plot, the size residual as a function of # magnitude ax = plt.subplot(2, n, 2 * s + 1) plt.title(STAR_TYPE_NAMES[s]) plt.errorbar(m, dT, errT, fmt=".") plt.xlabel(f"{band}-band magnitude") plt.ylabel(r"$T_\mathrm{PSF} - T_\mathrm{model}$ ($\mathrm{arcsec}^2$)") plt.ylim(-0.025, 0.1) # Lower plot - the e1 and e2 residuals as a function of mag plt.subplot(2, n, 2 * s + 2, sharex=ax) plt.title(STAR_TYPE_NAMES[s]) plt.errorbar(m, e1, err1, label="$e_1$", fmt=".") plt.errorbar(m, e2, err2, label="$e_2$", fmt=".") plt.ylabel(r"$e_\mathrm{PSF} - e_\mathrm{model}$") plt.xlabel(f"{band}-band magnitude") # May need to adjust this range plt.ylim(-0.02, 0.02) plt.legend() plt.tight_layout() f.close() def save_stats(self, results): # Save all the stats in results for later plotting # Save to standard HDF5 format. f = self.open_output("brighter_fatter_data") g1 = f.create_group("brighter_fatter") g1.attrs["band"] = self.config["band"] for s, res in results.items(): (m, dT, errT, e1, err1, e2, err2) = res g = g1.create_group(STAR_TYPE_NAMES[s]) g.create_dataset("mag", data=m) g.create_dataset("delta_T", data=dT) g.create_dataset("delta_T_error", data=errT) g.create_dataset("delta_e1", data=e1) g.create_dataset("delta_e1_error", data=err1) g.create_dataset("delta_e2", data=e2) g.create_dataset("delta_e2_error", data=err2) f.close()
def load_star_type(data): used = data["calib_psf_used"][:].astype("int") reserved = data["calib_psf_reserved"][:].astype("int") star_type = np.full(used.size, -99, dtype=int) star_type[used == 1] = STAR_PSF_USED star_type[reserved == 1] = STAR_PSF_RESERVED return star_type