Source code for txpipe.ingest.mocks

from ..base_stage import PipelineStage
from ..data_types import ShearCatalog, HDFFile, TextFile, QPPDFFile
from ..shear_calibration import band_variants, metacal_variants, metadetect_variants
from ceci.config import StageParameter
import numpy as np


[docs] class TXCosmoDC2Mock(PipelineStage): """ Simulate mock shear and photometry measurements from CosmoDC2 (or similar) This stage simulates metacal data and metacalibrated photometry measurements, starting from a cosmology catalogs of the kind used as an input to DC2 image and obs-catalog simulations. This is mainly useful for testing infrastructure in advance of the DC2 catalogs being available, but might also be handy for starting from a purer simulation. """ name = "TXCosmoDC2Mock" parallel = False inputs = [("response_model", HDFFile)] outputs = [ ("shear_catalog", ShearCatalog), ("photometry_catalog", HDFFile), ] config_options = { "cat_name": StageParameter(str, "cosmoDC2", msg="Name of the mock catalog to use."), "visits_per_band": StageParameter(int, 165, msg="Number of visits per band for noise simulation."), "snr_limit": StageParameter(float, 4.0, msg="S/N limit for object selection."), "max_size": StageParameter(int, 99999999999999, msg="Maximum catalog size for testing."), "extra_cols": StageParameter(str, "", msg="Extra columns to include (space-separated)."), "max_npix": StageParameter(int, 99999999999999, msg="Maximum number of pixels."), "unit_response": StageParameter(bool, False, msg="Whether to use unit response in simulation."), "cat_size": StageParameter(int, 0, msg="Pre-computed catalog size, if known."), "flip_g2": StageParameter( bool, True, msg="Whether to flip g2 sign to match conventions. TreeCorr and NaMaster use flip_g2=True" ), "apply_mag_cut": StageParameter(bool, False, msg="Apply magnitude cut for descqa comparison."), "Mag_r_limit": StageParameter(float, -19, msg="Magnitude r limit for object selection."), "metadetect": StageParameter( bool, True, msg="Whether to make a metadetect-style catalog (True) or metacal (False)." ), "add_shape_noise": StageParameter(bool, True, msg="Whether to add shape noise to simulation."), "healpixels": StageParameter(list, [-1], msg="List of HEALPix pixels to use."), } def data_iterator(self, gc): # Columns we need from the cosmo simulation cols = [ "mag_true_u_lsst", "mag_true_g_lsst", "mag_true_r_lsst", "mag_true_i_lsst", "mag_true_z_lsst", "mag_true_y_lsst", "ra", "dec", "ellipticity_1_true", "ellipticity_2_true", "shear_1", "shear_2", "size_true", "galaxy_id", "redshift_true", ] # Add any extra requestd columns cols += self.config["extra_cols"].split() it = gc.get_quantities(cols, return_iterator=True) nfile = len(gc._file_list) if hasattr(gc, "_file_list") else 0 for i, data in enumerate(it): if nfile: j = i + 1 print(f"Loading chunk {j}/{nfile}") yield data def load_catalog(self): import GCRCatalogs cat_name = self.config["cat_name"] self.bands = ("u", "g", "r", "i", "z", "y") print(f"Loading from catalog {cat_name}") kw = {} if self.config["healpixels"] != [-1]: kw = {"config_overwrite": {"healpix_pixels": self.config["healpixels"]}} gc = GCRCatalogs.load_catalog(cat_name, **kw) # GCR sometimes tries to read the entire catalog # to measure its length rather than looking at metadata # this can take a very long time. # allow the user to say that already know it. N = self.config["cat_size"] if N == 0: N = len(gc) return gc, N def run(self): gc, N = self.load_catalog() print(f"Rank {self.rank} loaded: length = {N}.") target_size = min(N, self.config["max_size"]) select_fraction = target_size / N if target_size != N: print(f"Will select approx {100 * select_fraction:.2f}% of objects ({target_size})") # Prepare output files metacal_file = self.open_output("shear_catalog", parallel=self.is_mpi()) photo_file = self.open_output("photometry_catalog", parallel=self.is_mpi()) photo_cols = self.setup_photometry_output(photo_file, target_size) if self.config["metadetect"]: metacal_cols = self.setup_metadetect_output(metacal_file, target_size) else: metacal_cols = self.setup_metacal_output(metacal_file, target_size) # Load the metacal response file self.load_metacal_response_model() # Keep track of catalog position start = 0 count = 0 # Loop through chunks of for data in self.data_iterator(gc): # The initial chunk size, of all the input data. # This will be reduced later as we remove objects some_col = list(data.keys())[0] chunk_size = len(data[some_col]) print(f"Process {self.rank} read chunk {count} - {count + chunk_size} of {N}") count += chunk_size # Select a random fraction of the catalog if we are cutting down # We can't just take the earliest galaxies because they are ordered # by redshift if target_size != N: select = np.random.uniform(size=chunk_size) < select_fraction nselect = select.sum() print(f"Cutting down to {nselect}/{chunk_size} objects") for name in list(data.keys()): data[name] = data[name][select] # Simulate the various output data sets mock_photometry = self.make_mock_photometry(data) # Cut out any objects too faint to be detected and measured. # We have to do this after the photometry, so that we know if # the object is detected, but we can do it before making the mock # metacal info, saving us some time simulating un-needed objects if self.config["snr_limit"] > 0: # otherwise there is no need to run this function which is slow self.remove_undetected(data, mock_photometry) if self.config["apply_mag_cut"]: self.apply_magnitude_cut(data) if self.config["metadetect"]: mock_shear = self.make_mock_metadetect(data, mock_photometry) else: mock_shear = self.make_mock_metacal(data, mock_photometry) # The chunk size has now changed some_col = list(mock_photometry.keys())[0] chunk_size = len(mock_photometry[some_col]) # start is where this process should start writing this # chunk of data. end is where the final process will finish # writing, becoming the starting point for the whole next # chunk over all the processes start, end = self.next_output_indices(start, chunk_size) # Save all output self.write_output( start, target_size, photo_cols, metacal_cols, photo_file, mock_photometry, metacal_file, mock_shear, ) # The next iteration starts writing where the current one ends. start = end if start >= target_size: break # Tidy up photo_file.close() metacal_file.close() self.truncate_outputs(end) def truncate_outputs(self, n): import h5py if self.comm is not None: self.comm.Barrier() def visitor(name, node): if isinstance(node, h5py.Dataset): print(f"Resizing {name}") node.resize((n,)) if self.rank == 0: # all files should now be closed for all procs print(f"Resizing all outupts to size {n}") with h5py.File(self.get_output("photometry_catalog"), "r+") as f: f["photometry"].visititems(visitor) with h5py.File(self.get_output("shear_catalog"), "r+") as f: f["shear"].visititems(visitor) def next_output_indices(self, start, chunk_size): if self.comm is None: end = start + chunk_size else: all_indices = self.comm.allgather(chunk_size) starting_points = np.concatenate(([0], np.cumsum(all_indices))) # use the old start to find the end point. # the final starting point (not used below, since it is larger # than the largest self.rank value) is the total data length end = start + starting_points[-1] start = start + starting_points[self.rank] print(f"- Rank {self.rank} writing output to {start}-{start + chunk_size}") return start, end def setup_photometry_output(self, photo_file, target_size): # Get a list of all the column names cols = ["ra", "dec", "extendedness"] for band in self.bands: cols.append(f"mag_{band}") cols.append(f"mag_{band}_err") cols.append(f"snr_{band}") for col in self.config["extra_cols"].split(): cols.append(col) # Make group for all the photometry group = photo_file.create_group("photometry") group.attrs["bands"] = self.bands # Extensible columns becase we don't know the size yet. # We will cut down the size at the end. for col in cols: group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="f8") # The only non-float column for now group.create_dataset("id", (target_size,), maxshape=(target_size,), dtype="i8") return cols + ["id"] def setup_metadetect_output(self, metacal_file, target_size): # Get a list of all the column names cols = metadetect_variants( "g1", "g2", "T", "s2n", "T_err", "ra", "dec", "psf_g1", "psf_g2", "psf_g1", "psf_g2", "psf_T_mean", "weight", ) + band_variants("riz", "mag", "mag_err", shear_catalog_type="metadetect") # Store the truth values only for the primary catalog cols += ["00/true_g1", "00/true_g2", "00/redshift_true"] # Make group for all the photometry group = metacal_file.create_group("shear") group.attrs["bands"] = self.bands # Extensible columns becase we don't know the size yet. # We will cut down the size at the end. for col in cols: group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="f8") # Integer columns int_cols = metadetect_variants("id", "flags") for col in int_cols: group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="i8") return cols + int_cols def setup_metacal_output(self, metacal_file, target_size): # Get a list of all the column names cols = ( [ "ra", "dec", "psf_g1", "psf_g2", "psf_g1", "psf_g2", "psf_T_mean", ] + metacal_variants("g1", "g2", "T", "s2n", "T_err") + band_variants("riz", "mag", "mag_err", shear_catalog_type="metacal") + ["weight"] ) cols += ["true_g1", "true_g2", "redshift_true"] # Make group for all the photometry group = metacal_file.create_group("shear") group.attrs["bands"] = self.bands # Extensible columns becase we don't know the size yet. # We will cut down the size at the end. for col in cols: group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="f8") group.create_dataset("id", (target_size,), maxshape=(target_size,), dtype="i8") for col in metacal_variants("flags"): group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="i8") return cols + ["id", "flags"] def load_metacal_response_model(self): """ Load an HDF file containing the response model R(log10(snr), size) R_std(log10(snr), size) where R is the mean metacal response in a bin and R_std is its standard deviation. So far only one of these files exists! """ import scipy.interpolate if self.config["unit_response"]: return model_file = self.open_input("response_model") snr_centers = model_file["R_model/log10_snr"][:] sz_centers = model_file["R_model/size"][:] R_mean = model_file["R_model/R_mean"][:] R_std = model_file["R_model/R_std"][:] model_file.close() # Save a 2D spline snr_grid, sz_grid = np.meshgrid(snr_centers, sz_centers) self.R_spline = scipy.interpolate.SmoothBivariateSpline( snr_grid.T.flatten(), sz_grid.T.flatten(), R_mean.flatten(), w=R_std.flatten(), ) self.Rstd_spline = scipy.interpolate.SmoothBivariateSpline( snr_grid.T.flatten(), sz_grid.T.flatten(), R_std.flatten() ) def write_output( self, start, target_size, photo_cols, metacal_cols, photo_file, photo_data, metacal_file, metacal_data, ): """ Save the photometry we have just simulated to disc Parameters ---------- photo_file: HDF File object metacal_file: HDF File object photo_data: dict Dictionary of simulated photometry metacal_data: dict Dictionary of simulated metacal data """ # Work out the range of data to output (since we will be # doing this in chunks). If we have cut down to a random # subset of the catalog then we may have gone over the # target length, depending on the random selection n = len(photo_data["id"]) end = min(start + n, target_size) # assert photo_data['id'].min()>0 # Save each column for name in photo_cols: photo_file[f"photometry/{name}"][start:end] = photo_data[name] for name in metacal_cols: metacal_file[f"shear/{name}"][start:end] = metacal_data[name] def make_mock_photometry(self, data): # The visit count affects the overall noise levels n_visit = self.config["visits_per_band"] # Do all the work in the function below photo = make_mock_photometry(n_visit, self.bands, data, self.config["unit_response"]) for col in self.config["extra_cols"].split(): photo[col] = data[col] return photo def make_mock_metadetect(self, data, photo): """ !!! This function is not working properly yet!!! Generate a mock metadetect table. This is a long and complicated function unfortunately. Throughout we assume a single R = R11 = R22, with R12 = R21 = 0 Parameters ---------- data: dict Dictionary of arrays read from the input cosmo catalog photo: dict Dictionary of arrays generated to simulate photometry """ # These are the numbers from figure F1 of the DES Y1 shear catalog paper # (this version is not yet public but is awaiting a second referee response) # Overall SNR for the three bands usually used for shape measurement # We use the true SNR not the estimated one, though these are pretty close snr = (photo["snr_r"] ** 2 + photo["snr_i"] ** 2 + photo["snr_z"] ** 2) ** 0.5 snr_1p = (photo["snr_r_1p"] ** 2 + photo["snr_i_1p"] ** 2 + photo["snr_z_1p"] ** 2) ** 0.5 snr_1m = (photo["snr_r_1m"] ** 2 + photo["snr_i_1m"] ** 2 + photo["snr_z_1m"] ** 2) ** 0.5 snr_2p = (photo["snr_r_2p"] ** 2 + photo["snr_i_2p"] ** 2 + photo["snr_z_2p"] ** 2) ** 0.5 snr_2m = (photo["snr_r_2m"] ** 2 + photo["snr_i_2m"] ** 2 + photo["snr_z_2m"] ** 2) ** 0.5 if self.config["unit_response"]: assert np.allclose(snr, snr_1p) assert np.allclose(snr, snr_2m) nobj = snr.size log10_snr = np.log10(snr) # Convert from the half-light radius which is in the # input catalogs to a sigma. Do this by pretending # that it is a Gaussian. This is clearly wrong, and # if this causes major errors in the size cuts we may # have to modify this size_hlr = data["size_true"] size_sigma = size_hlr / np.sqrt(2 * np.log(2)) size_T = 2 * size_sigma**2 # Use a fixed PSF across all the objects psf_fwhm = 0.75 psf_sigma = psf_fwhm / (2 * np.sqrt(2 * np.log(2))) psf_T = 2 * psf_sigma**2 if self.config["unit_response"]: R = 1.0 R_size = 0.0 else: # Use the response model to get a reasonable response # value for this size and SNR R_mean = self.R_spline(log10_snr, size_T, grid=False) R_std = self.Rstd_spline(log10_snr, size_T, grid=False) # Assume a 0.2 correlation between the size response # and the shear response. rho = 0.2 f = np.random.multivariate_normal([0.0, 0.0], [[1.0, rho], [rho, 1.0]], nobj).T R, R_size = f * R_std + R_mean # Convert magnitudes to fluxes according to the baseline # use in the metacal numbers flux_r = 10**0.4 * (27 - photo["mag_r"]) flux_i = 10**0.4 * (27 - photo["mag_i"]) flux_z = 10**0.4 * (27 - photo["mag_z"]) # Note that this is delta_gamma not 2*delta_gamma, because # of how we use it below delta_gamma = 0.01 # Use a fixed shape noise per component to generate # an overall if self.config["add_shape_noise"]: shape_noise = 0.26 else: shape_noise = 0.0 eps = np.random.normal(0, shape_noise, nobj) + 1.0j * np.random.normal(0, shape_noise, nobj) # True shears without shape noise g1 = data["shear_1"] g2 = data["shear_2"] if self.config["flip_g2"]: g2 *= -1 # Do the full combination of (g,epsilon) -> e, not the approx one g = g1 + 1j * g2 e = (eps + g) / (1 + g.conj() * eps) e1 = e.real e2 = e.imag zero = np.zeros(nobj) # Now collect together everything to go into the metacal # file output = { # Basic values "00/id": photo["id"], "1p/id": photo["id"], "1m/id": photo["id"], "2p/id": photo["id"], "2m/id": photo["id"], "00/ra": photo["ra"], "1p/ra": photo["ra"], "1m/ra": photo["ra"], "2p/ra": photo["ra"], "2m/ra": photo["ra"], "00/dec": photo["dec"], "1p/dec": photo["dec"], "1m/dec": photo["dec"], "2p/dec": photo["dec"], "2m/dec": photo["dec"], # Keep the truth values for use in some testing paths # We are not pretending to do meta "00/true_g1": g1, "00/true_g2": g2, "00/redshift_true": photo["redshift_true"], # g1 "00/g1": e1 * R, "1p/g1": (e1 + delta_gamma) * R, "1m/g1": (e1 - delta_gamma) * R, "2p/g1": e1 * R, "2m/g1": e1 * R, # g2 "00/g2": e2 * R, "1p/g2": e2 * R, "1m/g2": e2 * R, "2p/g2": (e2 + delta_gamma) * R, "2m/g2": (e2 - delta_gamma) * R, # T "00/T": size_T, "1p/T": size_T + R_size * delta_gamma, "1m/T": size_T - R_size * delta_gamma, "2p/T": size_T + R_size * delta_gamma, "2m/T": size_T - R_size * delta_gamma, # Terr "00/T_err": zero, "1p/T_err": zero, "1m/T_err": zero, "2p/T_err": zero, "2m/T_err": zero, # size "00/s2n": snr, "1p/s2n": snr_1p, "1m/s2n": snr_1m, "2p/s2n": snr_2p, "2m/s2n": snr_2m, # Magntiudes and fluxes, just copied from the inputs. "00/mag_r": photo["mag_r"], "00/mag_i": photo["mag_i"], "00/mag_z": photo["mag_z"], "00/mag_err_r": photo["mag_r_err"], "00/mag_err_i": photo["mag_i_err"], "00/mag_err_z": photo["mag_z_err"], "1p/mag_r": photo["mag_r_1p"], "2p/mag_r": photo["mag_r_2p"], "1m/mag_r": photo["mag_r_1m"], "2m/mag_r": photo["mag_r_2m"], "1p/mag_i": photo["mag_i_1p"], "2p/mag_i": photo["mag_i_2p"], "1m/mag_i": photo["mag_i_1m"], "2m/mag_i": photo["mag_i_2m"], "1p/mag_z": photo["mag_z_1p"], "2p/mag_z": photo["mag_z_2p"], "1m/mag_z": photo["mag_z_1m"], "2m/mag_z": photo["mag_z_2m"], "1p/mag_err_r": photo["mag_r_err"], "2p/mag_err_r": photo["mag_r_err"], "1m/mag_err_r": photo["mag_r_err"], "2m/mag_err_r": photo["mag_r_err"], "1p/mag_err_i": photo["mag_i_err"], "2p/mag_err_i": photo["mag_i_err"], "1m/mag_err_i": photo["mag_i_err"], "2m/mag_err_i": photo["mag_i_err"], "1p/mag_err_z": photo["mag_z_err"], "2p/mag_err_z": photo["mag_z_err"], "1m/mag_err_z": photo["mag_z_err"], "2m/mag_err_z": photo["mag_z_err"], # Fixed PSF parameters - all round with same size "00/psf_g1": zero, "1p/psf_g1": zero, "1m/psf_g1": zero, "2p/psf_g1": zero, "2m/psf_g1": zero, "00/psf_g2": zero, "1p/psf_g2": zero, "1m/psf_g2": zero, "2p/psf_g2": zero, "2m/psf_g2": zero, "00/psf_g1": zero, "1p/psf_g1": zero, "1m/psf_g1": zero, "2p/psf_g1": zero, "2m/psf_g1": zero, "00/psf_g2": zero, "1p/psf_g2": zero, "1m/psf_g2": zero, "2p/psf_g2": zero, "2m/psf_g2": zero, "00/psf_T_mean": np.repeat(psf_T, nobj), "1p/psf_T_mean": np.repeat(psf_T, nobj), "1m/psf_T_mean": np.repeat(psf_T, nobj), "2p/psf_T_mean": np.repeat(psf_T, nobj), "2m/psf_T_mean": np.repeat(psf_T, nobj), # Everything that gets this far should be used, so flag=0 "00/flags": np.zeros(nobj, dtype=np.int32), "1p/flags": np.zeros(nobj, dtype=np.int32), "1m/flags": np.zeros(nobj, dtype=np.int32), "2p/flags": np.zeros(nobj, dtype=np.int32), "2m/flags": np.zeros(nobj, dtype=np.int32), # we use weights of one for everything for metacal # if that ever changes we may also need to add # weight_1p, etc. "00/weight": np.ones(nobj), "1p/weight": np.ones(nobj), "1m/weight": np.ones(nobj), "2p/weight": np.ones(nobj), "2m/weight": np.ones(nobj), } return output def make_mock_metacal(self, data, photo): """ I copied the old version here to use if we do not want metadetect. Generate a mock metacal table. This is a long and complicated function unfortunately. Throughout we assume a single R = R11 = R22, with R12 = R21 = 0 Parameters ---------- data: dict Dictionary of arrays read from the input cosmo catalog photo: dict Dictionary of arrays generated to simulate photometry """ # These are the numbers from figure F1 of the DES Y1 shear catalog paper # (this version is not yet public but is awaiting a second referee response) # Overall SNR for the three bands usually used for shape measurement # We use the true SNR not the estimated one, though these are pretty close snr = (photo["snr_r"] ** 2 + photo["snr_i"] ** 2 + photo["snr_z"] ** 2) ** 0.5 snr_1p = (photo["snr_r_1p"] ** 2 + photo["snr_i_1p"] ** 2 + photo["snr_z_1p"] ** 2) ** 0.5 snr_1m = (photo["snr_r_1m"] ** 2 + photo["snr_i_1m"] ** 2 + photo["snr_z_1m"] ** 2) ** 0.5 snr_2p = (photo["snr_r_2p"] ** 2 + photo["snr_i_2p"] ** 2 + photo["snr_z_2p"] ** 2) ** 0.5 snr_2m = (photo["snr_r_2m"] ** 2 + photo["snr_i_2m"] ** 2 + photo["snr_z_2m"] ** 2) ** 0.5 if self.config["unit_response"]: assert np.allclose(snr, snr_1p) assert np.allclose(snr, snr_2m) nobj = snr.size log10_snr = np.log10(snr) # Convert from the half-light radius which is in the # input catalogs to a sigma. Do this by pretending # that it is a Gaussian. This is clearly wrong, and # if this causes major errors in the size cuts we may # have to modify this size_hlr = data["size_true"] size_sigma = size_hlr / np.sqrt(2 * np.log(2)) size_T = 2 * size_sigma**2 # Use a fixed PSF across all the objects psf_fwhm = 0.75 psf_sigma = psf_fwhm / (2 * np.sqrt(2 * np.log(2))) psf_T = 2 * psf_sigma**2 if self.config["unit_response"]: R = 1.0 R_size = 0.0 else: # Use the response model to get a reasonable response # value for this size and SNR R_mean = self.R_spline(log10_snr, size_T, grid=False) R_std = self.Rstd_spline(log10_snr, size_T, grid=False) # Assume a 0.2 correlation between the size response # and the shear response. rho = 0.2 f = np.random.multivariate_normal([0.0, 0.0], [[1.0, rho], [rho, 1.0]], nobj).T R, R_size = f * R_std + R_mean # Convert magnitudes to fluxes according to the baseline # use in the metacal numbers flux_r = 10**0.4 * (27 - photo["mag_r"]) flux_i = 10**0.4 * (27 - photo["mag_i"]) flux_z = 10**0.4 * (27 - photo["mag_z"]) # Note that this is delta_gamma not 2*delta_gamma, because # of how we use it below delta_gamma = 0.01 # Use a fixed shape noise per component to generate # an overall if self.config["add_shape_noise"]: shape_noise = 0.26 else: shape_noise = 0.0 eps = np.random.normal(0, shape_noise, nobj) + 1.0j * np.random.normal(0, shape_noise, nobj) # True shears without shape noise g1 = data["shear_1"] g2 = data["shear_2"] if self.config["flip_g2"]: g2 *= -1 # Do the full combination of (g,epsilon) -> e, not the approx one g = g1 + 1j * g2 e = (eps + g) / (1 + g.conj() * eps) e1 = e.real e2 = e.imag zero = np.zeros(nobj) # Now collect together everything to go into the metacal # file output = { # Basic values "id": photo["id"], "ra": photo["ra"], "dec": photo["dec"], # Keep the truth value just in case "true_g1": g1, "true_g2": g2, # add true redshift since it is used in source selector "redshift_true": photo["redshift_true"], # g1 "g1": e1 * R, "g1_1p": (e1 + delta_gamma) * R, "g1_1m": (e1 - delta_gamma) * R, "g1_2p": e1 * R, "g1_2m": e1 * R, # g2 "g2": e2 * R, "g2_1p": e2 * R, "g2_1m": e2 * R, "g2_2p": (e2 + delta_gamma) * R, "g2_2m": (e2 - delta_gamma) * R, # T "T": size_T, "T_1p": size_T + R_size * delta_gamma, "T_1m": size_T - R_size * delta_gamma, "T_2p": size_T + R_size * delta_gamma, "T_2m": size_T - R_size * delta_gamma, # Terr "T_err": zero, "T_err_1p": zero, "T_err_1m": zero, "T_err_2p": zero, "T_err_2m": zero, # size "s2n": snr, "s2n_1p": snr_1p, "s2n_1m": snr_1m, "s2n_2p": snr_2p, "s2n_2m": snr_2m, # Magntiudes and fluxes, just copied from the inputs. "mag_r": photo["mag_r"], "mag_i": photo["mag_i"], "mag_z": photo["mag_z"], "mag_err_r": photo["mag_r_err"], "mag_err_i": photo["mag_i_err"], "mag_err_z": photo["mag_z_err"], "mag_r_1p": photo["mag_r_1p"], "mag_r_2p": photo["mag_r_2p"], "mag_r_1m": photo["mag_r_1m"], "mag_r_2m": photo["mag_r_2m"], "mag_i_1p": photo["mag_i_1p"], "mag_i_2p": photo["mag_i_2p"], "mag_i_1m": photo["mag_i_1m"], "mag_i_2m": photo["mag_i_2m"], "mag_z_1p": photo["mag_z_1p"], "mag_z_2p": photo["mag_z_2p"], "mag_z_1m": photo["mag_z_1m"], "mag_z_2m": photo["mag_z_2m"], "mag_err_r_1p": photo["mag_r_err"], "mag_err_r_2p": photo["mag_r_err"], "mag_err_r_1m": photo["mag_r_err"], "mag_err_r_2m": photo["mag_r_err"], "mag_err_i_1p": photo["mag_i_err"], "mag_err_i_2p": photo["mag_i_err"], "mag_err_i_1m": photo["mag_i_err"], "mag_err_i_2m": photo["mag_i_err"], "mag_err_z_1p": photo["mag_z_err"], "mag_err_z_2p": photo["mag_z_err"], "mag_err_z_1m": photo["mag_z_err"], "mag_err_z_2m": photo["mag_z_err"], # Fixed PSF parameters - all round with same size "psf_g1": zero, "psf_g2": zero, "psf_T_mean": np.repeat(psf_T, nobj), # Everything that gets this far should be used, so flag=0 "flags": np.zeros(nobj, dtype=np.int32), "flags_1p": np.zeros(nobj, dtype=np.int32), "flags_1m": np.zeros(nobj, dtype=np.int32), "flags_2p": np.zeros(nobj, dtype=np.int32), "flags_2m": np.zeros(nobj, dtype=np.int32), # we use weights of one for everything for metacal # if that ever changes we may also need to add # weight_1p, etc. "weight": np.ones(nobj), } return output def apply_magnitude_cut(self, data): """ Allow for a cut in absolute magnitude. """ mag_limit = self.config["Mag_r_limit"] sel = data["Mag_true_r_sdss_z0"] < mag_limit ndet = sel.sum() ntot = sel.size fract = ndet * 100.0 / ntot print(f"{ndet} objects pass magnitude cut out of {ntot} objects ({fract:.1f}%)") # Remove all objects not selected for key in list(data.keys()): data[key] = data[key][sel] def remove_undetected(self, data, photo): """ Strip out any undetected objects from the two simulated data sets. Use a configuration parameter snr_limit to decide on the detection limit. """ snr_limit = self.config["snr_limit"] # This will become a boolean array in a minute when # we OR it with an array detected = False # Check if detected in any band. Makes a boolean array # Even though we started with just a single False. for band in self.bands: detected_in_band = photo[f"snr_{band}"] > snr_limit not_detected_in_band = ~detected_in_band # Set objects not detected in one band that are detected in another # to inf magnitude in that band, and the SNR to zero. # We have to do this for each of the variants also, because otherwise # we end up with wildly different final SNR values later. # This is the metadetection issue really! for v in metacal_variants(f"snr_{band}"): photo[v][not_detected_in_band] = 0.0 for v in metacal_variants(f"mag_{band}"): photo[v][not_detected_in_band] = np.inf # Record that we have detected this object at all detected |= detected_in_band # the protoDC2 sims have an edge with zero shear. # Remove it. zero_shear_edge = (abs(data["shear_1"]) == 0) & (abs(data["shear_2"]) == 0) print("Removing {} objects with identically zero shear in both terms".format(zero_shear_edge.sum())) detected &= ~zero_shear_edge # Print out interesting information ndet = detected.sum() ntot = detected.size fract = ndet * 100.0 / ntot print(f"Detected {ndet} out of {ntot} objects ({fract:.1f}%)") # Remove all objects not detected in *any* band # make a copy of the keys with list(photo.keys()) so we are not # modifying during the iteration for key in list(photo.keys()): photo[key] = photo[key][detected] for key in list(data.keys()): data[key] = data[key][detected]
[docs] class TXBuzzardMock(TXCosmoDC2Mock): """ Simulate mock photometry from Buzzard. May be obsolete. """ name = "TXBuzzardMock" parallel = False inputs = [("response_model", HDFFile)] outputs = [ ("shear_catalog", ShearCatalog), ("photometry_catalog", HDFFile), ] config_options = { "cat_name": StageParameter(str, "buzzard", msg="Name of the mock catalog to use."), "visits_per_band": StageParameter(int, 165, msg="Number of visits per band for noise simulation."), "snr_limit": StageParameter(float, 4.0, msg="S/N limit for object selection."), "max_size": StageParameter(int, 99999999999999, msg="Maximum catalog size for testing."), "extra_cols": StageParameter(str, "", msg="Extra columns to include (space-separated)."), "max_npix": StageParameter(int, 99999999999999, msg="Maximum number of pixels."), "unit_response": StageParameter(bool, False, msg="Whether to use unit response in simulation."), "flip_g2": StageParameter(bool, True, msg="Whether to flip g2 sign to match conventions."), }
[docs] class TXGaussianSimsMock(TXCosmoDC2Mock): """ Simulate mock photometry from gaussian simulations This stage simulates metacal data and metacalibrated photometry measurements, starting from simple Gaussian simulations produced starting from CCL power spectra and poission sampling galaxies from it. This is mainly useful for testing infrastructure starting from a purer simulation. """ name = "TXGaussianSimsMock" parallel = False inputs = [("response_model", HDFFile)] outputs = [ ("shear_catalog", ShearCatalog), ("photometry_catalog", HDFFile), ] config_options = { "cat_name": StageParameter(str, "GaussianSims", msg="Name of the Gaussian simulation catalog."), "visits_per_band": StageParameter(int, 165, msg="Number of visits per band for noise simulation."), "snr_limit": StageParameter(float, 0.0, msg="S/N limit for object selection (0 for all)."), "max_size": StageParameter(int, 99999999999999, msg="Maximum catalog size for testing."), "extra_cols": StageParameter(str, "", msg="Extra columns to include (space-separated)."), "max_npix": StageParameter(int, 99999999999999, msg="Maximum number of pixels."), "unit_response": StageParameter(bool, True, msg="Whether to use unit response in simulation."), "cat_size": StageParameter(int, 0, msg="Catalog size (0 for all)."), "flip_g2": StageParameter(bool, False, msg="Whether to flip g2 sign to match conventions."), "apply_mag_cut": StageParameter(bool, False, msg="Apply magnitude cut for descqa comparison."), "metadetect": StageParameter( bool, True, msg="Whether to make a metadetect-style catalog (True) or metacal (False)." ), "add_shape_noise": StageParameter(bool, False, msg="Whether to add shape noise to simulation."), } def data_iterator(self, cat): # all cols we need ( ra, dec, g1, g2, z, m_u, m_g, m_r, m_i, m_z, m_y, etrue1, etrue2, size, galaxy_id, ) = cat # figuring out number of chunks chunk_size = 1_000_000 nchunk = len(ra) // chunk_size if nchunk * chunk_size < len(ra): nchunk += 1 # main loop for i in range(nchunk): # start and end index of this chunk in full data s = chunk_size * i e = s + chunk_size # make dict just for this chunk of data data_chunk = {} data_chunk["ra"] = ra[s:e] data_chunk["dec"] = dec[s:e] data_chunk["shear_1"] = g1[s:e] data_chunk["shear_2"] = g2[s:e] data_chunk["size_true"] = size[s:e] data_chunk["galaxy_id"] = galaxy_id[s:e] data_chunk["redshift_true"] = z[s:e] data_chunk["ellipticity_1_true"] = etrue1[s:e] data_chunk["ellipticity_2_true"] = etrue2[s:e] data_chunk["mag_true_u_lsst"] = m_u[s:e] data_chunk["mag_true_g_lsst"] = m_g[s:e] data_chunk["mag_true_r_lsst"] = m_r[s:e] data_chunk["mag_true_i_lsst"] = m_i[s:e] data_chunk["mag_true_z_lsst"] = m_z[s:e] data_chunk["mag_true_y_lsst"] = m_y[s:e] # send this chunk of data back to caller yield data_chunk def load_catalog(self): """ This method loads the Gaussian sims produced in this notebook https://github.com/LSSTDESC/txpipe-cosmodc2/blob/master/notebooks/MaskAreaGaussianSims_and_FakeTXPipeInput.ipynb """ cat_name = self.config["cat_name"] self.bands = ("u", "g", "r", "i", "z", "y") print(f"Loading from catalog {cat_name}") cat = np.load(cat_name, allow_pickle=True) N = len(cat[0]) return cat, N
def make_mock_photometry(n_visit, bands, data, unit_response): """ Generate a mock photometric table with noise added This is mostly from LSE‐40 by Zeljko Ivezic, Lynne Jones, and Robert Lupton retrieved here: http://faculty.washington.edu/ivezic/Teaching/Astr511/LSST_SNRdoc.pdf """ output = {} nobj = data["galaxy_id"].size output["ra"] = data["ra"] output["dec"] = data["dec"] output["id"] = data["galaxy_id"] output["extendedness"] = np.ones(nobj) # Sky background, seeing FWHM, and system throughput, # all from table 2 of Ivezic, Jones, & Lupton B_b = np.array([85.07, 467.9, 1085.2, 1800.3, 2775.7, 3614.3]) fwhm = np.array([0.77, 0.73, 0.70, 0.67, 0.65, 0.63]) T_b = np.array([0.0379, 0.1493, 0.1386, 0.1198, 0.0838, 0.0413]) # effective pixels size for a Gaussian PSF, from equation # 27 of Ivezic, Jones, & Lupton pixel = 0.2 # arcsec N_eff = 2.436 * (fwhm / pixel) ** 2 # other numbers from Ivezic, Jones, & Lupton sigma_inst2 = 10.0**2 # instrumental noise in photons per pixel, just below eq 42 gain = 1 # ADU units per photon, also just below eq 42 D = 6.5 # primary mirror diameter in meters, from LSST key numbers page (effective clear diameter) # Not sure what effective clear diameter means but it's closer to the number used in the paper time = 30.0 # seconds per exposure, from LSST key numbers page sigma_b2 = 0.0 # error on background, just above eq 42 # combination of these used below, from various equations factor = 5455.0 / gain * (D / 6.5) ** 2 * (time / 30.0) # Fake some metacal responses if unit_response: mag_responses = [0.0 for i in bands] else: mag_responses = generate_mock_metacal_mag_responses(bands, nobj) delta_gamma = 0.01 # this is the half-delta gamma, i.e. gamma_+ - gamma_0 # that's the right thing to use here because we are doing m+ = m0 + dm/dy*dy # Use the same response for gamma1 and gamma2 for band, b_b, t_b, n_eff, mag_resp in zip(bands, B_b, T_b, N_eff, mag_responses): # truth magnitude mag = data[f"mag_true_{band}_lsst"] output[f"mag_true_{band}_lsst"] = mag # expected signal photons, over all visits c_b = factor * 10 ** (0.4 * (25 - mag)) * t_b * n_visit # expected background photons, over all visits background = np.sqrt((b_b + sigma_inst2 + sigma_b2) * n_eff * n_visit) # total expected photons mu = c_b + background # Observed number of photons in excess of the expected background. # This can go negative for faint magnitudes, indicating that the object is # not going to be detected n_obs = np.random.poisson(mu) - background n_obs_err = np.sqrt(mu) # signal to noise, true and estimated values true_snr = c_b / background obs_snr = n_obs / background # observed magnitude from inverting c_b expression above mag_obs = 25 - 2.5 * np.log10(n_obs / factor / t_b / n_visit) # converting error on n_obs to error on mag mag_err = 2.5 / np.log(10.0) / obs_snr output[f"true_snr_{band}"] = true_snr output[f"snr_{band}"] = obs_snr output[f"mag_{band}"] = mag_obs output[f"mag_err_{band}"] = mag_err output[f"mag_{band}_err"] = mag_err m = mag_resp * delta_gamma m1 = mag_obs + m m2 = mag_obs - m mag_obs_1p = m1 mag_obs_1m = m2 output[f"mag_{band}_1p"] = m1 output[f"mag_{band}_1m"] = m2 output[f"mag_{band}_2p"] = m1 output[f"mag_{band}_2m"] = m2 # Scale the SNR values according the to change in magnitude.r s = np.power(10.0, -0.4 * m) snr1 = obs_snr * s snr2 = obs_snr / s output[f"snr_{band}_1p"] = snr1 output[f"snr_{band}_1m"] = snr2 output[f"snr_{band}_2p"] = snr1 output[f"snr_{band}_2m"] = snr2 return output def generate_mock_metacal_mag_responses(bands, nobj): nband = len(bands) mu = np.zeros(nband) # seems approx mean of response across bands, from HSC tract rho = 0.25 # approx correlation between response in bands, from HSC tract sigma2 = 1.7**2 # approx variance of response, from HSC tract covmat = np.full((nband, nband), rho * sigma2) np.fill_diagonal(covmat, sigma2) mag_responses = np.random.multivariate_normal(mu, covmat, nobj).T return mag_responses
[docs] class TXSimpleMock(PipelineStage): """ Load an ascii astropy table and put it in shear catalog format. """ name = "TXSimpleMock" parallel = False inputs = [("mock_shear_catalog", TextFile)] outputs = [("shear_catalog", ShearCatalog)] config_options = {} def run(self): from astropy.table import Table import numpy as np # Load the data. We are assuming here it is small enough to fit in memory input_filename = self.get_input("mock_shear_catalog") input_data = Table.read(input_filename, format="ascii") n = len(input_data) data = {} # required columns for col in ["ra", "dec", "g1", "g2", "s2n", "T"]: data[col] = input_data[col] # It's most likely we will have a redshift column. # Check for both that and "redshift_true" if "redshift" in input_data.colnames: data["redshift_true"] = input_data["redshift"] elif "redshift_true" in input_data.colnames: data["redshift_true"] = input_data["redshift_true"] # If there is an ID column then use it, but otherwise just use # sequential IDs if "id" in input_data.colnames: data["galaxy_id"] = input_data["id"] else: data["galaxy_id"] = np.arange(len(input_data)) # if these catalogs are not present then we fake them. defaults = { "T_err": 0.0, "psf_g1": 0.0, "psf_g2": 0.0, "psf_T_mean": 0.202, # this corresponds to a FWHM of 0.75 arcsec "weight": 1.0, "flags": 0, } for key, value in defaults.items(): if key in input_data.colnames: data[key] = input_data[key] else: data[key] = np.full(n, value) self.save_catalog(data) def save_catalog(self, data): with self.open_output("shear_catalog") as f: g = f.create_group("shear") g.attrs["catalog_type"] = "simple" for key, value in data.items(): g.create_dataset(key, data=value)
[docs] class TXMockTruthPZ(PipelineStage): name = "TXMockTruthPZ" parallel = False inputs = [("shear_catalog", ShearCatalog)] outputs = [("photoz_pdfs", QPPDFFile)] config_options = { "mock_sigma_z": StageParameter(float, 0.001, msg="Sigma_z for mock photo-z PDF generation."), } def run(self): import qp import numpy as np sigma_z = self.config["mock_sigma_z"] # read the input truth redshifts with self.open_input("shear_catalog", wrapper=True) as f: group = f.file[f.get_primary_catalog_group()] n = group["ra"].size redshifts = group["redshift_true"][:] zgrid = np.linspace(0, 3, 301) pdfs = np.zeros((n, len(zgrid))) spread_z = sigma_z * (1 + redshifts) # make a gaussian PDF for each object delta = zgrid[np.newaxis, :] - redshifts[:, np.newaxis] pdfs = np.exp(-0.5 * (delta / spread_z[:, np.newaxis]) ** 2) / np.sqrt(2 * np.pi) / spread_z[:, np.newaxis] q = qp.Ensemble(qp.interp, data=dict(xvals=zgrid, yvals=pdfs)) q.set_ancil(dict(zmode=redshifts, zmean=redshifts, zmedian=redshifts)) q.write_to(self.get_output("photoz_pdfs"))
def test(): import pylab data = { "ra": None, "dec": None, "galaxy_id": None, } bands = "ugrizy" n_visit = 165 M5 = [24.22, 25.17, 24.74, 24.38, 23.80] for b, m5 in zip(bands, M5): data[f"mag_true_{b}_lsst"] = np.repeat(m5, 10000) results = make_mock_photometry(n_visit, bands, data, True) pylab.hist(results["snr_r"], bins=50, histtype="step") pylab.savefig("snr_r.png")