"""
These functions return the rotation periods and effective temperatures for
single members of benchmark open clusters. These are the functions used to
acquire and clean the data behind Figure 1 of BPH23. Generally, most users
will want to just download the data from the paper, rather than using these
functions. An additional dependency if you do want to use them is the
``cdips`` module, best installed via a setup.py install from
https://github.com/lgbouma/cdips.
Get all available cluster data:
| _get_cluster_Prot_Teff_data
Get and clean individual cluster data:
| get_alphaPer
| get_Pleiades
| get_Blanco1
| get_PscEri
| get_NGC3532
| get_GroupX
| get_Praesepe_Rampalli_2021
| get_NGC6811
| get_NGC6819
| get_Ruprecht147
| get_M67
"""
#############
## LOGGING ##
#############
import logging
from gyrointerp import log_sub, log_fmt, log_date_fmt
DEBUG = False
if DEBUG:
level = logging.DEBUG
else:
level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
level=level,
style=log_sub,
format=log_fmt,
datefmt=log_date_fmt,
)
LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception
#############
## IMPORTS ##
#############
import os
import numpy as np, pandas as pd
from astropy.table import Table
from astropy.io import fits
from copy import deepcopy
from matplotlib import cm
try:
from cdips.utils.gaiaqueries import (
given_dr3_sourceids_get_dr2_xmatch, given_source_ids_get_gaia_data,
given_source_ids_get_neighbor_counts
)
HAVE_GAIA_UTILS = 1
except (ModuleNotFoundError, AssertionError) as e:
# if cdips is not installed (github.com/lgbouma/cdips), and specifically if
# the gaia credentials have not been correctly configured at
# ~/.gaia_credentials per ``cdips.utils.gaiaqueries``, suppress the
# associated error.
LOGWARNING(e)
HAVE_GAIA_UTILS = 0
from gyrointerp.paths import LOCALDIR, DATADIR, RESULTSDIR
from gyrointerp.extinctionpriors import extinction_A_V_dict
from gyrointerp.teff import (
given_dr2_BpmRp_AV_get_Teff_Curtis2020, _given_VmKs_get_Teff,
_given_GmKs_get_Teff
)
from gyrointerp.helpers import (
prepend_colstr, left_merge, given_dr2_get_dr3_dataframes
)
def _get_cluster_Prot_Teff_data(N_colors=5, logo_colors=0):
"""
This function retrieves dataframes of all available reference cluster data.
It also defines the colors / labels / zorders used across many plots in the
BPH23 manuscript.
"""
assert N_colors in [5,6,7]
overwrite = 0
df_aper = get_alphaPer(overwrite=overwrite)
df_psceri = get_PscEri(overwrite=overwrite)
df_bla1 = get_Blanco1(overwrite=overwrite)
df_plei = get_Pleiades(overwrite=overwrite)
df_3532 = get_NGC3532(overwrite=overwrite)
df_grpx = get_GroupX(overwrite=overwrite)
df_prae = get_Praesepe_Rampalli_2021(overwrite=overwrite)
df_6811 = get_NGC6811(overwrite=overwrite)
df_6819 = get_NGC6819(overwrite=overwrite)
df_r147 = get_Ruprecht147(overwrite=overwrite)
df_m67 = get_M67(overwrite=overwrite)
# tested, manuscript
#cmap = cm.terrain(np.linspace(0,0.8,N_colors))
#cmap = cm.Accent(np.linspace(0,0.6,N_colors))
#cmap = cm.hsv(np.linspace(0,0.8,N_colors))
#cmap = [f"C{ix}" for ix in range(N_colors)]
#cmap = cm.Set3(np.linspace(0,0.5,N_colors))
#cmap = cm.tab20c(np.linspace(0.1,1,N_colors))
# adopted, manuscript
cmap = ["#5CC8CB", "#2B64C6", "#F9E16A",
"#81C83D", '#EC702D', '#E9A2AE',
"k"]
if logo_colors:
# ADOPTED LOGO COLORS
cmap = [None, "#ffb3ba", None, "#ffffba","#bae1ff", None]
manual_colors = 0
if manual_colors:
cmap = cm.RdYlBu(np.linspace(0,1,N_colors))
cmap = [None, cmap[0], None, cmap[1], cmap[2], None]
# contents:
# prot/teff dataframe, RGB color, label, zorder
z0 = 2
d = {
'α Per': [df_aper, cmap[0], '80 Myr α Per', z0+12, "o"],
'Pleiades': [df_plei, cmap[1], '120 Myr Pleiades', z0+2, "o"],
'Blanco-1': [df_bla1, cmap[1], '120 Myr Blanco-1', z0+2, "8"],
'Psc-Eri': [df_psceri, cmap[1], '120 Myr Psc-Eri', z0+2, "p"],
'NGC-3532': [df_3532, cmap[2], '300 Myr NGC-3532', z0+4, "o"],
'Group-X': [df_grpx, cmap[2], '300 Myr Group-X', z0+4, "8"],
'Praesepe': [df_prae, cmap[3], '670 Myr Praesepe', z0+6, "o"],
'NGC-6811': [df_6811, cmap[4], '1 Gyr NGC-6811', z0+8, "o"],
'NGC-6819': [df_6819, cmap[5], '2.5 Gyr NGC-6819', z0+10, "8"],
'Ruprecht-147': [df_r147, cmap[5], '2.7 Gyr Rup-147', z0+10, "o"],
'M67': [df_m67, cmap[6], '4 Gyr M67', z0+12, "o"],
'120-Myr': [None, cmap[1], '', None, None],
'300-Myr': [None, cmap[2], '', None, None],
'2.6-Gyr': [None, cmap[5], '', None, None],
}
return d
[docs]def get_NGC6819(overwrite=0):
"""
Return NGC-6819 (Meibom+2015), as processed by Curtis+2020 (Table 5).
"""
cluster = 'ngc6819'
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, "Curtis_2020_ngc6819_X_GDR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(cachepath)
fitspath = os.path.join(DATADIR, "literature",
"Curtis_2020_t5_composite_923_rows.fits")
hdul = fits.open(fitspath)
df = Table(hdul[1].data).to_pandas()
hdul.close()
# require finite Gaia DR2 source_id. (Drop 2 rows of the 440, wide
# binaries).
df = df[df.Cluster == "NGC 6819"]
dr2_source_ids = np.array(df.GaiaDR2).astype(np.int64)
gdf, s_dr3 = given_dr2_get_dr3_dataframes(
dr2_source_ids, "Curtis_2020_ngc6819_DR2", "Curtis_2020_ngc6819_DR3",
overwrite=overwrite
)
mdf = deepcopy(df)
mdf = mdf.rename({'GaiaDR2':'dr2_source_id'}, axis='columns')
mdf['dr2_bp_rp'] = mdf['BP-RP'] # gaia dr2 BP-RP
# Merge
mdf0 = left_merge(mdf, s_dr3, 'dr2_source_id', 'dr2_source_id')
mdf = left_merge(mdf0, gdf, 'dr3_source_id', 'dr3_source_id')
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["NGC-6819"]
)
dGmag = 3.25 # anything within 20x the brightness of the target
sep_arcsec = 1.5*3.98 # and 1.5 Kepler pixels
runid = "Curtis_2020_ngc6819_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert np.all(
count_df.source_id.astype(str) == mdf.dr3_source_id.astype(str)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
if "abs_magnitude_difference" not in mdf:
mdf["abs_magnitude_difference"] = np.abs(mdf.magnitude_difference)
mdf["flag_bad_gaiamatch"] = (
(mdf.angular_distance > 2)
|
(mdf.abs_magnitude_difference > 0.2)
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
# NOTE: CAMD and RV cuts already done by Curtis+2020, so omitting.
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_possible_binary"] = (
(mdf.flag_ruwe_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
|
(mdf.flag_bad_gaiamatch)
)
# follow advice from
# https://vizier.cfa.harvard.edu/viz-bin/VizieR-3?-source=J/ApJ/904/140/table1&-out.max=50&-out.form=HTML%20Table&-oc.form=sexa
# "Bench" column notes / definition
mdf['flag_pass_author_quality'] = mdf.Prot > 0
mdf["flag_benchmark_period"] = (
mdf.flag_pass_author_quality
#&
#(~mdf.flag_possible_binary)
&
(~mdf.flag_bad_gaiamatch)
)
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_Ruprecht147(overwrite=0):
"""
Return Curtis+2020 (Table 1) rotation period dataframe with keys:
"Prot", "Teff_Curtis20", "flag_benchmark_period" (for gyro calibration),
_plus_ the usual Gaia DR3 columns.
NOTE:
The "flag_possible_binary" flag in this case is weaker than in the
other comparison clusters, because there aren't very many good stars.
"""
cluster = 'ruprecht147'
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, "Curtis_2020_rup147_X_GDR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(cachepath)
fitspath = os.path.join(DATADIR, "literature",
"Curtis_2020_t1_ruprecht147_440_rows.fits")
hdul = fits.open(fitspath)
df = Table(hdul[1].data).to_pandas()
hdul.close()
# require finite Gaia DR2 source_id. (Drop 2 rows of the 440, wide
# binaries).
df = df[(~pd.isnull(df.GaiaDR2)) & (df.GaiaDR2 > 0)]
dr2_source_ids = np.array(df.GaiaDR2).astype(np.int64)
gdf, s_dr3 = given_dr2_get_dr3_dataframes(
dr2_source_ids, "Curtis_2020_rup147_DR2", "Curtis_2020_rup147_DR3",
overwrite=overwrite
)
mdf = deepcopy(df)
mdf = mdf.rename({'GaiaDR2':'dr2_source_id'}, axis='columns')
mdf = mdf.drop(['ruwe'], axis='columns')
mdf['dr2_bp_rp'] = mdf['BP-RP'] # gaia dr2 BP-RP
# Merge
mdf0 = left_merge(mdf, s_dr3, 'dr2_source_id', 'dr2_source_id')
mdf = left_merge(mdf0, gdf, 'dr3_source_id', 'dr3_source_id')
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["Ruprecht-147"]
)
dGmag = 3.25 # anything within 20x the brightness of the target
sep_arcsec = 1.5*3.98 # and 1.5 Kepler pixels
runid = "Curtis_2020_rup147_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert np.all(
count_df.source_id.astype(str) == mdf.dr3_source_id.astype(str)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
if "abs_magnitude_difference" not in mdf:
mdf["abs_magnitude_difference"] = np.abs(mdf.magnitude_difference)
mdf["flag_bad_gaiamatch"] = (
(mdf.angular_distance > 2)
|
(mdf.abs_magnitude_difference > 0.2)
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
# NOTE: CAMD and RV cuts already done by Curtis+2020, so omitting.
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_possible_binary"] = (
(mdf.flag_ruwe_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
|
(mdf.flag_bad_gaiamatch)
)
mdf['flag_pass_author_quality'] = (
(mdf.Bench == 'Yes')
&
(mdf.Prot > 0)
)
# follow advice from
# https://vizier.cfa.harvard.edu/viz-bin/VizieR-3?-source=J/ApJ/904/140/table1&-out.max=50&-out.form=HTML%20Table&-oc.form=sexa
# "Bench" column notes / definition
mdf["flag_benchmark_period"] = (
(mdf.flag_pass_author_quality)
&
(~mdf.flag_possible_binary)
)
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_NGC6811(overwrite=0):
"""
Returns the Curtis+2019 (Table 1) rotation period dataframe with keys:
"Prot", "Teff_Curtis20", "flag_benchmark_period" (for gyro calibration),
_plus_ "flag_possible_binary", "flag_ruwe_outlier", "flag_camd_outlier",
"flag_rverror_outlier", _plus_ the usual Gaia DR3 columns.
"""
cluster = 'ngc6811'
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, "Curtis_2019_X_GDR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(cachepath)
fitspath = os.path.join(DATADIR, "literature",
"Curtis_2019_table1_171rows.fits")
hdul = fits.open(fitspath)
df = Table(hdul[1].data).to_pandas()
hdul.close()
dr2_source_ids = np.array(df.Gaia).astype(np.int64)
gdf, s_dr3 = given_dr2_get_dr3_dataframes(
dr2_source_ids, "Curtis_2019_ngc6811_DR2", "Curtis_2019_ngc6811_DR3",
overwrite=overwrite
)
mdf = deepcopy(df)
mdf = mdf.rename({'Gaia':'dr2_source_id'}, axis='columns')
gcdf = given_source_ids_get_gaia_data(
dr2_source_ids, "Curtis_2019_dr2_colors", n_max=10000,
overwrite=overwrite, enforce_all_sourceids_viable=True,
which_columns='g.source_id, g.bp_rp', gaia_datarelease='gaiadr2'
)
gcdf = gcdf.rename({"source_id":"dr2_source_id"}, axis='columns')
assert np.all(gcdf.dr2_source_id.astype(str) == mdf.dr2_source_id.astype(str))
mdf['dr2_bp_rp'] = gcdf['bp_rp']
# Merge
mdf0 = left_merge(mdf, s_dr3, 'dr2_source_id', 'dr2_source_id')
mdf = left_merge(mdf0, gdf, 'dr3_source_id', 'dr3_source_id')
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["Pleiades"]
)
dGmag = 3.25 # anything within 20x the brightness of the target
sep_arcsec = 1.5*3.98 # and 1.5 Kepler pixels
runid = "Curtis_2019_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert np.all(
count_df.source_id.astype(str) == mdf.dr3_source_id.astype(str)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
if "abs_magnitude_difference" not in mdf:
mdf["abs_magnitude_difference"] = np.abs(mdf.magnitude_difference)
mdf["flag_bad_gaiamatch"] = (
(mdf.angular_distance > 2)
|
(mdf.abs_magnitude_difference > 0.2)
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
tempcsv = os.path.join(outdir, f"{cluster}.csv")
camd_outliercsv = os.path.join(
outdir, f"camd_outliers.csv"
)
rverror_outliercsv = os.path.join(
outdir, f"rverror_outliers.csv"
)
if not os.path.exists(tempcsv):
mdf.to_csv(tempcsv, index=False)
errmsg = (
f'Wrote {tempcsv}. Open in glue, and manually select CAMD '
f'outliers in M_G vs bp_rp, g_rp, bp_g and write '
f'to {camd_outliercsv}. Do the same in radial_velocity_error '
f'versus bp_rp, and save to {rverror_outliercsv}\n'
)
errmsg += 42*'-'
LOGINFO(42*'-')
raise AssertionError(errmsg)
assert os.path.exists(camd_outliercsv)
assert os.path.exists(rverror_outliercsv)
df_camd_outlier = pd.read_csv(camd_outliercsv)
df_rverror_outlier = pd.read_csv(rverror_outliercsv)
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_camd_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_camd_outlier.dr3_source_id.astype(str)
)
mdf["flag_rverror_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_rverror_outlier.dr3_source_id.astype(str)
)
mdf["flag_possible_binary"] = (
(mdf.flag_ruwe_outlier)
|
(mdf.flag_camd_outlier)
|
(mdf.flag_rverror_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
|
(mdf.flag_bad_gaiamatch)
)
mdf["flag_pass_author_quality"] = (
(mdf.f_Seq == 'Y')
)
mdf["flag_benchmark_period"] = (
(mdf.flag_pass_author_quality)
&
(~mdf.flag_possible_binary)
)
mdf = mdf.rename({"Per":"Prot"}, axis='columns')
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_Blanco1(overwrite=0):
"""
Returns the Gillen+2020 (Table 1) rotation period dataframe with keys:
"Prot", "Teff_Curtis20", "flag_benchmark_period" (for gyro calibration),
_plus_ "flag_possible_binary", "flag_ruwe_outlier", "flag_camd_outlier",
"flag_rverror_outlier", _plus_ the usual Gaia DR3 columns.
"""
cluster = 'blanco1'
authoryr = "Gillen_2020"
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, f"{authoryr}_X_GDR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(cachepath)
csvpath = os.path.join(DATADIR, "literature",
"Gillen_2020_Blanco1_table1.dat")
df = pd.read_csv(csvpath, delim_whitespace=True)
dr2_source_ids = np.array(df.Gaia_ID).astype(np.int64)
df['dr2_source_id'] = dr2_source_ids
df['dr2_bp_rp'] = df['BP_mag'] - df['RP_mag']
gdf, s_dr3 = given_dr2_get_dr3_dataframes(
dr2_source_ids, f"{authoryr}_{cluster}_DR2", f"{authoryr}_{cluster}_DR3",
overwrite=overwrite
)
mdf = deepcopy(df)
# Merge
mdf0 = left_merge(mdf, s_dr3, 'dr2_source_id', 'dr2_source_id')
mdf = left_merge(mdf0, gdf, 'dr3_source_id', 'dr3_source_id')
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["Blanco-1"]
)
dGmag = 3.25 # anything within 20x the brightness of the target
sep_arcsec = 1*21 # and 1 TESS pixel
runid = f"{authoryr}_{cluster}_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert (
np.all((count_df.source_id.astype(str) ==
mdf.dr3_source_id.astype(str)))
and
np.all(count_df.index == mdf.index)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
if "abs_magnitude_difference" not in mdf:
mdf["abs_magnitude_difference"] = np.abs(mdf.magnitude_difference)
mdf["flag_bad_gaiamatch"] = (
(mdf.angular_distance > 2)
|
(mdf.abs_magnitude_difference > 0.2)
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
tempcsv = os.path.join(outdir, f"{cluster}.csv")
camd_outliercsv = os.path.join(
outdir, f"camd_outlier.csv"
)
rverror_outliercsv = os.path.join(
outdir, f"rverror_outlier.csv"
)
if not os.path.exists(tempcsv):
mdf.to_csv(tempcsv, index=False)
errmsg = (
f'Wrote {tempcsv}. Open in glue, and manually select CAMD '
f'outliers in M_G vs bp_rp, g_rp, bp_g and write '
f'to {camd_outliercsv}. Do the same in radial_velocity_error '
f'versus bp_rp, and save to {rverror_outliercsv}\n'
)
errmsg += 42*'-'
LOGINFO(42*'-')
raise AssertionError(errmsg)
assert os.path.exists(camd_outliercsv)
assert os.path.exists(rverror_outliercsv)
df_camd_outlier = pd.read_csv(camd_outliercsv)
df_rverror_outlier = pd.read_csv(rverror_outliercsv)
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_camd_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_camd_outlier.dr3_source_id.astype(str)
)
mdf["flag_rverror_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_rverror_outlier.dr3_source_id.astype(str)
)
mdf["flag_possible_binary"] = (
(mdf.flag_camd_outlier)
|
(mdf.flag_rverror_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
|
(mdf.flag_bad_gaiamatch)
)
mdf['flag_pass_author_quality'] = (
~mdf.mult.str.contains("r") # RV binaries from Gillen+20 table
)
mdf["flag_benchmark_period"] = (
(~mdf.flag_possible_binary)
&
(mdf.flag_pass_author_quality)
)
mdf = mdf.rename({"P_adopt": "Prot"}, axis='columns')
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_Pleiades(overwrite=0):
"""
Returns the Rebull+2016 (Table 2) rotation period dataframe with keys:
"Prot", "Teff_Curtis20", "flag_benchmark_period" (for gyro calibration),
_plus_ "flag_possible_binary", "flag_ruwe_outlier", "flag_camd_outlier",
"flag_rverror_outlier", _plus_ the usual Gaia DR3 columns.
"""
cluster = 'pleiades'
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, "Rebull_2016_X_GDR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(cachepath)
fitspath = os.path.join(DATADIR, "literature",
"Rebull_2016_Table2_759_stars.fits")
hdul = fits.open(fitspath)
df = Table(hdul[1].data).to_pandas()
hdul.close()
# Interpolate effective temperatures from the Mamajek table and (V-Ks)0.
# (less reddening dependence than B-V).
df['Teff_Mamajek'] = _given_VmKs_get_Teff(df['__V-K_0'].astype(float))
# Crossmatch from EPIC --> DR2, using the 1" crossmatch from
# https://gaia-kepler.fun/, downloaded to LOCALDIR
# Keep matches with the closest K2 to Gaia DR2 angular distance as the
# default best match.
# This procedure misses five objects, presumably due to errors in
# the proper motion propagation:
# EPICs ['210966700', '211059981', '210903023', '220115919', '211128979']
epic_dr2_path = os.path.join(LOCALDIR, "k2_dr2_1arcsec.fits")
hdul = fits.open(epic_dr2_path)
epic_df = Table(hdul[1].data).to_pandas()
hdul.close()
get_best_epic = lambda _df: (
_df.sort_values(by='k2_gaia_ang_dist').
drop_duplicates(subset='epic_number', keep='first')
)
epic_df = get_best_epic(epic_df)
selcols = ['source_id', 'epic_number', 'tm_name', 'k2_gaia_ang_dist',
'bp_rp']
epic_df = epic_df[selcols]
epic_df = epic_df.rename(
{'source_id':'dr2_source_id', 'bp_rp':'dr2_bp_rp'}, axis='columns'
)
epic_df['dr2_source_id'] = epic_df.dr2_source_id.astype(str)
mdf = left_merge(df, epic_df, 'EPIC', 'epic_number')
N_missing = len(mdf[pd.isnull(mdf.dr2_source_id)])
LOGINFO(f'Missing {N_missing}/{len(df)} in the Rebull16->GDR2 crossmatch.')
LOGINFO('Dropping them!')
mdf = mdf[~pd.isnull(mdf.dr2_source_id)]
mdf['dr2_source_id'] = mdf.dr2_source_id.astype(str)
assert len(mdf) == len(df) - N_missing
dr2_source_ids = np.array(mdf.dr2_source_id).astype(np.int64)
gdf, s_dr3 = given_dr2_get_dr3_dataframes(
dr2_source_ids, "Rebull_2016_DR2", "Rebull_2016_DR3_gaiadr3",
overwrite=overwrite
)
# Merge
mdf0 = left_merge(mdf, s_dr3, 'dr2_source_id', 'dr2_source_id')
mdf = left_merge(mdf0, gdf, 'dr3_source_id', 'dr3_source_id')
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["Pleiades"]
)
dGmag = 3.25 # anything within 100x the brightness of the target
sep_arcsec = 1.5*3.98 # and 1.5 Kepler pixels
runid = "Rebull_2016_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert np.all(
count_df.source_id.astype(str) == mdf.dr3_source_id.astype(str)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
if "abs_magnitude_difference" not in mdf:
mdf["abs_magnitude_difference"] = np.abs(mdf.magnitude_difference)
mdf["flag_bad_gaiamatch"] = (
(mdf.angular_distance > 2)
|
(mdf.abs_magnitude_difference > 0.2)
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
cluster = 'pleiades'
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
tempcsv = os.path.join(outdir, f"{cluster}.csv")
camd_outliercsv = os.path.join(
outdir, f"camd_outlier.csv"
)
rverror_outliercsv = os.path.join(
outdir, f"rverror_outlier.csv"
)
rv_outliercsv = os.path.join(
outdir, f"rv_outlier.csv"
)
fdwarf_outliercsv = os.path.join(
outdir, f"fdwarf_outlier.csv"
)
if not os.path.exists(tempcsv):
mdf.to_csv(tempcsv, index=False)
errmsg = (
f'Wrote {tempcsv}. Open in glue, and manually select CAMD '
f'outliers in M_G vs bp_rp, g_rp, bp_g, V-Ks_0, and write '
f'to {camd_outliercsv}. Do the same in radial_velocity_error '
f'versus bp_rp, and save to {rverror_outliercsv}\n'
)
errmsg += 42*'-'
LOGINFO(42*'-')
raise AssertionError(errmsg)
assert os.path.exists(camd_outliercsv)
assert os.path.exists(rverror_outliercsv)
assert os.path.exists(rv_outliercsv)
assert os.path.exists(fdwarf_outliercsv)
df_camd_outlier = pd.read_csv(camd_outliercsv)
df_rverror_outlier = pd.read_csv(rverror_outliercsv)
df_rv_outlier = pd.read_csv(rv_outliercsv)
df_fdwarf_outlier = pd.read_csv(fdwarf_outliercsv)
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_camd_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_camd_outlier.dr3_source_id.astype(str)
)
mdf["flag_rverror_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_rverror_outlier.dr3_source_id.astype(str)
)
mdf["flag_rv_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_rv_outlier.dr3_source_id.astype(str)
)
mdf["flag_fdwarf_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_fdwarf_outlier.dr3_source_id.astype(str)
)
# Benchmark Pleiades periods:
# Include:
# * "best" membership indicator from Rebull+2016
# * stars with only a single reported period from Rebull+2016
# Exclude:
# * Bad gaia DR2 <-> DR3 matches (angular distance > 2 arcsec or magnitude
# difference > 0.1 mag)
# * Stars with Gaia DR3 RVs >~ 30 km/s from the cluster mean
# * The four binaries in the "F-dwarf clump" described by Stauffer+2016 in
# their section 5.1.
# * RUWE > 1.2
# * manually identified CAMD outliers (from M_G vs bp_rp, g_rp, bp_g, V-Ks_0)
# * RV error outliers
# * Things the Gaia DR3 pipeline flagged as non-single stars
sel = mdf.Per2 == 0
mdf.loc[sel, 'Per2'] = np.nan
mdf["flag_pass_author_quality"] = (
(~mdf.flag_fdwarf_outlier)
&
(pd.isnull(mdf.Per2)) # multiple photometric periods usually implies binarity
&
(mdf.Mm == 'best')
)
mdf["flag_possible_binary"] = (
(mdf.flag_ruwe_outlier)
|
(mdf.flag_camd_outlier)
|
(mdf.flag_rverror_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
|
(mdf.flag_bad_gaiamatch)
|
(mdf.flag_rv_outlier)
)
mdf["flag_benchmark_period"] = (
(mdf.flag_pass_author_quality)
&
(~mdf.flag_possible_binary)
)
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_NGC3532(overwrite=0):
"""
Returns the Fritzewski+2021 rotation period dataframe with keys:
"Prot", "Teff_Curtis20", "flag_benchmark_period" (for gyro calibration),
_plus_ "flag_possible_binary", "flag_ruwe_outlier", "flag_camd_outlier",
"flag_rverror_outlier", _plus_ the usual Gaia DR3 columns.
"""
cluster = 'ngc3532'
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, "Fritzewski_2021_X_DR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(cachepath)
fitspath = os.path.join(DATADIR, "literature",
"Fritzewski_2021_AA_652_60_table2_NGC3532.fits")
hdul = fits.open(fitspath)
df = Table(hdul[1].data).to_pandas()
hdul.close()
# Interpolate effective temperatures from the Mamajek table and (V-Ks)0.
# (less reddening dependence than B-V).
df['Teff_Mamajek'] = _given_VmKs_get_Teff(df['__V-Ks_0'].astype(float))
dr2_source_ids = np.array(df.GaiaDR2).astype(np.int64)
gdf, s_dr3 = given_dr2_get_dr3_dataframes(
dr2_source_ids, "Fritzewski_2021_DR2", "Fritzewski_2021_DR3_gaiadr3",
overwrite=overwrite
)
gcdf = given_source_ids_get_gaia_data(
dr2_source_ids, "Fritzewski_2021_dr2_colors", n_max=10000,
overwrite=overwrite, enforce_all_sourceids_viable=True,
which_columns='g.source_id, g.bp_rp', gaia_datarelease='gaiadr2'
)
gcdf = gcdf.rename({"source_id":"dr2_source_id"}, axis='columns')
assert np.all(gcdf.dr2_source_id.astype(str) == df.GaiaDR2.astype(str))
df['dr2_bp_rp'] = gcdf['bp_rp']
# Merge
mdf = left_merge(df, s_dr3, 'GaiaDR2', 'dr2_source_id')
mdf = left_merge(mdf, gdf, 'dr3_source_id', 'dr3_source_id')
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["NGC-3532"]
)
dGmag = 3.25 # anything within 100x the brightness of the target
sep_arcsec = 1.5*0.289 # and 1.5 Y4KCam pixels (sec 2.1 of the paper)
runid = "Fritzewski_2021_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert np.all(
count_df.source_id.astype(str) == mdf.dr3_source_id.astype(str)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
tempcsv = os.path.join(outdir, f"{cluster}.csv")
camd_outliercsv = os.path.join(
outdir, f"{cluster}_camd_outliers.csv"
)
rverror_outliercsv = os.path.join(
outdir, f"{cluster}_rverror_outliers.csv"
)
if not os.path.exists(tempcsv):
mdf.to_csv(tempcsv, index=False)
LOGINFO(42*'-')
LOGINFO(f'Wrote {tempcsv}. Open in glue, and manually select CAMD '
'outliers in M_G vs bp_rp, g_rp, bp_g, V-Ks_0, and write '
'to {camd_outliercsv}. Do the same in radial_velocity_error '
'versus bp_rp, and save to {rverror_outliercsv}')
LOGINFO(42*'-')
raise AssertionError
assert os.path.exists(camd_outliercsv)
assert os.path.exists(rverror_outliercsv)
df_camd_outlier = pd.read_csv(camd_outliercsv)
df_rverror_outlier = pd.read_csv(rverror_outliercsv)
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_camd_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_camd_outlier.dr3_source_id.astype(str)
)
mdf["flag_rverror_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_rverror_outlier.dr3_source_id.astype(str)
)
# Benchmark NGC-3532 periods:
# Include:
# * "First class" periods
# * "algorithmic" periods.
# Exclude:
# * RUWE > 1.2
# * manually identified CAMD outliers (from M_G vs bp_rp, g_rp, bp_g, V-Ks_0)
# * RV error outliers (there are three)
# * Things the Gaia DR3 pipeline flagged as non-single stars (there is
# one).
mdf["flag_possible_binary"] = (
(mdf.flag_ruwe_outlier)
|
(mdf.flag_camd_outlier)
|
(mdf.flag_rverror_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
)
mdf["flag_pass_author_quality"] = (
#(mdf.Class == 3) # include "activity informed" periods
#|
(mdf.Class == 1)
|
(mdf.Class == 2)
)
mdf["flag_benchmark_period"] = (
(mdf.flag_pass_author_quality)
&
(~mdf.flag_possible_binary)
)
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_Praesepe_Rampalli_2021(overwrite=0):
"""
Returns the Rampalli+2021 Praesepe rotation period dataframe with keys:
"Prot", "Teff_Curtis20", "flag_benchmark_period" (for gyro calibration),
_plus_ "flag_possible_binary", "flag_ruwe_outlier", "flag_camd_outlier",
"flag_rverror_outlier", _plus_ the usual Gaia DR3 columns.
"""
cluster = 'praesepe_rampalli2021'
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, "Rampalli_2021_X_DR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(
cachepath, dtype={'dr2_source_id':str, 'dr3_source_id':str}
)
table_path = os.path.join(DATADIR, "literature",
"Rampalli_2021_apjac0c1et3_mrt.txt")
t = Table.read(table_path, format='cds')
df_t3 = t.to_pandas()
table_path = os.path.join(DATADIR, "literature",
"Rampalli_2021_apjac0c1et4_mrt.txt")
t = Table.read(table_path, format='cds')
df_t4 = t.to_pandas()
# drop 17 cases without any assessment of previous rotation period quality
df_t4 = df_t4[~pd.isnull(df_t4.QFClean)]
assert len(df_t3) == len(df_t4)
# get all the rotation period data and the star data in the same dataframe.
# unsurprisingly, a few columns are duplicated. In such cases, treat
# "table 3" as the default ones.
df = left_merge(df_t3, df_t4, 'Gaia', 'Gaia')
df = df_t3.merge(
df_t4, left_on="Gaia", right_on="Gaia", how='left',
suffixes=("","_t4")
)
assert len(df) == len(df_t3)
dr3_source_ids = np.array(
df.Gaia.str.extract(r'(Gaia EDR3 )(\d*)')[1]
).astype(str)
df['dr3_source_id'] = dr3_source_ids
mdf = df
dr3_source_ids = dr3_source_ids.astype(np.int64)
runid_dr3 = 'Rampalli_2021_Praesepe_DR3'
gdf = given_source_ids_get_gaia_data(
dr3_source_ids, runid_dr3, n_max=10000, overwrite=overwrite,
enforce_all_sourceids_viable=True, which_columns='*',
gaia_datarelease='gaiadr3'
)
gdf = gdf.rename({"source_id":"dr3_source_id"}, axis='columns')
selcols = ['dr3_source_id', 'ra', 'dec', 'parallax', 'parallax_error',
'parallax_over_error', 'pmra', 'pmdec', 'ruwe',
'phot_g_mean_flux_over_error', 'phot_g_mean_mag',
'phot_bp_mean_flux_over_error', 'phot_rp_mean_flux',
'phot_rp_mean_flux_over_error', 'phot_bp_rp_excess_factor',
'phot_bp_n_contaminated_transits', 'phot_bp_n_blended_transits',
'phot_rp_n_contaminated_transits', 'phot_rp_n_blended_transits',
'bp_rp', 'bp_g', 'g_rp', 'radial_velocity',
'radial_velocity_error', 'rv_method_used',
'rv_expected_sig_to_noise', 'vbroad', 'vbroad_error', 'l', 'b',
'ecl_lon', 'ecl_lat', 'non_single_star', 'teff_gspphot',
'teff_gspphot_lower', 'teff_gspphot_upper', 'azero_gspphot',
'ag_gspphot', 'ebpminrp_gspphot']
gdf = gdf[selcols]
#
# Get the Gaia DR2 BP-RP color
#
dr2_x_dr3_df = given_dr3_sourceids_get_dr2_xmatch(
dr3_source_ids, "Rampalli_2021_dr3_to_dr2", overwrite=overwrite,
enforce_all_sourceids_viable=True
)
# Take the closest magnitude difference as the single match.
get_dr2_xm = lambda _df: (
_df.sort_values(by='abs_magnitude_difference').
drop_duplicates(subset='dr3_source_id', keep='first')
)
s_dr2 = get_dr2_xm(dr2_x_dr3_df)
LOGINFO(10*'-')
LOGINFO(s_dr2.describe())
LOGINFO(10*'-')
dr2_source_ids = np.array(s_dr2.dr2_source_id).astype(np.int64)
gcdf = given_source_ids_get_gaia_data(
dr2_source_ids, "Rampalli_2021_dr2_color", n_max=10000,
overwrite=overwrite, enforce_all_sourceids_viable=True,
which_columns='g.source_id, g.bp_rp', gaia_datarelease='gaiadr2'
)
gcdf = gcdf.rename(
{"source_id":"dr2_source_id", "bp_rp":"dr2_bp_rp"}, axis='columns'
)
assert np.all(
np.array(gcdf.dr2_source_id) == np.array(s_dr2.dr2_source_id)
)
s_dr2 = s_dr2.merge(
gcdf, on='dr2_source_id', how='left'
)
s_dr2['dr2_source_id'] = s_dr2['dr2_source_id'].astype(str)
# Merge
mdf0 = left_merge(mdf, s_dr2, 'dr3_source_id', 'dr3_source_id')
mdf = left_merge(mdf0, gdf, 'dr3_source_id', 'dr3_source_id')
if "abs_magnitude_difference" not in mdf:
mdf["abs_magnitude_difference"] = np.abs(mdf.magnitude_difference)
mdf["flag_bad_gaiamatch"] = (
(mdf.angular_distance > 2)
|
(mdf.abs_magnitude_difference > 0.2)
)
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["Praesepe"]
)
dGmag = 3.25 # anything within 100x the brightness of the target
sep_arcsec = 1.5*3.98 # and 1.5 Kepler pixels
runid = "Rampalli_2021_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert np.all(
count_df.source_id.astype(str) == mdf.dr3_source_id.astype(str)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
tempcsv = os.path.join(outdir, f"{cluster}.csv")
camd_outliercsv = os.path.join(
outdir, f"camd_outlier.csv"
)
rverror_outliercsv = os.path.join(
outdir, f"rverror_outlier.csv"
)
if not os.path.exists(tempcsv):
mdf.to_csv(tempcsv, index=False)
LOGINFO(42*'-')
LOGINFO(f'Wrote {tempcsv}. Open in glue, and manually select CAMD '
'outliers in M_G vs bp_rp, g_rp, bp_g, V-Ks_0, and write '
'to {camd_outliercsv}. Do the same in radial_velocity_error '
'versus bp_rp, and save to {rverror_outliercsv}')
LOGINFO(42*'-')
raise AssertionError
assert os.path.exists(camd_outliercsv)
assert os.path.exists(rverror_outliercsv)
df_camd_outlier = pd.read_csv(camd_outliercsv)
df_rverror_outlier = pd.read_csv(rverror_outliercsv)
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_camd_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_camd_outlier.dr3_source_id.astype(str)
)
mdf["flag_rverror_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_rverror_outlier.dr3_source_id.astype(str)
)
# Benchmark periods:
# Include:
# * Clean quality flag only
# Exclude:
# * All possible signs of binarity
# * Any missing EDR3 measurements.
mdf["flag_possible_binary"] = (
(mdf.flag_ruwe_outlier)
|
(mdf.flag_camd_outlier)
|
(mdf.flag_rverror_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
)
mdf["flag_pass_author_quality"] = (
~(
(mdf.Multi == 1)
|
(mdf.Neigh == 1)
|
(mdf.Bin == 1)
|
(mdf.Binary == 1)
|
(mdf.Flag) # missing Gaia EDR3 measurement
)
&
(mdf.QFClean == 1)
)
mdf["flag_benchmark_period"] = (
(~mdf.flag_possible_binary)
&
(mdf.flag_pass_author_quality)
)
mdf['dr2_source_id'] = mdf.dr2_source_id.astype(str)
mdf = mdf.drop({'source_id'}, axis='columns')
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_PscEri(overwrite=0):
"""
Returns the Curtis+2019 (Table 2) rotation period dataframe with keys:
"Prot", "Teff_Curtis20", "flag_benchmark_period" (for gyro calibration),
_plus_ "flag_possible_binary", "flag_ruwe_outlier", "flag_camd_outlier",
"flag_rverror_outlier", _plus_ the usual Gaia DR3 columns.
"""
cluster = 'psc-eri'
authoryr = "Curtis_2019"
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, f"{authoryr}_X_GDR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(cachepath)
fitspath = os.path.join(DATADIR, "literature",
"Curtis_2019_PscEri_table2_101rows.fits")
hdul = fits.open(fitspath)
df = Table(hdul[1].data).to_pandas()
hdul.close()
dr2_source_ids = np.array(df.Source).astype(np.int64)
df['dr2_source_id'] = dr2_source_ids
gdf, s_dr3 = given_dr2_get_dr3_dataframes(
dr2_source_ids, f"{authoryr}_{cluster}_DR2", f"{authoryr}_{cluster}_DR3",
overwrite=overwrite
)
mdf = deepcopy(df)
mdf['dr2_bp_rp'] = mdf['GBP-GRP']
# Merge
mdf0 = left_merge(mdf, s_dr3, 'dr2_source_id', 'dr2_source_id')
mdf = left_merge(mdf0, gdf, 'dr3_source_id', 'dr3_source_id')
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["Psc-Eri"]
)
dGmag = 3.25 # anything within 20x the brightness of the target
sep_arcsec = 1*21 # and 1 TESS pixel
runid = f"{authoryr}_{cluster}_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert (
np.all((count_df.source_id.astype(str) ==
mdf.dr3_source_id.astype(str)))
and
np.all(count_df.index == mdf.index)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
if "abs_magnitude_difference" not in mdf:
mdf["abs_magnitude_difference"] = np.abs(mdf.magnitude_difference)
mdf["flag_bad_gaiamatch"] = (
(mdf.angular_distance > 2)
|
(mdf.abs_magnitude_difference > 0.2)
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
mdf = mdf.drop(['Simbad'], axis='columns')
tempcsv = os.path.join(outdir, f"{cluster}.csv")
camd_outliercsv = os.path.join(
outdir, f"camd_outliers.csv"
)
rverror_outliercsv = os.path.join(
outdir, f"rverror_outliers.csv"
)
if not os.path.exists(tempcsv):
mdf.to_csv(tempcsv, index=False)
errmsg = (
f'Wrote {tempcsv}. Open in glue, and manually select CAMD '
f'outliers in M_G vs bp_rp, g_rp, bp_g and write '
f'to {camd_outliercsv}. Do the same in radial_velocity_error '
f'versus bp_rp, and save to {rverror_outliercsv}\n'
)
errmsg += 42*'-'
LOGINFO(42*'-')
raise AssertionError(errmsg)
assert os.path.exists(camd_outliercsv)
assert os.path.exists(rverror_outliercsv)
df_camd_outlier = pd.read_csv(camd_outliercsv)
df_rverror_outlier = pd.read_csv(rverror_outliercsv)
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_camd_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_camd_outlier.dr3_source_id.astype(str)
)
mdf["flag_rverror_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_rverror_outlier.dr3_source_id.astype(str)
)
mdf["flag_possible_binary"] = (
(mdf.flag_camd_outlier)
|
(mdf.flag_rverror_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
|
(mdf.flag_bad_gaiamatch)
)
mdf['flag_pass_author_quality'] = True
mdf["flag_benchmark_period"] = (
(~mdf.flag_possible_binary)
&
(mdf.flag_pass_author_quality)
)
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_GroupX(overwrite=0):
"""
Returns the Messina+2022 rotation period dataframe with keys: "Prot",
"Teff_Curtis20", "flag_benchmark_period" (for gyro calibration),
_plus_ "flag_possible_binary", "flag_ruwe_outlier", "flag_camd_outlier",
"flag_rverror_outlier", _plus_ the usual Gaia DR3 columns.
"""
cluster = 'groupx'
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, "Messina_2022_X_GDR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(cachepath)
fitspath = os.path.join(DATADIR, "literature",
"Messina_2022_AAP_groupx_173_stars.fits")
hdul = fits.open(fitspath)
df = Table(hdul[1].data).to_pandas()
hdul.close()
tic_ids = np.array(df.TIC)
dr2_source_ids = []
N_stars = len(tic_ids)
from astrobase.services.identifiers import tic_to_gaiadr2
for ix, tic_id in enumerate(tic_ids):
LOGINFO(f"{ix}/{N_stars}...")
dr2_source_ids.append(tic_to_gaiadr2(tic_id))
assert len(dr2_source_ids) == len(tic_ids)
dr2_source_ids = np.array(dr2_source_ids).astype(np.int64)
df['dr2_source_id'] = dr2_source_ids
gdf, s_dr3 = given_dr2_get_dr3_dataframes(
dr2_source_ids, "Messina_2022_groupx_DR2", "Messina_2022_groupx_DR3",
overwrite=overwrite
)
mdf = deepcopy(df)
gcdf = given_source_ids_get_gaia_data(
dr2_source_ids, "Messina_2022_dr2_color", n_max=10000,
overwrite=overwrite, enforce_all_sourceids_viable=True,
which_columns='g.source_id, g.bp_rp', gaia_datarelease='gaiadr2'
)
gcdf = gcdf.rename(
{"source_id":"dr2_source_id", "bp_rp":"dr2_bp_rp"}, axis='columns'
)
assert np.all(np.array(gcdf.dr2_source_id) == np.array(mdf.dr2_source_id))
mdf['dr2_bp_rp'] = gcdf['dr2_bp_rp']
# Merge
mdf0 = left_merge(mdf, s_dr3, 'dr2_source_id', 'dr2_source_id')
mdf = left_merge(mdf0, gdf, 'dr3_source_id', 'dr3_source_id')
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["Group-X"]
)
mdf['Teff_Mamajek'] = _given_GmKs_get_Teff(mdf['__G-K_0'])
dGmag = 3.25 # anything within 20x the brightness of the target
sep_arcsec = 1*21 # and 1 TESS pixel
runid = "Messina_2022_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert np.all(
count_df.source_id.astype(str) == mdf.dr3_source_id.astype(str)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
if "abs_magnitude_difference" not in mdf:
mdf["abs_magnitude_difference"] = np.abs(mdf.magnitude_difference)
mdf["flag_bad_gaiamatch"] = (
(mdf.angular_distance > 2)
|
(mdf.abs_magnitude_difference > 0.2)
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
tempcsv = os.path.join(outdir, f"{cluster}.csv")
camd_outliercsv = os.path.join(
outdir, f"camd_outliers.csv"
)
rverror_outliercsv = os.path.join(
outdir, f"rverror_outliers.csv"
)
if not os.path.exists(tempcsv):
mdf.to_csv(tempcsv, index=False)
errmsg = (
f'Wrote {tempcsv}. Open in glue, and manually select CAMD '
f'outliers in M_G vs bp_rp, g_rp, bp_g and write '
f'to {camd_outliercsv}. Do the same in radial_velocity_error '
f'versus bp_rp, and save to {rverror_outliercsv}\n'
)
errmsg += 42*'-'
LOGINFO(42*'-')
raise AssertionError(errmsg)
assert os.path.exists(camd_outliercsv)
assert os.path.exists(rverror_outliercsv)
df_camd_outlier = pd.read_csv(camd_outliercsv)
df_rverror_outlier = pd.read_csv(rverror_outliercsv)
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_camd_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_camd_outlier.dr3_source_id.astype(str)
)
mdf["flag_rverror_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_rverror_outlier.dr3_source_id.astype(str)
)
mdf["flag_possible_binary"] = (
(mdf.flag_ruwe_outlier)
|
(mdf.flag_camd_outlier)
|
(mdf.flag_rverror_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
|
(mdf.flag_bad_gaiamatch)
)
mdf["flag_pass_author_quality"] = (
(mdf.Grade == 'A')
&
(mdf.n_Seq == ' ')
)
mdf["flag_benchmark_period"] = (
(mdf.flag_pass_author_quality)
&
(~mdf.flag_possible_binary)
)
mdf = mdf.rename({"Per":"Prot"}, axis='columns')
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_alphaPer(overwrite=0):
"""
Returns the Boyle & Bouma 2023 rotation period dataframe with keys: "Prot",
"Teff_Curtis20", "flag_benchmark_period" (for gyro calibration),
_plus_ "flag_possible_binary", "flag_ruwe_outlier", "flag_camd_outlier",
"flag_rverror_outlier", _plus_ the usual Gaia DR3 columns.
"""
# NOTE: overwrite has no effect for alpha-Per CSV.
cluster = 'alpha-per'
authoryr = "Boyle_table3_full"
outdir = os.path.join(DATADIR, "interim", cluster)
cachepath = os.path.join(outdir, f"{authoryr}_DR3_supp.csv")
if os.path.exists(cachepath):
LOGINFO(f"Found {cachepath}; returning.")
return pd.read_csv(cachepath)
df0 = pd.read_csv(os.path.join(
outdir, "Boyle_table3_full.csv")
)
df1 = pd.read_csv(os.path.join(
outdir, "Boyle_inprep_X_GDR3_supplemented.csv")
)
core_csv_path = os.path.join(outdir, "core_select_vl_vb_X_Y_Z.csv")
if os.path.exists(core_csv_path):
# hand-tuned core selection to match Boyle+Bouma 2023, for purposes of
# the appendix figure. these cuts were performed in the
# core_select_vl_vb_X_Y_Z.glu glue session, to match the vl/vb/X/Y/Z
# cuts described in the aforementioned manuscript.
df2 = pd.read_csv(core_csv_path)
else:
df2 = None
df = df0.merge(df1, how='left', left_on='dr3_source_id',
right_on='dr3_source_id', suffixes=("", "_supp"))
if df2 is not None:
in_core = df.dr3_source_id.isin(df2.dr3_source_id)
df['flag_in_core'] = in_core
df = df.rename(
{'Teff_Curtis20':'Teff_Curtis20_fixedreddening'},
axis='columns'
)
df = df.rename({
"period":"Prot",
"flag_benchmark_period":"flag_benchmark_period_Andy",
"teff_curtis20":"Teff_Curtis20" #variable reddening,
}, axis='columns')
df['flag_pass_author_quality'] = df['in_gyro_sample']
df['flag_benchmark_period'] = df['flag_pass_author_quality']
df.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}; returning.")
return df
[docs]def get_alphaPer_construct(overwrite=0):
"""
Constructor for ``getters.get_alphaPer``
"""
cluster = 'alpha-per'
authoryr = "Boyle_inprep"
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, f"{authoryr}_X_GDR3_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(cachepath)
csvpath = os.path.join(DATADIR, "literature",
"Boyle_in_prep_alpha-Per.csv")
df = pd.read_csv(csvpath)
dr2_source_ids = np.array(df.dr2_source_id).astype(np.int64)
df['dr2_source_id'] = dr2_source_ids
gdf, s_dr3 = given_dr2_get_dr3_dataframes(
dr2_source_ids, f"{authoryr}_{cluster}_DR2", f"{authoryr}_{cluster}_DR3",
overwrite=overwrite
)
mdf = deepcopy(df)
gcdf = given_source_ids_get_gaia_data(
dr2_source_ids, f"{authoryr}_dr2_color", n_max=10000,
overwrite=overwrite, enforce_all_sourceids_viable=True,
which_columns='g.source_id, g.bp_rp', gaia_datarelease='gaiadr2'
)
gcdf = gcdf.rename(
{"source_id":"dr2_source_id", "bp_rp":"dr2_bp_rp"}, axis='columns'
)
# careful with this... better to actually just .merge!
assert (
np.all(np.array(gcdf.dr2_source_id) == np.array(mdf.dr2_source_id))
and
np.all(gcdf.index == mdf.index)
)
mdf['dr2_bp_rp'] = gcdf['dr2_bp_rp']
# Merge
mdf0 = left_merge(mdf, s_dr3, 'dr2_source_id', 'dr2_source_id')
mdf = left_merge(mdf0, gdf, 'dr3_source_id', 'dr3_source_id')
mdf['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(mdf.dr2_bp_rp), extinction_A_V_dict["alpha-Per"]
)
dGmag = 3.25 # anything within 20x the brightness of the target
sep_arcsec = 1*21 # and 1 TESS pixel
runid = f"{authoryr}_{cluster}_neighbor_count"
count_df, _ = given_source_ids_get_neighbor_counts(
np.array(mdf.dr3_source_id).astype(np.int64), dGmag, sep_arcsec,
runid, overwrite=overwrite
)
assert (
np.all((count_df.source_id.astype(str) ==
mdf.dr3_source_id.astype(str)))
and
np.all(count_df.index == mdf.index)
)
mdf['nbhr_count'] = count_df['nbhr_count']
mdf['flag_nbhr_count'] = (
mdf['nbhr_count'] >= 1
)
if "abs_magnitude_difference" not in mdf:
mdf["abs_magnitude_difference"] = np.abs(mdf.magnitude_difference)
mdf["flag_bad_gaiamatch"] = (
(mdf.angular_distance > 2)
|
(mdf.abs_magnitude_difference > 0.2)
)
mdf["M_G"] = mdf['phot_g_mean_mag'] + 5*np.log10(mdf['parallax']/1e3) + 5
tempcsv = os.path.join(outdir, f"{cluster}.csv")
camd_outliercsv = os.path.join(
outdir, f"camd_outliers.csv"
)
rverror_outliercsv = os.path.join(
outdir, f"rv_error_outliers.csv"
)
if not os.path.exists(tempcsv):
mdf.to_csv(tempcsv, index=False)
errmsg = (
f'Wrote {tempcsv}. Open in glue, and manually select CAMD '
f'outliers in M_G vs bp_rp, g_rp, bp_g and write '
f'to {camd_outliercsv}. Do the same in radial_velocity_error '
f'versus bp_rp, and save to {rverror_outliercsv}\n'
)
errmsg += 42*'-'
LOGINFO(42*'-')
raise AssertionError(errmsg)
assert os.path.exists(camd_outliercsv)
assert os.path.exists(rverror_outliercsv)
df_camd_outlier = pd.read_csv(camd_outliercsv)
df_rverror_outlier = pd.read_csv(rverror_outliercsv)
mdf["flag_ruwe_outlier"] = mdf.ruwe > 1.2
mdf["flag_camd_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_camd_outlier.dr3_source_id.astype(str)
)
mdf["flag_rverror_outlier"] = mdf.dr3_source_id.astype(str).isin(
df_rverror_outlier.dr3_source_id.astype(str)
)
mdf["flag_possible_binary"] = (
(mdf.flag_camd_outlier)
|
(mdf.flag_rverror_outlier)
|
(mdf.non_single_star)
|
(mdf.flag_nbhr_count)
)
mdf["flag_benchmark_period"] = (
(~mdf.flag_possible_binary)
&
(~mdf.flag_bad_gaiamatch)
)
mdf = mdf.drop(['Unamed: 0'], axis='columns')
mdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return mdf
[docs]def get_M67(overwrite=0):
"""
Return concatenation of Barnes+2016, Dungee+2022, and Gruner+2023 M67
sequences, with keys:
"Prot", "Teff_Curtis20", "flag_benchmark_period" (for gyro calibration).
"""
cluster = 'M67'
outdir = os.path.join(DATADIR, "interim", cluster)
if not os.path.exists(outdir): os.mkdir(outdir)
cachepath = os.path.join(outdir, "M67_supplemented.csv")
if os.path.exists(cachepath) and not overwrite:
LOGINFO(f"Found {cachepath}, and not overwrite; returning.")
return pd.read_csv(
cachepath, dtype={'dr2_source_id':str, 'dr3_source_id':str}
)
table_path = os.path.join(
DATADIR, "literature", "Dungee_2022_apjac90bet2_mrt.txt"
)
t = Table.read(table_path, format='cds')
df_d22 = t.to_pandas()
df_d22 = df_d22[df_d22.converged == 'True']
df_d22['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(df_d22["gaiaBPmag"]) - np.array(df_d22["gaiaRPmag"]),
extinction_A_V_dict['M67']
)
csvpath = os.path.join(
DATADIR, "literature", "Gruner_Barnes_Weingrill_2023_M67_tableC2.csv"
)
df_g23 = pd.read_csv(csvpath)
df_g23['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(df_g23["(BP-RP)0"]), 0
)
csvpath = os.path.join(
DATADIR, "literature",
"Barnes_2016_apj523472t1_ascii_X_DR2_supplemented.txt"
)
df_b16 = pd.read_csv(csvpath)
df_b16['Teff_Curtis20'] = given_dr2_BpmRp_AV_get_Teff_Curtis2020(
np.array(df_b16["bp_rp"]), extinction_A_V_dict['M67']
)
DEBUG_PLOT = 1
if DEBUG_PLOT:
from gyrointerp.models import slow_sequence
import matplotlib.pyplot as plt
plt.close("all")
fig,ax = plt.subplots()
ax.scatter(df_d22['Teff_Curtis20'], df_d22['prot'])
ax.scatter(df_g23['Teff_Curtis20'], df_g23['period'])
ax.scatter(df_b16['Teff_Curtis20'], df_b16['P'])
_teffs = np.linspace(3800,6200,500)
ax.plot(
_teffs, slow_sequence(_teffs, 4000), c='k', zorder=-1, lw=0.5
)
ax.update({
'xlim': ax.get_xlim()[::-1]
})
outpath = os.path.join(RESULTSDIR, "debug", "M67_lit_data.png")
fig.savefig(outpath, bbox_inches='tight')
df_d22 = df_d22.rename({'prot':'Prot'}, axis='columns')
df_g23 = df_g23.rename({'period':'Prot'}, axis='columns')
df_b16 = df_b16.rename({'P':'Prot'}, axis='columns')
outdf = pd.concat((
df_b16[['Teff_Curtis20', 'Prot']],
df_d22[['Teff_Curtis20', 'Prot']],
df_g23[['Teff_Curtis20', 'Prot']]
))
# This seems to be concensus from Gruner+2023 and Dungee+2022.
outdf['flag_benchmark_period'] = (
(outdf.Prot > 12)
&
(outdf.Prot < 60)
)
outdf.to_csv(cachepath, index=False)
LOGINFO(f"Wrote {cachepath}")
return outdf