'''
This module implements the DataFrame class, which manages the storage and
manipulation of the data tables (multi-visit detections, average-visit detections, stamps, photometry, PSF subtraction, false positives, etc.)
throughout the pipeline.
'''
import os
from glob import glob

import pandas as pd

from straklip.stralog import getLogger
from straklip.utils.ancillary import keys_list_from_dic
[docs]
class DataFrame():
    '''
    Container for the pipeline configuration and its pandas data tables.

    Configuration values are stored as plain attributes. Every attribute
    whose name is selected by ``keys_list_from_dic(self.__dict__, '_df')`` is
    treated as a table and written to its own file by ``save_dataframes``;
    ``load_dataframe`` reads the files back and stores each one as
    ``<file name>_df``.
    '''
    # def __getstate__(self):
    #     return {"data": self.values, "columns": self.columns}

    def __init__(self, path2data='', path2out='', path2database='', path2pam='', target='', inst='', pixscale=1, gain=1, PAMdict=None, tilebase=15, radec=None, filters=None, xyaxis=None, fitsext='_flt', skipphot=False, dq2mask=None, zpt=None, Av=None, dist=0, kmodes=None, type='type', maxsep=2, minsep=0, df_ext_in='.h5', df_ext_out='.h5', steps=None):
        '''
        Create the dataframe object.

        Container arguments default to ``None`` and are replaced by a fresh
        empty list/dict per instance, so that instances never share (and
        never mutate) a common default object. A container that is passed
        explicitly is stored as is, without copying.

        Parameters
        ----------
        path2data : str, optional
            stored as is; presumably the directory of the input data
            (TODO confirm).
        path2out : str, optional
            directory where the data frame files are written by
            ``save_dataframes`` and read by ``load_dataframe``.
        path2database : str, optional
            stored as is; presumably the directory of the database
            (TODO confirm).
        path2pam : str, optional
            stored as is; presumably the directory of the Pixel Area Maps
            (TODO confirm).
        target : str, optional
            name of the target.
        inst : str, optional
            name of the instrument.
        pixscale : int, optional
            instrument pixscale.
        gain : int, optional
            instrument gain.
        PAMdict : dict, optional
            instrument Pixel Area Map dictionary linking each PAM to the
            image extension.
        tilebase : int, optional
            side of the square tile.
        radec : list, optional
            list of ra/dec labels.
        filters : list, optional
            list of filters labels.
        xyaxis : list, optional
            list of xy axis length of the camera (in pixels).
        fitsext : str, optional
            name extension of fits file. The default is '_flt'.
        skipphot : bool, optional
            If True, skip pipeline aperture photometry and use the input
            catalog photometry instead. Default is False.
        dq2mask : list, optional
            list of data quality values to mask in data array.
        zpt : dict, optional
            stored as is; presumably the zero points of each filter
            (TODO confirm).
        Av : dict, optional
            Av=1 extinctions for each filter.
        dist : float, optional
            distance of the target in parsec. The default is 0.
        kmodes : list, optional
            list of kmodes for PSF subtraction.
        type : str, optional
            stored as is; meaning not visible in this module (TODO confirm).
        maxsep : int, optional
            stored as is; presumably a maximum separation (TODO confirm).
        minsep : int, optional
            stored as is; presumably a minimum separation (TODO confirm).
        df_ext_in : str, optional
            extension of the files read by ``load_dataframe``
            ('.h5' or '.csv').
        df_ext_out : str, optional
            extension of the files written by ``save_dataframes``
            ('.h5' or '.csv').
        steps : list, optional
            steps recorded so far; ``save_dataframes`` appends to it.

        Returns
        -------
        None.
        '''
        self.df_ext_in = df_ext_in
        self.df_ext_out = df_ext_out
        self.path2out = path2out
        self.path2data = path2data
        self.path2database = path2database
        self.path2pam = path2pam
        self.target = target
        self.inst = inst
        self.filters = [] if filters is None else filters
        self.radec = [] if radec is None else radec
        self.gain = gain
        self.PAMdict = {} if PAMdict is None else PAMdict
        self.xyaxis = [] if xyaxis is None else xyaxis
        self.fitsext = fitsext
        self.skipphot = skipphot
        self.dq2mask = [] if dq2mask is None else dq2mask
        self.zpt = {} if zpt is None else zpt
        self.Av = {} if Av is None else Av
        self.kmodes = [] if kmodes is None else kmodes
        self.pixscale = pixscale
        self.tilebase = tilebase
        self.dist = dist
        self.type = type
        self.maxsep = maxsep
        self.minsep = minsep
        self.steps = [] if steps is None else steps

    ######################
    # Ancillary routines #
    ######################

    def save_dataframes(self, step):
        '''
        Save every data frame attribute to its own file in ``path2out``.

        Parameters
        ----------
        step : object
            label of the step being saved; appended to ``self.steps`` when
            not already present.

        Returns
        -------
        None.
        '''
        if step not in self.steps:
            self.steps.append(step)

        self.keys = keys_list_from_dic(self.__dict__, '_df')
        getLogger(__name__).info('Saving the the following keys in %s to %s files in %s'%(self.keys,self.df_ext_out,str(self.path2out)))

        for key in self.keys:
            table = getattr(self, key)
            # os.path.join accepts both str and pathlib.Path directories, and
            # an empty path2out resolves to the current directory instead of
            # the filesystem root.
            filename = os.path.join(self.path2out, key.split('_df')[0] + self.df_ext_out)

            if key == 'crossmatch_ids_df':
                # Carry the non-table attributes along as metadata of this
                # table.
                for label, value in vars(self).items():
                    if '_df' not in label:
                        table.attrs[label] = value

            if self.df_ext_out == '.h5':
                table.to_hdf(filename, key=key, mode='w')
            elif self.df_ext_out == '.csv':
                table.to_csv(filename, mode='w', encoding='utf-8-sig', index=False)
            else:
                getLogger(__name__).error(f'DataFrame extension {self.df_ext_out} not supported. Please use .h5 or .csv')

    def load_dataframe(self):
        '''
        Load every data frame file found in ``path2out``.

        Each file ``<key><df_ext_in>`` is read and stored on the instance as
        the attribute ``<key>_df``. If ``df_ext_in`` is not supported, an
        error is logged and nothing is loaded.

        Returns
        -------
        None.
        '''
        self.list_of_HDF5_keys(self.path2out)

        if self.df_ext_in == '.h5':
            def read_table(filename):
                return pd.read_hdf(filename, mode='r')
        elif self.df_ext_in == '.csv':
            def read_table(filename):
                return pd.read_csv(filename, encoding='utf-8-sig')
        else:
            # Bail out here: without a reader there is no table to attach,
            # and continuing would fail on an unbound variable.
            getLogger(__name__).error(f'DataFrame extension {self.df_ext_in} not supported. Please use .h5 or .csv')
            return

        for key in self.keys:
            filename = os.path.join(self.path2out, key + self.df_ext_in)
            setattr(self, key + '_df', read_table(filename))

    def list_of_HDF5_keys(self, path, ext=None):
        '''
        Generate the list of keys from the data frame files in a directory.

        The result is stored in ``self.keys`` (sorted, so the order does not
        depend on the file system). A key is the file name without its
        directory and without its final extension.

        Parameters
        ----------
        path : str or path-like
            directory to search.
        ext : str, optional
            file extension to look for. The default is None, in which case
            ``self.df_ext_in`` is used.

        Returns
        -------
        None.
        '''
        extension = self.df_ext_in if ext is None else ext
        matches = glob(os.path.join(path, f'*{extension}'))
        # basename/splitext keep dots inside the name and work with any path
        # separator, unlike splitting on '/' and '.'.
        self.keys = sorted(
            os.path.splitext(os.path.basename(name))[0] for name in matches
        )

    def remove_HDF5_key(self):
        '''
        Interactively remove keys from an HDF5 store.

        The store is opened at ``self.df_path / self.name``. Neither attribute
        is set by ``__init__``, so both must be assigned on the instance
        before calling this method (TODO confirm where they are expected to
        come from). The user is shown the list of keys, asked for the
        1-based positions to remove (comma separated) and asked to confirm
        each removal. Invalid or out-of-range positions are reported and
        skipped.

        Returns
        -------
        None.
        '''
        store_path = self.df_path/self.name

        with pd.HDFStore(store_path) as store:
            keys = store.keys()
            print('List of keys:',keys)
            answer = input("Enter position of the key to remove (i.e 1,2,3...):")
            if len(answer) >= 1:
                for token in answer.split(','):
                    try:
                        position = int(token)
                    except ValueError:
                        print('Invalid position \'%s\'. No change made' % token)
                        continue
                    # Positions are 1-based; reject 0 and negatives so they
                    # cannot wrap around to the end of the list.
                    if not 1 <= position <= len(keys):
                        print('Position %d out of range. No change made' % position)
                        continue
                    key_name = keys[position - 1]
                    out = input('Removing \'%s\' key from dataframe. Please confirm (y/n):'%key_name)
                    if out == 'y':
                        store.remove(str(key_name))
                    else:
                        print('No change made')
            else:
                print('No change made')

        # Re-open the store to show the keys that are left.
        with pd.HDFStore(store_path) as store:
            print('List of keys:',store.keys())