Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • main
  • v0.1
  • v0.2
  • v0.2.1
  • v0.3
  • v0.3.1
  • v0.3.2
  • v0.3.3
8 results

Target

Select target project
  • andrea.giannetti/swiss_army_knife_stable
1 result
Select Git revision
  • main
  • v0.1
  • v0.2
  • v0.2.1
  • v0.3
  • v0.3.1
  • v0.3.2
  • v0.3.3
8 results
Show changes
Showing
with 442 additions and 0 deletions
import pandas as pd
import yaml
import os
df = pd.read_csv(os.path.join('assets', 'data', 'ch3oh_data_top35.csv'))
config_dict = {
'simulated_ratio_realizations': 1000,
'recompute_kde': True,
'probability_threshold': 0.37,
'best_bandwidths': {
'87-86': 0.1,
'88-86': 0.1,
'88-87': 0.1,
'257-256': 0.4,
'381-380': 0.2,
},
'ratio_limits': {
'87-86': [0, 1.7],
'88-86': [0, 1.7],
'88-87': [0, 1.7],
'257-256': [0, 15],
'381-380': [0, 4],
},
'use_model_for_inference': 'PLACEHOLDER',
'ratios_to_include': [
"87-86",
"88-86",
"88-87",
"257-256",
"381-380"
]
}
intensities_dict = {}
uncertainties_dict = {}
data_dict = df.to_dict(orient='records')
for line_id in ['86', '87', '88', '257', '256', '381', '380']:
intensities_dict[line_id] = {}
uncertainties_dict[line_id] = {}
for row in data_dict:
intensities_dict[line_id][str(row['source_name'])] = row[f'area_{line_id}']
uncertainties_dict[line_id][str(row['source_name'])] = row[f'area_unc_{line_id}']
config_dict['measured_integrated_intensities'] = intensities_dict
config_dict['integrated_intensities_uncertainties'] = uncertainties_dict
with open('config/density_inference_input.yml', 'w') as outfile:
yaml.dump(config_dict, outfile)
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle
from scipy.interpolate import splrep, BSpline, RegularGridInterpolator
from scipy.optimize import curve_fit
from assets.commons import (get_data,
load_config_file,
setup_logger,
validate_parameter)
from typing import Union, List
def approx(x, x_shift, a, b, exponent, scale=1):
return (np.abs(np.arctanh(2 * (x - x_shift) / scale) + a)) ** exponent + b
def main(ratios_to_fit: Union[List[str], None] = None):
config = load_config_file(config_file_path=os.path.join('config', 'config.yml'))
_ratio_to_fit = validate_parameter(
param_to_validate=ratios_to_fit,
default=['-'.join(lines_ratio) for lines_ratio in config['overrides']['lines_to_process']])
data = get_data(limit_rows=None,
use_model_for_inference=config['run_type'])
logger.info(f'Using {config["run_type"]} to fit data...')
logger.info(f'Data contains {data.shape[0]} rows')
bw = {
'87-86': 0.1,
'88-87': 0.1,
'88-86': 0.1,
'257-256': 0.4,
'381-380': 0.2}
x0 = {
'87-86': ((0.55, 2.3, 3.75, 0.9),),
'88-87': ((0.59, 2.3, 3.9, 1.05, 0.96),),
'88-86': ((0.55, 2.5, 4.3, 0.83, 1.08),),
'257-256': ((5, 2.5, 2.45, 0.75, 8), (5, 2.5, 3.33, 0.75, -8)),
'381-380': ((2.04, 2.5, 3.1, 0.6, 2.1), (2.18, 2.5, 3.7, 0.79, -2.4)),
}
min_ratio = {
'87-86': 0.08,
'88-87': 0.08,
'88-86': 0.03,
'257-256': 1,
'381-380': 1}
max_ratio = {
'87-86': 1.04,
'88-87': 1.02,
'88-86': 1.07,
'257-256': 7.5,
'381-380': 2.9}
for ratio_string in _ratio_to_fit:
_column_to_fit = f'ratio_{ratio_string}'
data.sort_values(by=_column_to_fit, inplace=True)
plt.clf()
plt.scatter(data[_column_to_fit], data['avg_nh2'], marker='+', alpha=0.1)
x_reg = np.linspace(min_ratio[ratio_string], max_ratio[ratio_string], 100)
data_clean = data[[_column_to_fit, 'avg_nh2']].loc[data[_column_to_fit] <= max_ratio[ratio_string]].copy()
kde = pickle.load(
open(f'prs/output/run_type/{config["run_type"]}/trained_model/ratio_density_kde_{ratio_string}.pickle',
'rb'))
rgi = RegularGridInterpolator((kde['y'] * bw[ratio_string], kde['x'] * 0.2), kde['values'].reshape(200, 200).T)
density = rgi(data_clean[[_column_to_fit, 'avg_nh2']])
data_clean = data_clean.loc[(density > density.mean() - density.std())]
plt.scatter(data_clean[_column_to_fit], data_clean['avg_nh2'], marker='+', alpha=0.1, color='red')
for components in x0[ratio_string]:
plt.plot(x_reg, approx(x_reg, *components), color='cyan')
plt.show()
if __name__ == '__main__':
logger = setup_logger(name='POLYFIT')
main()
from assets.utils import get_poc_results
from scipy.stats import anderson_ksamp
from assets.commons import setup_logger
import numpy as np
logger = setup_logger(name='PRS - AD test')
df_full = get_poc_results(line_fit_filename='ch3oh_data_top35.csv')
df_full['best_fit'] /= 1e5
df_full['best_fit'] = df_full['best_fit'].round(1)
df_full['mass'] = (df_full['mass'] / 100).round(1)
df_full['distance'] = (df_full['distance']).round(1)
df_full['diameter'] = (df_full['diameter']).round(1)
df_full = df_full[(df_full['mass'] < 100) & (df_full['mass'] > 3)]
# df_full = df_full[df_full['class'] != 'HII']
# df_full = df_full[df_full['class'].isin(['IRb', 'IRw'])]
ad_results = {}
for column in ['mass', 'distance', 'best_fit', 'diameter']:
array_list = [d[column].values for _, d in df_full.groupby(['class'])]
ad_results[column] = anderson_ksamp(array_list)[2]
logger.info(f'The p values of the AD test are: {ad_results}')
import os
import sqlalchemy
import numpy as np
import astropy.units as u
from datetime import datetime
from astropy.io import fits
from astropy import constants
from typing import Union, List
from assets.commons import (validate_parameter,
get_value_if_specified,
load_config_file,
setup_logger,
compute_unique_hash_filename)
from assets.commons.db_utils import upsert, get_pg_engine
from assets.constants import aggregation_function_mapping
from stg.stg_build_db_structure import (MomentZeroMaps,
RatioMaps)
logger = setup_logger(name='PRS')
def populate_mom_zero_table(config_prs: dict,
fits_cube_name: str,
moment_zero_map_fits: str,
aggregated_moment_zero: float,
executed_on: datetime.timestamp,
engine: sqlalchemy.engine,
run_id: str):
"""
Insert record in DB for the moment zero table
:param config_prs: configuration of the presentation layer
:param fits_cube_name: name of the fits cube
:param moment_zero_map_fits: filename of the moment zero map
:param aggregated_moment_zero: the aggregated value of the moment zero
:param executed_on: timestamp of execution
:param engine: SQLAlchemy engine to use
:param run_id: the run unique identifier
"""
_integration_limit_low = get_value_if_specified(config_prs, 'integration_limits')[0] if get_value_if_specified(
config_prs, 'integration_limits') != 'all' else None
_integration_limit_high = get_value_if_specified(config_prs, 'integration_limits')[1] if get_value_if_specified(
config_prs, 'integration_limits') != 'all' else None
moment_zero_dict = {
'mom_zero_name': f'{moment_zero_map_fits}',
'fits_cube_name': f'{fits_cube_name}',
'integration_limit_low': _integration_limit_low,
'integration_limit_high': _integration_limit_high,
'aggregated_moment_zero': aggregated_moment_zero,
'aggregation_function': config_prs['moment_zero_aggregation_function'],
'created_on': executed_on,
'run_id': run_id,
}
upsert(
table_object=MomentZeroMaps,
row_dict=moment_zero_dict,
conflict_keys=[MomentZeroMaps.mom_zero_name,
MomentZeroMaps.run_id],
engine=engine
)
def populate_line_ratios_table(config_prs: dict,
moment_zero_map_fits_list: List[str],
ratio_map_name: str,
aggregated_ratio: float,
engine: sqlalchemy.engine,
executed_on: datetime.timestamp,
run_id: str):
"""
Insert record in DB for the line ratios table
:param config_prs: configuration of the presentation layer
:param moment_zero_map_fits_list: list of the moment 0 maps to use to compute the ratio
:param ratio_map_name: filename of the ratio map
:param aggregated_ratio: aggregated value of the ratio
:param engine: SQLAlchemy engine to use
:param executed_on: timestamp of execution
:param run_id: the run unique identifier
"""
_integration_limit_low = get_value_if_specified(config_prs, 'integration_limits')[0] if get_value_if_specified(
config_prs, 'integration_limits') != 'all' else None
_integration_limit_high = get_value_if_specified(config_prs, 'integration_limits')[1] if get_value_if_specified(
config_prs, 'integration_limits') != 'all' else None
assert len(moment_zero_map_fits_list) == 2
line_ratio_dict = {
'ratio_map_name': f'{ratio_map_name}',
'mom_zero_name_1': f'{moment_zero_map_fits_list[0]}',
'mom_zero_name_2': f'{moment_zero_map_fits_list[1]}',
'aggregated_ratio': aggregated_ratio,
'aggregation_function': config_prs['aggregation_function'],
'created_on': executed_on,
'run_id': run_id,
}
upsert(
table_object=RatioMaps,
row_dict=line_ratio_dict,
conflict_keys=[RatioMaps.ratio_map_name,
RatioMaps.run_id],
engine=engine
)
def compute_moment_zero(cube: str,
config: dict,
cube_path: Union[str, None] = None,
moment_zero_path: Union[str, None] = None,
moment_zero_fits_name: Union[str, None] = None,
hdu_idx: int = 0) -> float:
"""
Compute moment zero map, given a cube
:param cube: filename of the fits cube
:param config: configuration of the presentation layer
:param cube_path: path where the fits cube is to be found
:param moment_zero_path: path where the moment 0 map is to be saved
:param moment_zero_fits_name: filename for the moment zero map
:param hdu_idx: index of the HDU to extract the data and the header
:return: the aggregated value of the moment zero, according to the function specified in the configuration
"""
_cube_path = validate_parameter(cube_path, default=os.path.join('prs', 'fits', 'cubes'))
_moment_zero_path = validate_parameter(moment_zero_path, default=os.path.join('prs', 'fits', 'moments'))
_moment_zero_fits_name = validate_parameter(moment_zero_fits_name, default='test_mom0.fits')
fitsfile = open_fits_file_duck_typing(fitsfile=cube, fits_path=_cube_path)
header = fitsfile[hdu_idx].header.copy()
data = fitsfile[hdu_idx].data
conversion = header['CUNIT3'].strip() == 'HZ'
spectral_unit = 'km/s' if conversion is True else 'Hz'
conversion_factor = 1 if conversion is False else (-constants.c * header['CDELT3'] / header['CRVAL3']).to('km/s').value
mom0 = (data.sum(axis=0) * abs(conversion_factor))
keywords_to_delete = ['NAXIS3', 'CRPIX3', 'CDELT3', 'CRVAL3', 'CUNIT3', 'CTYPE3']
header['BUNIT'] += f' {spectral_unit}'
header['BTYPE'] = 'MOMENT0'
header['NAXIS'] = 2
for key in keywords_to_delete:
del header[key]
fits.writeto(os.path.join(_moment_zero_path, _moment_zero_fits_name), data=mom0, header=header, overwrite=True)
try:
return aggregation_function_mapping[config['moment_zero_aggregation_function']](mom0)
except KeyError:
logger.warning(
'Moment zero aggregation function not set or misconfigured. Trying to use the one for the ratio...')
return aggregation_function_mapping[config['aggregation_function']](mom0)
def open_fits_file_duck_typing(fitsfile: Union[str, fits.PrimaryHDU],
fits_path: str = None) -> fits.PrimaryHDU:
_fits_path = validate_parameter(fits_path, default='.')
try:
hdu = fits.open(os.path.join(_fits_path, fitsfile))
except TypeError:
hdu = fitsfile
return hdu
def compute_image_ratios(fits1: str,
fits2: str,
config: dict,
fits_path: Union[str, None] = None,
ratio_fits_path: Union[str, None] = None,
ratio_fits_name: Union[str, None] = None,
hdu1_idx: int = 0,
hdu2_idx: int = 0):
"""
Compute the ratio of the images and save it to fits. The ratio is computed as fits1/fits2
:param fits1: filename of the first moment 0 map
:param fits2: filename of the second moment 0 map
:param config: configuration of the presentation layer
:param fits_path: path where the fits files are to be found
:param ratio_fits_path: path where the ratio map is to be saved
:param ratio_fits_name: filename for the ratio map
:param hdu1_idx: index of the HDU to extract the data and the header, for the first fits file
:param hdu2_idx: index of the HDU to extract the data and the header, for the second fits file
:return:
"""
_fits_path = validate_parameter(fits_path, default=os.path.join('prs', 'fits', 'moments'))
_ratio_fits_path = validate_parameter(ratio_fits_path, default=os.path.join('prs', 'fits', 'ratios'))
_ratio_fits_name = validate_parameter(ratio_fits_name, default='ratio.fits')
hdu1 = open_fits_file_duck_typing(fitsfile=fits1,
fits_path=_fits_path)
hdu2 = open_fits_file_duck_typing(fitsfile=fits2,
fits_path=_fits_path)
ratio_image_data = hdu1[hdu1_idx].data / hdu2[hdu2_idx].data
if config['aggregation_function'] in ('mean', 'sum'):
aggregated_ratio = aggregation_function_mapping[config['aggregation_function']](ratio_image_data)
elif config['aggregation_function'] == 'weighted_mean':
aggregated_ratio = np.nansum(hdu1[hdu1_idx].data) / np.nansum(hdu2[hdu2_idx].data)
else:
raise NotImplementedError('Aggregation function not configured')
output_header = hdu1[hdu1_idx].header
output_header['BUNIT'] = ''
output_header['BTYPE'] = 'INT.RATIO'
fits.writeto(os.path.join(_ratio_fits_path, _ratio_fits_name),
data=ratio_image_data,
header=output_header,
overwrite=True)
return aggregated_ratio
def main(cube_fits_list: List[str],
run_id: str,
mom0_out_cube1: Union[str, None] = None,
mom0_out_cube2: Union[str, None] = None,
engine: sqlalchemy.engine = None) -> str:
assert len(cube_fits_list) == 2
_mom0_out_cube1 = validate_parameter(mom0_out_cube1, default=cube_fits_list[0].replace('.fits', '_mom0.fits'))
_mom0_out_cube2 = validate_parameter(mom0_out_cube2, default=cube_fits_list[1].replace('.fits', '_mom0.fits'))
config_prs = load_config_file(os.path.join('prs', 'config', 'config.yml'))['flux_computation']
executed_on = datetime.now()
if engine is None:
engine = get_pg_engine(logger=logger)
config_prs.update({
'cube_fits_list': cube_fits_list,
'mom0_image_list': [_mom0_out_cube1, _mom0_out_cube2]
})
ratio_fits_name = f'{compute_unique_hash_filename(config=config_prs)}.fits'
aggregated_mom0_1 = compute_moment_zero(cube=cube_fits_list[0],
config=config_prs,
moment_zero_fits_name=_mom0_out_cube1)
aggregated_mom0_2 = compute_moment_zero(cube=cube_fits_list[1],
config=config_prs,
moment_zero_fits_name=_mom0_out_cube2)
aggregated_image_ratio = compute_image_ratios(fits1=_mom0_out_cube1,
fits2=_mom0_out_cube2,
config=config_prs,
ratio_fits_name=ratio_fits_name)
populate_mom_zero_table(config_prs=config_prs,
fits_cube_name=cube_fits_list[0],
moment_zero_map_fits=_mom0_out_cube1,
aggregated_moment_zero=aggregated_mom0_1,
executed_on=executed_on,
engine=engine,
run_id=run_id)
populate_mom_zero_table(config_prs=config_prs,
fits_cube_name=cube_fits_list[1],
aggregated_moment_zero=aggregated_mom0_2,
moment_zero_map_fits=_mom0_out_cube2,
executed_on=executed_on,
engine=engine,
run_id=run_id)
populate_line_ratios_table(config_prs=config_prs,
moment_zero_map_fits_list=[_mom0_out_cube1, _mom0_out_cube2],
ratio_map_name=ratio_fits_name,
aggregated_ratio=aggregated_image_ratio,
engine=engine,
executed_on=executed_on,
run_id=run_id)
return ratio_fits_name
if __name__ == '__main__':
main(cube_fits_list=['test_cube.fits', 'test_cube.fits'],
mom0_out_cube1='test_cube_mom0.fits',
mom0_out_cube2='test_cube_mom0_1.fits',
run_id='test_run')
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
numpy==1.23.4
PyYAML==6.0
astropy==5.1
matplotlib==3.6.2
scipy==1.9.3
tqdm==4.64.1
xarray==2022.10.0
sqlalchemy==1.4.42
psycopg2-binary==2.9.5
seaborn==0.12.2
scikit-learn==1.3.0
xgboost==2.0.0
astropy==5.1
keras==2.12.0
\ No newline at end of file
#!/bin/bash
read -p "This will delete all of the results, are you sure? " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]
then
folder_list=("prs/fits/cubes" "prs/fits/moments" "prs/fits/ratios" "prs/fits/grids")
for folder_name in ${folder_list[@]}
do
rm $folder_name/*.fits
done
rm prs/output/*.png
rm stg/archive/*.zip
rm full_dataset*.csv
fi
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
DB_USER: cGlwcG8=
DB_PASS: cGx1dG8=
DB_HOST: localhost
DB_NAME: postgres
DB_PORT: 5432
\ No newline at end of file