
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.


Files

.dockerignore

0 → 100644
+11 −0
/etl/credentials/db_credentials*.yml
/etl/stg/archive/*
/etl/prs/fits/cubes/*
/etl/prs/fits/moments/*
/etl/prs/fits/ratios/*
/etl/prs/fits/grids/*
/etl/mdl/radmc_files/*
/etl/mdl/scratches/*
/etl/prs/output/*
/etl/*.sif
 No newline at end of file

.gitignore

0 → 100644
+14 −0
/etl/tests/test_files/config.yml
/etl/trn/trn_convert_grid_to_polaris.py
/etl/posteriors.yml
/etl/run_id.txt
/etl/test_sak.html
/etl/prs/output/
/etl/prs/fits/
/etl/stg/archive/
/etl/assets/internal_files/
/etl/credentials/db_credentials.yml
/etl/credentials/db_credentials_local.yml
/etl/mdl/polaris_input_file.cmd
/etl/mdl/radmc3d_postprocessing.sh
/etl/mdl/scratches/

Dockerfile

0 → 100644
+34 −0
FROM python:3.9-slim


# STEP 1) Setting up environment
# apt packages
RUN apt-get update && \
    apt-get --yes upgrade && \
    apt-get install --yes libgomp1 && \
    apt-get install --yes postgresql-client && \
    apt-get install --yes --reinstall build-essential && \
    apt-get -y install curl vim less nano git && \
    apt-get -y install zip unzip && \
    apt-get clean

WORKDIR /usr/src/etl
RUN python -m pip install --upgrade pip
COPY etl/requirements.txt .
RUN pip install -r requirements.txt

RUN mkdir /sak_user
# RUN /usr/sbin/usermod -d /sak_user root
# each RUN starts a fresh shell, so a bare `RUN HOME=...` would not persist;
# set HOME via ENV so that the install script (and radmc3d) use /sak_user
ENV HOME=/sak_user
COPY documentation/radmc3d_install.sh .
RUN chmod 755 radmc3d_install.sh
RUN ./radmc3d_install.sh
ENV PATH="/sak_user/bin:${PATH}"
ENV PYTHONPATH="/sak_user/bin/python:${PYTHONPATH}"
RUN chmod 755 /sak_user/bin/radmc3d

# STEP 2) Bundling app
COPY etl .
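
# The stack is normally brought up via docker-compose.yaml (see below); a minimal
# manual sketch, assuming the build is run from the repository root:
#   docker build -t sak_etl .
#   docker run --rm -it sak_etl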

CMD python main.py
 No newline at end of file

README.md

0 → 100644
+4 −0
The Swiss Army Knife project aims to assess whether CH3OH can be used as an effective volume-density probe, and in which regime.

Important resources are listed in the [documentation.md file](documentation/documentation.md).
 No newline at end of file

apptainer.def

0 → 100644
+7 −0
Bootstrap: docker
From: postgres:14.1-alpine

%environment
export POSTGRES_DB=$DB_NAME
export POSTGRES_USER=$DB_USER
export POSTGRES_PASSWORD=$DB_PASS
+3 −0
rm swiss_army_knife_latest.sif
singularity pull --disable-cache docker://git.ia2.inaf.it:5050/andrea.giannetti/swiss_army_knife

docker-compose.yaml

0 → 100644
+37 −0
version: '3.8'
services:
  db:
    image: postgres:14.1-alpine
    container_name: db_container_sak
    restart: always
    environment:
      - POSTGRES_DB=$DB_NAME
      - POSTGRES_USER=$DB_USER
      - POSTGRES_PASSWORD=$DB_PASS
    ports:
      - '31000:5432'
    volumes:
      - db:/var/lib/postgresql/data
    networks:
      - sak_network

  etl:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: 'etl_sak'
    image: 'sak_etl'
    depends_on:
      - db
    stdin_open: true
    tty: true
    networks:
      - sak_network
    volumes:
      - ./etl:/usr/src/etl
volumes:
  db:
    driver: local
networks:
  sak_network:
    name: 'sak_app_network'
 No newline at end of file
+28 −0
#!/bin/bash
# Set the PYCHARM_HOME and INSTALL_DEPENDENCIES environment variables before running, if needed
PYCHARM_HOME=${PYCHARM_HOME:=$HOME}
INSTALL_DEPENDENCIES="${INSTALL_DEPENDENCIES:=true}"
echo "$PYCHARM_HOME"
cd "$PYCHARM_HOME"
export RADMC_HOME=$PYCHARM_HOME/radmc3d-2.0
if [[ ! -d "$RADMC_HOME" ]]
then
    echo RADMC not found, cloning into "$(pwd)"
    git clone https://github.com/dullemond/radmc3d-2.0.git
fi

if $INSTALL_DEPENDENCIES
then
    apt-get install -y gfortran
fi

cd $RADMC_HOME/src
make
echo "y\n y\n"| make install

# Add this to the .bashrc configuration file (or equivalent)
export PATH=$HOME/bin:$PATH
export PYTHONPATH=$HOME/bin/python:$PYTHONPATH

cd $RADMC_HOME/python/radmc3dPy
python setup.py install
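
# Example invocation (a sketch; both variables are optional and default as above):
#   PYCHARM_HOME=/sak_user INSTALL_DEPENDENCIES=false ./radmc3d_install.sh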
+291 −0
import glob
import logging
import sys
import yaml
import numpy as np
import shutil
import os
import urllib.request
import json
import pandas as pd
from hashlib import sha1
from typing import (List,
                    Union,
                    Tuple)
from astropy import units as u
from assets.constants import (leiden_url_mapping)


def setup_logger(name: str,
                 log_level: str = None) -> logging.Logger:
    """
    Configure default logger
    :param name: Logger name
    :param log_level: Logging levels, as defined by logging
    :return: the logger object
    """
    log_level = 'DEBUG' if log_level is None else log_level
    logger = logging.getLogger(name)
    logger.setLevel(log_level)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(log_level)
    formatter = logging.Formatter("%(asctime)s - %(name)-7s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger


def validate_parameter(param_to_validate,
                       default):
    """
    Substitute default value if the parameter is set to None
    :param param_to_validate: input to validate
    :param default: default value
    :return: default value if parameter is set to None, else the passed value
    """
    return param_to_validate if param_to_validate is not None else default


def load_config_file(config_file_path: str,
                     override_config: Union[dict, None] = None) -> dict:
    """
    Load the information in the YAML configuration file into a python dictionary
    :param config_file_path: path to the configuration file
    :param override_config: parameters of the input file to override (e.g. for grid creation)
    :return: a dictionary with the parsed information
    """
    _override_config = validate_parameter(override_config, default={})
    with open(config_file_path) as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)
    for key in _override_config:
        if key not in config.keys():
            config[key] = {}
        for subkey in _override_config[key]:
            try:
                config[key][subkey] = _override_config[key][subkey]
            except KeyError:
                config[key] = {subkey: _override_config[key][subkey]}

    try:
        if config['density_powerlaw_idx'] == 0:
            config['central_density'] = config['density_at_reference']
            config['density_at_reference'] = None
    except KeyError:
        pass

    try:
        if config['dust_temperature_powerlaw_idx'] == 0:
            config['dust_temperature'] = config['dust_temperature_at_reference']
            config['dust_temperature_at_reference'] = None
    except KeyError:
        pass
    return config
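
# Example of the override mechanism (hypothetical values): keys missing from the
# file are created, and nested keys are replaced one by one, e.g.
#   load_config_file('config/config.yml',
#                    override_config={'grid': {'power_law_density': -1.2}})
# leaves every other entry of config['grid'] untouched.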


def get_moldata(species_names: list,
                logger: logging.Logger,
                path: Union[str, None] = None,
                use_cache: bool = False):
    """
    Downloads the molecular data from the Leiden molecular database; check that the molecule is mapped in
        leiden_url_mapping
    :param species_names: the names of the species for which to download data
    :param logger: logger for output printing
    :param path: path where the files must be saved
    :param use_cache: use molecular input files in cache, if possible
    """
    _path = validate_parameter(path, default=os.path.join('mdl', 'radmc_files'))
    for species in species_names:
        molecular_file_name = f'molecule_{species}.inp'
        if use_cache is True:
            shutil.copy(os.path.join('mdl', 'data', molecular_file_name),
                        os.path.join(_path, molecular_file_name))
        if not os.path.isfile(os.path.join(_path, molecular_file_name)):
            logger.info(f'Downloading file molecule_{species}.inp...')
            data = urllib.request.urlopen(leiden_url_mapping[species]).read().decode()
            with open(os.path.join(_path, f'molecule_{species}.inp'), 'w') as outfile:
                outfile.writelines(data)
        else:
            logger.info(f'File molecule_{species}.inp found, skipping download...')


def compute_unique_hash_filename(config: dict) -> str:
    """
    Compute a unique and reproducible filename given a configuration dictionary
    :param config: the configuration dictionary to hash
    :return: a unique hash of the dictionary, to use as a key and filename
    """
    hashed_dict = sha1(json.dumps(config, sort_keys=True).encode())
    return f'{hashed_dict.hexdigest()}'
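
# Since the dictionary is serialized with sort_keys=True, the same configuration
# always yields the same filename regardless of key order, e.g.
#   compute_unique_hash_filename({'a': 1, 'b': 2}) == compute_unique_hash_filename({'b': 2, 'a': 1})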


def make_archive(output_filename: str,
                 source_dir: str,
                 archive_path: Union[str, None] = None):
    """
    Compresses all files in a directory into a .zip file
    :param output_filename: the output filename
    :param source_dir: the directory to compress
    :param archive_path: path to store the compressed-grid archives
    """
    _archive_path = validate_parameter(archive_path,
                                       default=os.path.join('stg', 'archive'))
    filename_root, filename_format = output_filename.rsplit('.', maxsplit=1)
    try:
        os.remove(os.path.join(_archive_path, output_filename))
    except FileNotFoundError:
        pass
    shutil.make_archive(base_name=os.path.join(_archive_path, filename_root),
                        format=filename_format,
                        root_dir=source_dir)


def cleanup_directory(directory: str,
                      logger: logging.Logger):
    """
    Removes all files from the specified directory.
    :param directory: directory to clean up
    :param logger: logger to use
    """
    if os.path.isdir(directory):
        file_list = glob.glob(f'{directory}/*')
        for filename in file_list:
            if os.path.isfile(filename) is True:
                logger.debug(f'Removing file {filename}')
                os.remove(filename)
            else:
                logger.debug(f'Skipping {filename}')


def convert_frequency_to_wavelength(frequency: u.Quantity,
                                    output_units: u.Unit):
    return frequency.to(output_units, equivalencies=u.spectral())


def get_value_if_specified(parameters_dict: dict,
                           key: str):
    try:
        return parameters_dict[key]
    except KeyError:
        return None


def get_postprocessed_data(limit_rows: Union[None, int] = None,
                           use_model_for_inference: Union[None, str] = None) -> Tuple[List[List[str]], pd.DataFrame]:
    """
    Retrieve data and line pairs from the main config file
    :param use_model_for_inference: the prs/output/run_type subfolder from which to read the data for inference;
        defaults to the fiducial model (constant_abundance_p15_q05)
    :param limit_rows: the number of rows to use from the original dataset; useful to run tests and limit
        computation time
    :return: the line pairs list and the dataset
    """
    _use_model_for_inference = validate_parameter(
        use_model_for_inference,
        default='constant_abundance_p15_q05'
    )
    _data_file = os.path.join('prs', 'output', 'run_type', _use_model_for_inference, 'data', 'full_dataset.csv')
    config = load_config_file(os.path.join('config', 'config.yml'))
    line_pairs = config['overrides']['lines_to_process']
    results_df_dict = {
        'ratios_and_los_averages': add_px_index_column(_data_file)
    }
    if limit_rows is not None:
        data = results_df_dict['ratios_and_los_averages'].head(limit_rows)
    else:
        data = results_df_dict['ratios_and_los_averages']
    return line_pairs, data


def prepare_matrix(filename: str,
                   columns: list,
                   use_model_for_inference: Union[None, str] = None) -> pd.DataFrame:
    """
    Retrieve and prepare the data matrix from a specified file and columns.

    :param filename: The name of the file to read the data from.
    :param columns: The list of columns to extract from the dataframe.
    :param use_model_for_inference: The folder within prs/output/run_type to get the data for inference;
        defaults to the fiducial model ('constant_abundance_p15_q05') if None is provided.
    :return: A pandas DataFrame containing the specified columns from the file with 'nh2' and 'tdust' columns
        rounded to one decimal place and converted to string type.
    """
    _use_model_for_inference = validate_parameter(
        use_model_for_inference,
        default='constant_abundance_p15_q05'
    )
    df = pd.read_csv(os.path.join('prs', 'output', 'run_type', _use_model_for_inference, 'data', filename))
    df['nh2'] = pd.Series(df['nh2'].round(1), dtype='string')
    df['tdust'] = pd.Series(df['tdust'].round(1), dtype='string')
    return df[columns]


def get_data(limit_rows: Union[int, None] = None,
             use_model_for_inference: Union[None, str] = None,
             log_columns: Union[None, List] = None):
    """
    Retrieve and preprocess dataset.

    :param limit_rows: The number of rows to use from the original dataset; useful for running tests and limiting
        computation time. Defaults to None, which uses all rows.
    :param use_model_for_inference: The folder within prs/output/run_type to get the data for inference;
        defaults to the fiducial model ('constant_abundance_p15_q05') if None is provided.
    :param log_columns: The list of columns to apply a logarithmic transformation to. Defaults to
        ['log_nh2', 'log_tdust', 'avg_nh2', 'avg_tdust', 'molecule_column_density', 'std_nh2'] if None is provided.
    :return: A pandas DataFrame containing the merged and processed data from multiple sources, with specified
        columns logarithmically transformed.
    """
    _use_model_for_inference = validate_parameter(
        use_model_for_inference,
        default='constant_abundance_p15_q05'
    )
    _log_columns = validate_parameter(
        log_columns,
        default=['log_nh2', 'log_tdust', 'avg_nh2', 'avg_tdust', 'molecule_column_density', 'std_nh2']
    )
    _, data = get_postprocessed_data(limit_rows=limit_rows,
                                     use_model_for_inference=_use_model_for_inference)
    data['nh2'] = pd.Series(data['nh2'].round(1), dtype='string')
    data['tdust'] = pd.Series(data['tdust'].round(1), dtype='string')
    df87 = prepare_matrix(filename='full_dataset_NH2_lines_87-86.csv',
                          columns=['px_index', 'nh2', 'tdust', 'mom_zero_87', 'mom_zero_86', 'molecule_column_density'],
                          use_model_for_inference=_use_model_for_inference)
    df88 = prepare_matrix(filename='full_dataset_NH2_lines_88-86.csv',
                          columns=['px_index', 'nh2', 'tdust', 'mom_zero_88'],
                          use_model_for_inference=_use_model_for_inference)
    df257 = prepare_matrix(filename='full_dataset_NH2_lines_257-256.csv',
                           columns=['px_index', 'nh2', 'tdust', 'mom_zero_257', 'mom_zero_256'],
                           use_model_for_inference=_use_model_for_inference)
    df381 = prepare_matrix(filename='full_dataset_NH2_lines_381-380.csv',
                           columns=['px_index', 'nh2', 'tdust', 'mom_zero_381', 'mom_zero_380'],
                           use_model_for_inference=_use_model_for_inference)
    merged = data.merge(df87, on=['px_index', 'nh2', 'tdust'])
    merged = merged.merge(df88, on=['px_index', 'nh2', 'tdust'])
    merged = merged.merge(df257, on=['px_index', 'nh2', 'tdust'])
    merged = merged.merge(df381, on=['px_index', 'nh2', 'tdust'])
    # These are transformed later on, to allow splitting
    merged['nh2'] = pd.to_numeric(merged['nh2'])
    merged['tdust'] = pd.to_numeric(merged['tdust'])
    merged['log_nh2'] = pd.to_numeric(merged['nh2'])
    merged['log_tdust'] = pd.to_numeric(merged['tdust'])
    for column in _log_columns:
        merged[column] = np.log10(merged[column])
    npixels = 101
    refpix = 50
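    # px_index is a flattened index on the npixels x npixels map: integer division
    # gives the row and the modulo gives the column, so this is the Euclidean
    # distance (in pixels) from the reference pixel at (refpix, refpix)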
    merged['px_distance'] = np.sqrt(
        ((merged['px_index'] / npixels).astype(int) - refpix) ** 2 + ((merged['px_index'] % npixels) - refpix) ** 2)
    merged['core_radius_px'] = (1e8 / merged['nh2']) ** -(2 / 3) * 0.5 * 101 / 2
    return merged


def add_px_index_column(filename: str) -> pd.DataFrame:
    """
    Reset index to add pixel index column to data
    :param filename: The filename of the csv file that contains the data
    :return: a pandas dataframe with the px_index column
    """
    df = pd.read_csv(filename, index_col=0)
    if 'px_index' not in df.columns:
        df = df.reset_index().rename(columns={'index': 'px_index'})
    return df
+71 −0
import base64
import logging
import os
from contextlib import closing
from typing import Union, List
import sqlalchemy as sqla
import yaml
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session
from assets.commons import validate_parameter


def get_credentials(logger: logging.Logger,
                    credentials_filename: Union[None, str] = None,
                    set_envs: bool = False) -> Union[None, dict]:
    """
    Retrieves credentials, if available, for logging in to the requested service.
    :param logger: the logger to use.
    :param credentials_filename: the name of the credentials file to be used. If None, uses the default.
    :param set_envs: whether to set the credentials as environment variables.
    :return: a dictionary with username and password, as retrieved from the credentials file or None.
    """
    _credentials_filename = credentials_filename if credentials_filename is not None \
        else os.path.join('credentials', 'credentials.yml')
    try:
        with open(_credentials_filename) as credentials_file:
            credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)
            credentials['DB_USER'] = base64.b64decode(credentials['DB_USER'].encode()).decode()
            credentials['DB_PASS'] = base64.b64decode(credentials['DB_PASS'].encode()).decode()
            if set_envs is True:
                for key in credentials:
                    os.environ[key] = str(credentials[key])

            return credentials
    except FileNotFoundError:
        logger.info('Credentials not found!')
        return None


def upsert(table_object: sqla.orm.decl_api.DeclarativeMeta,
           row_dict: dict,
           conflict_keys: List[sqla.Column],
           engine: sqla.engine):
    """
    Upsert the row into the specified table, according to the indicated conflict columns
    :param table_object: the table into which the row must be inserted
    :param row_dict: the dictionary representing the row to insert
    :param conflict_keys: the conflict columns list, representing the primary key of the table
    :param engine: the SQLAlchemy engine to use
    """
    statement = insert(table_object).values(row_dict)
    statement = statement.on_conflict_do_update(
        index_elements=conflict_keys, set_=row_dict)
    with closing(Session(engine)) as session:
        session.execute(statement)
        session.commit()
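
# Usage sketch (hypothetical table; `MyTable` is a declarative model whose
# primary key is `id`):
#   upsert(table_object=MyTable,
#          row_dict={'id': 1, 'value': 42},
#          conflict_keys=[MyTable.id],
#          engine=get_pg_engine(logger))
# Existing rows with the same `id` are updated in place, new ones are inserted.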


def get_pg_engine(logger: logging.Logger, engine_kwargs: Union[dict, None] = None) -> sqla.engine.Engine:
    """
    Return the SQLAlchemy engine, given the credentials in the external file
    :param logger: the logger to use
    :return: the SQLAlchemy engine
    """
    _kwargs = validate_parameter(engine_kwargs, default={})
    credentials = get_credentials(logger=logger,
                                  credentials_filename=os.path.join('credentials', 'db_credentials.yml'))
    url = f'postgresql://{credentials["DB_USER"]}:{credentials["DB_PASS"]}@{credentials["DB_HOST"]}:{credentials["DB_PORT"]}/{credentials["DB_NAME"]}'
    engine = sqla.create_engine(url, **_kwargs)
    return engine
+280 −0
import logging
import os
from typing import Union, List
import numpy as np
from astropy import units as u
from astropy.constants import m_p  # proton mass, used for the total-mass estimate
from assets.commons import validate_parameter, load_config_file, setup_logger
from assets.commons.parsing import get_grid_properties
from assets.constants import mean_molecular_mass, radmc_grid_map, radmc_coord_system_map


def compute_power_law_radial_profile(
        central_value: float,
        power_law_index: float,
        distance_matrix: np.array,
        maximum_radius: Union[float, None] = None,
        value_at_reference: Union[float, None] = None,
        distance_reference: Union[float, u.Quantity] = 1.0,
        fill_reference_pixel: bool = True) -> np.array:
    """
    Compute a power law distribution over the specified grid
    :param central_value: value in the center of the grid
    :param power_law_index: index of the power law used to scale the profile
    :param distance_matrix: the matrix of distances from the reference point
    :param maximum_radius: the maximum radius to populate with gas within the grid in the same unit as the distance
                           matrix and distance reference
    :param value_at_reference: value of the profile at the reference distance; defaults to central value if not provided
    :param distance_reference: value of the reference distance
    :param fill_reference_pixel: whether to fill the reference point with central_value and set this to the maximum in
        the grid
    :return: the array with the power-law profile
    """
    _value_at_reference = validate_parameter(value_at_reference, default=central_value)
    _distance_matrix = np.where(distance_matrix == 0, np.nanmin(distance_matrix[distance_matrix > 0]),
                                distance_matrix) if fill_reference_pixel is True else distance_matrix
    try:
        _distance_reference = distance_reference.to(u.cm).value
    except AttributeError:
        _distance_reference = distance_reference
    profile = _value_at_reference * (_distance_matrix / _distance_reference) ** power_law_index
    # If the routine fills the 0-distance point (the centre), it fixes the profile making the central value the maximum
    if (fill_reference_pixel is True) and (power_law_index != 0):
        profile = np.where(profile > central_value, central_value, profile)
    if maximum_radius is not None:
        profile = np.where(distance_matrix > maximum_radius + maximum_radius / 1e5, 0, profile)
    return profile
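
# Worked example (hypothetical numbers): with value_at_reference=1e5, a power-law
# index of -1.5 and distance_reference=1.0, a pixel at distance 4 gets
# 1e5 * 4 ** -1.5 = 1.25e4, while pixels beyond maximum_radius are set to 0.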


def compute_cartesian_coordinate_grid(indices: np.array,
                                      physical_px_size: np.array) -> np.array:
    """
    Compute the physical coordinate grid, for a regular grid
    :param indices: the indices of the grid pixels
    :param physical_px_size: the array of physical pixel sizes or the pixel size, as astropy.Quantities
    :return: the numpy.array with the physical grid coordinates
    """
    try:
        _physical_px_size_cm = [px_size.to(u.cm).value for px_size in physical_px_size]
    except TypeError:
        _physical_px_size_cm = physical_px_size.to(u.cm).value
    return (indices.T * _physical_px_size_cm).T


def get_centered_indices(grid_metadata: dict) -> np.array:
    """
    Recompute the indices using as reference the reference pixel in the configuration file
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape and the reference pixel
    :return: a numpy array of indices, wrt. the reference
    """
    return (np.indices(grid_metadata['grid_shape']).T - grid_metadata['grid_refpix']).T


def get_grid_edges(grid_metadata: dict) -> np.array:
    """
    Compute the physical coordinates at the grid boundaries
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape, the reference pixel, and the
        physical pixel size
    :return: a numpy array with the coordinates of the edges, per axis
    """
    grid_edges = []
    for axis_idx in range(len(grid_metadata['grid_shape'])):
        indices = np.arange(grid_metadata['grid_shape'][axis_idx] + 1) - grid_metadata['grid_refpix'][axis_idx] - 0.5
        # valid for regular grid
        grid_edges.append(compute_cartesian_coordinate_grid(indices=indices,
                                                            physical_px_size=grid_metadata['physical_px_size'][
                                                                axis_idx]))
    # Transposed for consistent flattening
    return np.array(grid_edges).T


def get_distance_matrix(grid_metadata: dict,
                        indices: np.array) -> np.array:
    """
    Compute physical distance from the reference pixel, in cm
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape, and the physical pixel size
    :param indices: numpy array of the grid pixel indices
    :return: a numpy array with the euclidean distance from the reference pixel
    """
    distance_matrix = np.zeros(grid_metadata['grid_shape'])
    for axis_idx in range(len(grid_metadata['grid_shape'])):
        distance_matrix += (indices[axis_idx, ...] * grid_metadata['physical_px_size'][axis_idx].to(u.cm).value) ** 2
    return np.sqrt(distance_matrix)


def get_physical_px_size(grid_metadata: dict) -> List[u.Quantity]:
    """
    Compute the physical pixel size
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape, the grid size, and the grid
        size units
    :return: a list of physical pixel sizes, per axis
    """
    physical_px_size = []
    for axis_idx in range(len(grid_metadata['grid_shape'])):
        physical_px_size.append(
            (grid_metadata['grid_size'][axis_idx] * u.Unit(grid_metadata['grid_size_units'][axis_idx])).to(u.cm) /
            grid_metadata['grid_shape'][axis_idx])
    return physical_px_size


def compute_analytic_profile(central_value: float,
                             power_law_index: float,
                             maximum_radius: float,
                             value_at_reference: float,
                             distance_reference: float) -> np.array:
    """
    Compute the analytic radial profile based on a power-law model.

    :param central_value: central value of the profile, where the power law could be undefined
    :param power_law_index: power-law index determining the shape of the profile
    :param maximum_radius: maximum radius within which to compute the profile; values outside this radius are
        set to zero
    :param value_at_reference: value of the profile at the reference distance
    :param distance_reference: reference distance at which the profile value is specified
    :return: the computed radial profile, as a numpy array

    Example:
        # Compute an analytic profile with given parameters
        profile = compute_analytic_profile(central_value=10.0,
                                            power_law_index=-1.5,
                                            maximum_radius=5.0,
                                            value_at_reference=5.0,
                                            distance_reference=1.0)
    """
    config = load_config_file(config_file_path=os.path.join('stg', 'config', 'config.yml'))
    grid_metadata = extract_grid_metadata(config=config)
    distance_matrix = get_distance_matrix(grid_metadata=grid_metadata,
                                          indices=get_centered_indices(grid_metadata=grid_metadata))
    profile = compute_power_law_radial_profile(central_value=central_value,
                                               power_law_index=power_law_index,
                                               distance_matrix=distance_matrix,
                                               maximum_radius=maximum_radius,
                                               value_at_reference=value_at_reference,
                                               distance_reference=distance_reference)
    return profile


def compute_los_average_weighted_profile(profile: np.array,
                                         weights: Union[float, np.array]) -> np.array:
    """
    Compute the line-of-sight (LOS) average weighted profile.
    This function computes the weighted average profile along the LOS, where each profile is weighted
    according to the provided weights. The computation is performed by summing the products of each
    profile value and its corresponding weight across all LOS, and then dividing by the sum of weights
    to obtain the average.

    Note:
    - If a profile value is 0, it is treated as missing data and not included in the computation.
    - If a weight is 0, the corresponding profile is effectively excluded from the computation.

    :param profile: array containing the profiles along different lines of sight (LOS)
    :param weights: array containing the weights corresponding to each profile in the input array,
        or a single weight value if uniform weighting is desired
    :return: the LOS average weighted profile
    """
    return np.nansum(np.where(profile == 0, np.nan, profile * weights), axis=2) / \
        np.nansum(np.where(profile == 0, np.nan, weights), axis=2)
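
# Small numeric sketch (hypothetical values): along one LOS with
# profile = [2, 0, 4] and weights = [1, 1, 3], the zero is treated as missing,
# so the result is (2*1 + 4*3) / (1 + 3) = 3.5.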


def compute_density_and_temperature_avg_profiles(density_profile: np.array,
                                                 temperature_profile: np.array,
                                                 molecular_abundance: float,
                                                 threshold: float,
                                                 abundance_jump: Union[float, int],
                                                 logger: Union[logging.Logger, None] = None) -> tuple:
    """
    Compute the average density and temperature profiles along the line-of-sight along with related quantities.
    This function computes the average density and temperature profiles along the LOS, along with related quantities
    such as molecular column density and the standard deviation of the species' number density. It first computes the
    molecular number density grid using the given density, temperature, abundance, temperature threshold, and abundance
    jump for simulating hot core-like abundance profiles. Then, it computes the average density and temperature profiles
    using the molecular number density grid as weights along the LOS. Finally, it calculates the
    molecular column density and its standard deviation.

    :param density_profile: array containing the density profiles
    :param temperature_profile: array containing the temperature profiles
    :param molecular_abundance: molecular abundance for computing the molecular number density
    :param threshold: threshold for adjusting the abundance in hot cores, where the molecule is potentially
        evaporated from dust grains
    :param abundance_jump: abundance jump in hot cores
    :param logger: logger object for messages; if not specified, a generic logger is created to report
        the total mass
    :return: a tuple with the average density profile, the average temperature profile, the molecular
        column density, and the standard deviation of the molecular number density
    """
    _logger = validate_parameter(logger, setup_logger('GENERIC'))
    config = load_config_file(config_file_path=os.path.join('stg', 'config', 'config.yml'))
    grid_metadata = extract_grid_metadata(config=config)
    molecule_grid = compute_molecular_number_density_hot_core(gas_number_density_profile=density_profile,
                                                              abundance=molecular_abundance,
                                                              temperature_profile=temperature_profile,
                                                              threshold=threshold,
                                                              abundance_jump=abundance_jump)
    avg_density_profile = compute_los_average_weighted_profile(profile=density_profile,
                                                               weights=molecule_grid)
    mass = (np.nansum(density_profile * u.cm ** -3 * grid_metadata['physical_px_size'][0]
                      * grid_metadata['physical_px_size'][1]
                      * grid_metadata['physical_px_size'][2]) * mean_molecular_mass * m_p).to(u.M_sun)
    _logger.debug(f'Total mass: {mass}')
    avg_temperature_profile = compute_los_average_weighted_profile(profile=temperature_profile,
                                                                   weights=molecule_grid)
    return (avg_density_profile,
            avg_temperature_profile,
            np.nansum(molecule_grid, axis=2) * grid_metadata['physical_px_size'][2].value,
            np.nanstd(molecule_grid, axis=2))


def compute_molecular_number_density_hot_core(gas_number_density_profile: np.array,
                                              abundance: float,
                                              temperature_profile: np.array,
                                              threshold: float,
                                              abundance_jump: Union[float, int]) -> np.array:
    """
    Compute the molecular number density, using a step function abundance, changing above a specific temperature to
    simulate evaporation
    :param gas_number_density_profile: the gas number density profile array
    :param abundance: the gas abundance of the species
    :param temperature_profile: the temperature profile of the source
    :param threshold: the threshold at which the species evaporate
    :param abundance_jump: the factor describing the abundance variation with respect to the base level
    :return: the array of molecular gas density computed using a step function profile
    """
    return np.where(temperature_profile < threshold,
                    gas_number_density_profile * abundance,
                    gas_number_density_profile * abundance * abundance_jump)
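
# Example, using the hot-core values adopted in assets.constants.config_overrides
# (threshold=90, abundance_jump=100): below 90 K the molecular number density is
# n(H2) * abundance; at or above 90 K it jumps to n(H2) * abundance * 100.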


def extract_grid_metadata(config: dict) -> dict:
    """
    Enrich grid metadata from the information in the configuration file
    :param config: configuration dictionary
    :return: a dictionary with the metadata
    """
    grid_config = config['grid']

    grid_metadata = grid_config.copy()
    grid_metadata['grid_type'] = radmc_grid_map[grid_config['grid_type']]
    grid_metadata['coordinate_system'] = radmc_coord_system_map[grid_config['coordinate_system']]
    grid_properties_keywords = ['shape', 'refpix', 'size', 'size_units']
    for key in grid_properties_keywords:
        grid_metadata[f'grid_{key}'] = get_grid_properties(grid_config=grid_config,
                                                           keyword=key)

    grid_metadata['physical_px_size'] = get_physical_px_size(grid_metadata)

    grid_metadata['centered_indices'] = get_centered_indices(grid_metadata)
    grid_metadata['coordinate_grid'] = compute_cartesian_coordinate_grid(indices=grid_metadata['centered_indices'],
                                                                         physical_px_size=grid_metadata[
                                                                             'physical_px_size'])

    grid_metadata['distance_matrix'] = get_distance_matrix(grid_metadata, grid_metadata['centered_indices'])

    grid_metadata['grid_edges'] = get_grid_edges(grid_metadata)
    grid_metadata['continuum_lambdas'] = 100
    return grid_metadata
+93 −0
import os
from typing import Tuple

import numpy as np

from assets.commons import (load_config_file,
                            validate_parameter)


def parse_grid_overrides(par_name: str,
                         config: dict) -> np.array:
    """
    Parse the global configuration for constructing the grid, computing the parameter values for which the grid has to
    be computed.
    :param par_name: parameter to parse
    :param config: configuration dictionary to extract the grid properties
    :return: the array of values to be considered in the grid
    """
    config_overrides = config['overrides']
    if config_overrides[f'{par_name}_grid_type'] == 'linear':
        grid_values = np.arange(float(config_overrides[f'{par_name}_limits'][0]),
                                float(config_overrides[f'{par_name}_limits'][1]),
                                float(config_overrides[f'{par_name}_step']))
    else:
        _step = float(config_overrides[f'{par_name}_step'])
        _step = _step if _step > 1 else 10 ** _step
        grid_values = 10 ** np.arange(np.log10(float(config_overrides[f'{par_name}_limits'][0])),
                                      np.log10(float(config_overrides[f'{par_name}_limits'][1])),
                                      np.log10(_step))
    return np.around(grid_values, 2)
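
# Example, using the values in etl/config/config.yml: the linear dust temperature
# grid gives np.arange(10, 35, 2.5) -> [10.0, 12.5, ..., 32.5] (the upper limit is
# excluded), while the log gas density grid spans 1e3-1e7 cm^-3 with a
# multiplicative step of 3 between consecutive values.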


def get_grid_properties(grid_config: dict,
                        keyword: str) -> list:
    """
    Compute the grid axes properties using the grid configuration dictionary. If less than 3 axes are provided, fill
        the missing ones with the first axis.
    :param grid_config: the dictionary containing the grid configuration metadata
    :param keyword: the keyword to read
    :return: a list of the requested keyword values over the three spatial axes
    """
    grid_property_list = [grid_config['dim1'][keyword]]
    for axis_idx in range(2):
        try:
            grid_property_list.append(grid_config[f'dim{axis_idx + 2}'][keyword])
        except KeyError:
            grid_property_list.append(grid_config['dim1'][keyword])
    return grid_property_list


def parse_input_main() -> Tuple[str, str, np.array, np.array, list, int, str]:
    """
    Parse the stg and mdl configuration files, as well as the main one, to apply overrides, compute the arrays of
    temperatures and number densities and the type of models defined in the grid (isothermal/uniform density vs.
    power-law profiles)
    :return: a tuple containing the dust profile type (isothermal/heated), the density profile (homogeneous/spherical),
        the distinct values of dust temperatures and densities in the grid, the line pairs used to compute ratios,
        the number of processes to be used in case of multiprocessing, and the model run_type string
    """
    default_config_name = 'config.yml'
    stg_config = load_config_file(os.path.join('stg', 'config', default_config_name))
    pl_density_idx = float(stg_config['grid']['density_powerlaw_idx'])
    pl_dust_temperature_idx = float(stg_config['grid']['dust_temperature_powerlaw_idx'])
    _model_type = 'spherical' if pl_density_idx != 0 else 'homogeneous'
    _tdust_model_type = 'heated' if pl_dust_temperature_idx != 0 else 'isothermal'
    config = load_config_file(os.path.join('config', default_config_name))
    # grid definition
    dust_temperatures = parse_grid_overrides(par_name='dust_temperature',
                                             config=config)
    densities = parse_grid_overrides(par_name='gas_density',
                                     config=config)
    line_pairs = config['overrides']['lines_to_process']
    n_processes = validate_parameter(config['computation']['threads'], default=10)
    return _tdust_model_type, _model_type, dust_temperatures, densities, line_pairs, n_processes, config['run_type']


def read_abundance_variation_schema(line_config: dict) -> dict:
    """
    Read and fill the abundance variation schema, defined as a step function
    :param line_config: the dictionary of the line configurations, for the abundances
    :return: the dictionary of the abundance variations per species, with the abundance jump factor and the temperature
        threshold at which evaporation happens
    """
    species_list = list(line_config['molecular_abundances'].keys())
    results_dict = {}
    for species in species_list:
        results_dict[species] = {
            'threshold': 20,
            'abundance_jump': 1
        }
        if 'hot_core_specs' in line_config.keys():
            if species in line_config['hot_core_specs']:
                results_dict[species] = line_config['hot_core_specs'][species]
    return results_dict
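
# Example: for a config without hot_core_specs every species gets the default
# {'threshold': 20, 'abundance_jump': 1} (i.e. no jump); with the hot_core_*
# model overrides, e-ch3oh gets {'threshold': 90, 'abundance_jump': 100}.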
+183 −0
import numpy as np
from .physical_constants import mean_molecular_mass

polaris_grid_map = {
    'spherical': 30
}

polaris_data_map = {
    "gas_number_density": 0,
    "dust_temperature": 2,
    "gas_temperature": 3,
    "molecular_abundance_mass_ratio": 17
}

radmc_grid_map = {
    'regular': 0,
    'spherical': 100
}

radmc_coord_system_map = {
    'cartesian': 0,
    'polar': 100
}

radmc_input_headers = {
    'amr_grid.inp': ['iformat', 'grid_type', 'coordinate_system', 'grid_info', 'active_axes', 'ncells_per_axis'],
    'dust_density.inp': ['iformat', 'ncells', 'dust_species'],
    'dust_temperature.dat': ['iformat', 'ncells', 'dust_species'],
    'microturbulence.inp': ['iformat', 'ncells'],
    'gas_velocity.inp': ['iformat', 'ncells'],
    'gas_temperature.inp': ['iformat', 'ncells'],
    'wavelength_micron.inp': ['continuum_lambdas'],
    'numberdens_mol.inp': ['iformat', 'ncells'],
    'escprob_lengthscale.inp': ['iformat', 'ncells'],
    'stars.inp': ['iformat', 'nstars', 'continuum_lambdas', 'stars_properties', 'lambdas']
}

leiden_url_mapping = {
    'co': 'https://home.strw.leidenuniv.nl/~moldata/datafiles/co.dat',
    'e-ch3oh': 'https://home.strw.leidenuniv.nl/~moldata/datafiles/e-ch3oh.dat',
    'a-ch3oh': 'https://home.strw.leidenuniv.nl/~moldata/datafiles/ch3oh_a.dat'
}

radmc_lines_mode_mapping = {
    'lte': 1,
    'user_defined_populations': 2,
    'lvg': 3,
    'optically_thin_non_lte': 4,
}

radmc_options_mapping = {
    'inclination': 'incl',
    'position_angle': 'phi',
    'imolspec': 'imolspec',
    'iline': 'iline',
    'width_kms': 'widthkms',
    'nchannels': 'linenlam',
    'npix': 'npix',
    'image_size_pc': 'sizepc',
    'threads': 'setthreads'
}

aggregation_function_mapping = {
    'mean': np.nanmean,
    'sum': np.nansum,
    'median': np.nanmedian
}

line_ratio_mapping = {
    '87-86': r'$I(2_{0}-1_{0})/I(2_{-1}-1_{-1})$',
    '88-86': r'$I(2_{1}-1_{1})/I(2_{-1}-1_{-1})$',
    '88-87': r'$I(2_{1}-1_{1})/I(2_{0}-1_{0})$',
    '257-256': r'$I(5_{-1}-4_{-1})/I(5_{0}-4_{0})$',
    '381-380': r'$I(7_{-1}-6_{-1})/I(7_{0}-6_{0})$'
}

config_overrides = {
    'isothermal_p15': {
        'grid': {
            'power_law_temperature': 0
        }
    },
    'isothermal_p12': {
        'grid': {
            'power_law_density': -1.2,
            'power_law_temperature': 0
        }
    },
    'isothermal_p18': {
        'grid': {
            'power_law_density': -1.8,
            'power_law_temperature': 0
        }
    },
    'constant_abundance_p15_q05': {},
    'constant_abundance_p12_q05': {
        'grid': {
            'power_law_density': -1.2,
        }
    },
    'constant_abundance_p18_q05': {
        'grid': {
            'power_law_density': -1.8,
        }
    },
    'constant_abundance_p15_q05_x01': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-10
            }
        }
    },
    'constant_abundance_p15_q05_x10': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-8
            }
        }
    },
    'hot_core_p15_q05': {
        'lines': {
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_p12_q05': {
        'grid': {
            'power_law_density': -1.2,
        },
        'lines': {
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_p18_q05': {
        'grid': {
            'power_law_density': -1.8,
        },
        'lines': {
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_x10': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-8
            },
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_x01': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-10
            },
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'double_microturbulence': {},
}
+116 −0
# select
# 	agal_name
# 	, round("N_H2_peak" / (2 * r_fwhm * 3.0856e18)) as volume_density
# from top100_sed_data tsd
"agal_name","volume_density"
AGAL006.216-00.609,89795.0
AGAL008.671-00.356,190969.0
AGAL008.684-00.367,68138.0
AGAL008.706-00.414,27785.0
AGAL008.831-00.027,NaN
AGAL010.444-00.017,22907.0
AGAL010.472+00.027,366225.0
AGAL010.624-00.384,299726.0
AGAL012.804-00.199,375774.0
AGAL013.178+00.059,68826.0
AGAL013.658-00.599,52684.0
AGAL014.114-00.574,75908.0
AGAL014.194-00.194,78815.0
AGAL014.492-00.139,74017.0
AGAL014.632-00.577,127991.0
AGAL015.029-00.669,209966.0
AGAL018.606-00.074,43782.0
AGAL018.734-00.226,19630.0
AGAL018.888-00.474,56893.0
AGAL019.609-00.234,92787.0
AGAL019.882-00.534,136234.0
AGAL022.376+00.447,82445.0
AGAL023.206-00.377,163688.0
AGAL024.416+00.101,15957.0
AGAL024.629+00.172,15443.0
AGAL024.651-00.169,7869.0
AGAL024.789+00.082,108737.0
AGAL028.564-00.236,37289.0
AGAL028.861+00.066,19217.0
AGAL030.818-00.056,386655.0
AGAL030.848-00.081,23220.0
AGAL030.893+00.139,49987.0
AGAL031.412+00.307,389457.0
AGAL034.258+00.154,1700013.0
AGAL034.401+00.226,218025.0
AGAL034.411+00.234,495365.0
AGAL034.821+00.351,68475.0
AGAL035.197-00.742,228801.0
AGAL035.579+00.007,8844.0
AGAL037.554+00.201,27842.0
AGAL043.166+00.011,221873.0
AGAL049.489-00.389,864473.0
AGAL053.141+00.069,184095.0
AGAL059.782+00.066,94354.0
AGAL301.136-00.226,259527.0
AGAL305.192-00.006,44264.0
AGAL305.209+00.206,183284.0
AGAL305.562+00.014,50295.0
AGAL305.794-00.096,30607.0
AGAL309.384-00.134,34424.0
AGAL310.014+00.387,38275.0
AGAL313.576+00.324,56581.0
AGAL316.641-00.087,119766.0
AGAL317.867-00.151,159134.0
AGAL318.779-00.137,34706.0
AGAL320.881-00.397,16371.0
AGAL326.661+00.519,93292.0
AGAL326.987-00.032,74912.0
AGAL327.119+00.509,29492.0
AGAL327.293-00.579,1286938.0
AGAL327.393+00.199,36543.0
AGAL328.809+00.632,350565.0
AGAL329.029-00.206,34454.0
AGAL329.066-00.307,14237.0
AGAL330.879-00.367,218474.0
AGAL330.954-00.182,322707.0
AGAL331.709+00.582,23291.0
AGAL332.094-00.421,83157.0
AGAL332.826-00.549,407894.0
AGAL333.134-00.431,200007.0
AGAL333.284-00.387,117459.0
AGAL333.314+00.106,53337.0
AGAL333.604-00.212,210139.0
AGAL333.656+00.059,21760.0
AGAL335.789+00.174,135994.0
AGAL336.958-00.224,28640.0
AGAL337.176-00.032,11767.0
AGAL337.258-00.101,40725.0
AGAL337.286+00.007,21253.0
AGAL337.406-00.402,266919.0
AGAL337.704-00.054,76912.0
AGAL337.916-00.477,386876.0
AGAL338.066+00.044,15655.0
AGAL338.786+00.476,38848.0
AGAL338.926+00.554,154575.0
AGAL339.623-00.122,40057.0
AGAL340.374-00.391,66279.0
AGAL340.746-01.001,43744.0
AGAL340.784-00.097,30196.0
AGAL341.217-00.212,77596.0
AGAL342.484+00.182,20980.0
AGAL343.128-00.062,280086.0
AGAL343.756-00.164,319987.0
AGAL344.227-00.569,609039.0
AGAL345.003-00.224,303461.0
AGAL345.488+00.314,225021.0
AGAL345.504+00.347,168516.0
AGAL345.718+00.817,90728.0
AGAL351.131+00.771,43470.0
AGAL351.161+00.697,743236.0
AGAL351.244+00.669,238178.0
AGAL351.416+00.646,2489024.0
AGAL351.444+00.659,1201754.0
AGAL351.496+00.691,78226.0
AGAL351.571+00.762,61074.0
AGAL351.581-00.352,234338.0
AGAL351.774-00.537,2754463.0
AGAL353.066+00.452,153199.0
AGAL353.409-00.361,202379.0
AGAL353.417-00.079,8376.0
AGAL354.944-00.537,46271.0

etl/assets/utils.py

0 → 100644
+52 −0
import os
import pandas as pd
import yaml
from assets.constants import config_overrides
from typing import (Union,
                    Tuple)
from assets.commons import validate_parameter


def get_poc_results(line_fit_filename: str) -> pd.DataFrame:
    df_class = pd.read_csv(os.path.join('assets', 'data', line_fit_filename))
    with open(os.path.join('prs', 'output', 'run_type', 'constant_abundance_p15_q05', 'volume_density_results.yml'),
              'r') as infile:
        fit_results = yaml.load(infile, Loader=yaml.FullLoader)
    df_results = pd.DataFrame.from_dict(fit_results, orient='index')
    df_full = (df_results.merge(df_class, left_index=True, right_on='source_name', how='inner', validate='1:1')
               .dropna(subset='best_fit'))
    df_full['class_phys'] = 'HII'
    df_full.loc[df_full['class'] == '70w', 'class_phys'] = 'Quiescent'
    df_full.loc[df_full['class'] == 'IRw', 'class_phys'] = 'Protostellar'
    df_full.loc[df_full['class'] == 'IRb', 'class_phys'] = 'MYSOs'
    return df_full


def get_profile_input(
        model_root_folder: str,
        species: Union[None, str] = None) -> Tuple[dict, dict]:
    overrides = config_overrides[model_root_folder.split(os.path.sep)[-1]]
    _species = validate_parameter(species, default='e-ch3oh')
    try:
        overrides_grid = overrides['grid']
    except KeyError:
        overrides_grid = {}
    try:
        overrides_lines = overrides['lines']
    except KeyError:
        overrides_lines = {}
    overrides_abundance = {}

    try:
        overrides_abundance['molecular_abundance'] = overrides_lines['molecular_abundances'][_species]
    except KeyError:
        pass

    for key in ['abundance_jump', 'threshold']:
        try:
            overrides_abundance[key] = overrides_lines['hot_core_specs'][_species][key]
        except KeyError:
            pass

    return overrides_grid, overrides_abundance

etl/config/config.yml

0 → 100644
+14 −0
run_type: constant_abundance_p15_q05

computation:
    threads: 10

overrides:
    dust_temperature_grid_type: 'linear'
    dust_temperature_limits: [10, 35]
    dust_temperature_step: 2.5
    gas_density_grid_type: 'log'
    gas_density_limits: [1e3, 1e7]
    gas_density_step: 3
    gas_density_unit: cm^-3
    lines_to_process: [['87', '86'], ['88', '87'], ['88', '86'], ['257', '256'], ['381', '380']]
+473 −0
best_bandwidths:
  257-256: 0.4
  381-380: 0.2
  87-86: 0.1
  88-86: 0.1
  88-87: 0.1
integrated_intensities_uncertainties:
  '256':
    G08.71-0.41: 0.3772371124870143
    G10.45-0.02: 0.3438149090944644
    G10.47+0.03: 4.67062615566759
    G10.62-0.38: 5.717801509967671
    G12.81-0.20: 2.088877254036277
    G13.18+0.06: 1.43381518306444
    G13.66-0.60: 1.1620189309770297
    G14.11-0.57: 0.3933488163719442
    G14.19-0.19: 1.9966333535808136
    G14.49-0.14: 0.5232957299744114
    G14.63-0.58: 0.6760059683722098
    G15.03-0.67: 1.3201864103978675
    G18.61-0.07: 0.449933406945426
    G18.73-0.23: 1.1925529389006952
    G18.89-0.47: 1.2420175138135696
    G19.88-0.54: 1.1775666735052563
    G22.37+0.45: 0.8397889239422417
    G23.21-0.38: 2.366268748274346
    G28.56-0.24: 0.7853900725084517
    G28.86+0.07: 0.5
    G30.85-0.08: 0.5698331975250233
    G31.41+0.31: 2.794406439049449
    G34.26+0.15: 5.571485634832507
    G34.40+0.23: 2.4222527304195376
    G34.41+0.23: 2.455950237377704
    G34.82+0.35: 0.1947198188044887
    G35.20-0.74: 1.9116488388286448
    G37.55+0.20: 0.7800057066048043
    G49.49-0.39: 13.79415939155848
    G53.14+0.07: 0.9832922403050368
    G59.78+0.07: 0.5
  '257':
    G08.71-0.41: 0.4136351969963581
    G10.45-0.02: 0.3567836016634692
    G10.47+0.03: 4.737191542189759
    G10.62-0.38: 5.828217319934533
    G12.81-0.20: 2.159742388795141
    G13.18+0.06: 1.528153771626887
    G13.66-0.60: 1.2364636458899043
    G14.11-0.57: 0.4315807681683414
    G14.19-0.19: 2.1114871424317903
    G14.49-0.14: 0.5631062582040194
    G14.63-0.58: 0.7362604233371383
    G15.03-0.67: 1.3883181061379544
    G18.61-0.07: 0.4918960784434188
    G18.73-0.23: 1.2489705007902534
    G18.89-0.47: 1.30997858860221
    G19.88-0.54: 1.2556873263733366
    G22.37+0.45: 0.9012219713131504
    G23.21-0.38: 2.4463347346958884
    G28.56-0.24: 0.8256372552866202
    G28.86+0.07: 0.5
    G30.85-0.08: 0.5939589107713851
    G31.41+0.31: 2.830674270184918
    G34.26+0.15: 5.689969110997285
    G34.40+0.23: 2.5501966207773785
    G34.41+0.23: 2.586820094338446
    G34.82+0.35: 0.2153653834587912
    G35.20-0.74: 2.009289929523078
    G37.55+0.20: 0.8302500355158099
    G49.49-0.39: 13.996345324993044
    G53.14+0.07: 1.037736129080711
    G59.78+0.07: 0.5
  '380':
    G08.71-0.41: 0.1719132347425028
    G10.45-0.02: 0.3350748768496078
    G10.47+0.03: 2.5238526974413102
    G10.62-0.38: 1.9730421303059005
    G12.81-0.20: 0.4938645969760634
    G13.18+0.06: 0.3130453732259515
    G13.66-0.60: 0.3896020198637059
    G14.11-0.57: 0.1604869867792384
    G14.19-0.19: 0.3718943617922549
    G14.49-0.14: 0.3980987437341203
    G14.63-0.58: 0.1942770443673398
    G15.03-0.67: 0.4226996779770496
    G18.61-0.07: 0.3104707669438686
    G18.73-0.23: 0.3983825447868095
    G18.89-0.47: 0.3283273384950035
    G19.88-0.54: 0.297825582498799
    G22.37+0.45: 0.2435883035421827
    G23.21-0.38: 0.4720878606853434
    G28.56-0.24: 0.3689771528703325
    G28.86+0.07: 0.2133053661117121
    G30.85-0.08: 0.4002321844656681
    G31.41+0.31: 1.7677726204007878
    G34.26+0.15: 2.2053525649662755
    G34.40+0.23: 0.3157737464671486
    G34.41+0.23: 0.552143182260122
    G34.82+0.35: 0.3762696495121889
    G35.20-0.74: 0.3277361642527309
    G37.55+0.20: 0.3505786313165484
    G49.49-0.39: 6.458962659096731
    G53.14+0.07: 0.2659096119306583
    G59.78+0.07: 0.1958691834258987
  '381':
    G08.71-0.41: 0.1912145881150111
    G10.45-0.02: 0.348675803268881
    G10.47+0.03: 2.527338233134208
    G10.62-0.38: 2.000038955895338
    G12.81-0.20: 0.5078276831867471
    G13.18+0.06: 0.3327236010567068
    G13.66-0.60: 0.4023699128083662
    G14.11-0.57: 0.1778607797165793
    G14.19-0.19: 0.3901217659238757
    G14.49-0.14: 0.410805519891472
    G14.63-0.58: 0.2096518477508132
    G15.03-0.67: 0.4404189229260346
    G18.61-0.07: 0.3250267274896626
    G18.73-0.23: 0.4111428033705079
    G18.89-0.47: 0.3562705793927619
    G19.88-0.54: 0.3054447929243074
    G22.37+0.45: 0.2565267403612208
    G23.21-0.38: 0.47971744770208
    G28.56-0.24: 0.3858032381571517
    G28.86+0.07: 0.2244369344927569
    G30.85-0.08: 0.4287274799425559
    G31.41+0.31: 1.7849749705573348
    G34.26+0.15: 2.2213861922198466
    G34.40+0.23: 0.3283727163284978
    G34.41+0.23: 0.5631980961359404
    G34.82+0.35: 0.3873761358129582
    G35.20-0.74: 0.3362751271550414
    G37.55+0.20: 0.3531542428043672
    G49.49-0.39: 6.498419983328799
    G53.14+0.07: 0.278825242987693
    G59.78+0.07: 0.2103971057794001
  '86':
    G08.71-0.41: 0.1141338092612959
    G10.45-0.02: 0.1513227343419588
    G10.47+0.03: 0.2436313155314365
    G10.62-0.38: 0.2543853719744313
    G12.81-0.20: 0.2120144583210657
    G13.18+0.06: 0.1444849558699236
    G13.66-0.60: 0.1880743486996585
    G14.11-0.57: 0.0991920101690233
    G14.19-0.19: 0.2591102799195194
    G14.49-0.14: 0.1932137455379076
    G14.63-0.58: 0.0920867956240891
    G15.03-0.67: 0.1519740165468335
    G18.61-0.07: 0.1336149687864388
    G18.73-0.23: 0.1544601974519147
    G18.89-0.47: 0.284814022368405
    G19.88-0.54: 0.1238378758928978
    G22.37+0.45: 0.1935045511182322
    G23.21-0.38: 0.1601721389776916
    G28.56-0.24: 0.1699145015331222
    G28.86+0.07: 0.1037452815160559
    G30.85-0.08: 0.1481775324641313
    G31.41+0.31: 0.2194502880473983
    G34.26+0.15: 0.1627530591328137
    G34.40+0.23: 0.1351065672329636
    G34.41+0.23: 0.2465062371409508
    G34.82+0.35: 0.0865664840020827
    G35.20-0.74: 0.1340658506805993
    G37.55+0.20: 0.1253184895008262
    G49.49-0.39: 0.2791842617996957
    G53.14+0.07: 0.0768660912353171
    G59.78+0.07: 0.0574168367111523
  '87':
    G08.71-0.41: 0.111937365388756
    G10.45-0.02: 0.1491063536449446
    G10.47+0.03: 0.2419807697773685
    G10.62-0.38: 0.2507814699869695
    G12.81-0.20: 0.2090805176901697
    G13.18+0.06: 0.1418278810377906
    G13.66-0.60: 0.1841371839860628
    G14.11-0.57: 0.097470593166154
    G14.19-0.19: 0.2519127822081297
    G14.49-0.14: 0.188762628907307
    G14.63-0.58: 0.0892839165361122
    G15.03-0.67: 0.1499962128329128
    G18.61-0.07: 0.1315931235851593
    G18.73-0.23: 0.1517892338894
    G18.89-0.47: 0.2784484436939946
    G19.88-0.54: 0.121808079112921
    G22.37+0.45: 0.1892876467707792
    G23.21-0.38: 0.1576533484208159
    G28.56-0.24: 0.1640239796382693
    G28.86+0.07: 0.1023581134022235
    G30.85-0.08: 0.145424188960802
    G31.41+0.31: 0.2170064162555401
    G34.26+0.15: 0.16145840413561
    G34.40+0.23: 0.1324170108062351
    G34.41+0.23: 0.2427485184738539
    G34.82+0.35: 0.0851539809088609
    G35.20-0.74: 0.1319126641393791
    G37.55+0.20: 0.122848111292009
    G49.49-0.39: 0.2773996831748604
    G53.14+0.07: 0.075643530717601
    G59.78+0.07: 0.0555749088316803
  '88':
    G08.71-0.41: 0.1118095857454675
    G10.45-0.02: 0.1483259532833787
    G10.47+0.03: 0.2409668716291754
    G10.62-0.38: 0.2495329007627918
    G12.81-0.20: 0.2084495919578603
    G13.18+0.06: 0.1414209323698791
    G13.66-0.60: 0.1831553534762108
    G14.11-0.57: 0.0972920392752167
    G14.19-0.19: 0.2507189304794556
    G14.49-0.14: 0.1882827587313301
    G14.63-0.58: 0.0890047760093749
    G15.03-0.67: 0.1495172793656858
    G18.61-0.07: 0.1313898333367211
    G18.73-0.23: 0.1513505926924855
    G18.89-0.47: 0.2779301291723239
    G19.88-0.54: 0.121507432969849
    G22.37+0.45: 0.1889958485649907
    G23.21-0.38: 0.1568877828888521
    G28.56-0.24: 0.1635890088005906
    G28.86+0.07: 0.1020862031904043
    G30.85-0.08: 0.1451689199961562
    G31.41+0.31: 0.2158655413134478
    G34.26+0.15: 0.1607951465382442
    G34.40+0.23: 0.1320891607520888
    G34.41+0.23: 0.2421195456012388
    G34.82+0.35: 0.0850472508528964
    G35.20-0.74: 0.1316317053747658
    G37.55+0.20: 0.1222906026734552
    G49.49-0.39: 0.2758738141000653
    G53.14+0.07: 0.0755106793427082
    G59.78+0.07: 0.0553352170005506
measured_integrated_intensities:
  '256':
    G08.71-0.41: 0.2225977131194544
    G10.45-0.02: 0.6483658492610417
    G10.47+0.03: 12.781079959863508
    G10.62-0.38: 14.026812217900536
    G12.81-0.20: 4.594177385112016
    G13.18+0.06: 2.2560621827614065
    G13.66-0.60: 2.2460391103183417
    G14.11-0.57: 0.4209070251778891
    G14.19-0.19: 3.080405790891515
    G14.49-0.14: 0.7458559310555399
    G14.63-0.58: 1.2815176351639286
    G15.03-0.67: 2.5627713663667446
    G18.61-0.07: 0.6336927890231682
    G18.73-0.23: 1.7197328016314029
    G18.89-0.47: 2.044797825251944
    G19.88-0.54: 2.689820011325686
    G22.37+0.45: 1.0252721670025875
    G23.21-0.38: 5.128448315345341
    G28.56-0.24: 0.3548360822544717
    G28.86+0.07: 0.1
    G30.85-0.08: 0.5697324971999391
    G31.41+0.31: 8.11815427336179
    G34.26+0.15: 14.800657803071704
    G34.40+0.23: 3.986394686990907
    G34.41+0.23: 5.298745541451453
    G34.82+0.35: 0.2244740966733627
    G35.20-0.74: 3.557121906245741
    G37.55+0.20: 1.45314493723405
    G49.49-0.39: 36.47299209277803
    G53.14+0.07: 1.769951086391564
    G59.78+0.07: 0.1
  '257':
    G08.71-0.41: 1.6896248544507602
    G10.45-0.02: 1.6172560812119725
    G10.47+0.03: 17.90790753460651
    G10.62-0.38: 23.51945555648909
    G12.81-0.20: 9.576422834532323
    G13.18+0.06: 6.614500597410293
    G13.66-0.60: 5.099529057565239
    G14.11-0.57: 2.328801003580471
    G14.19-0.19: 8.648952235029466
    G14.49-0.14: 2.4117084959115784
    G14.63-0.58: 4.22783491577442
    G15.03-0.67: 6.482321394374812
    G18.61-0.07: 2.119319392382522
    G18.73-0.23: 4.953380016604151
    G18.89-0.47: 5.763854872422209
    G19.88-0.54: 6.538781173715317
    G22.37+0.45: 3.680142077328448
    G23.21-0.38: 9.457619850247136
    G28.56-0.24: 2.68014308698633
    G28.86+0.07: 0.1
    G30.85-0.08: 2.087101701007916
    G31.41+0.31: 10.657245054323353
    G34.26+0.15: 23.76415857208871
    G34.40+0.23: 10.868969226850243
    G34.41+0.23: 11.460979034617344
    G34.82+0.35: 1.0392490902135514
    G35.20-0.74: 9.236714367808798
    G37.55+0.20: 3.44395156070276
    G49.49-0.39: 53.88508148720575
    G53.14+0.07: 4.882683930286012
    G59.78+0.07: 0.1
  '380':
    G08.71-0.41: 0.1764976739921008
    G10.45-0.02: 0.3956084197099886
    G10.47+0.03: 17.205686694830824
    G10.62-0.38: 16.829857896249838
    G12.81-0.20: 3.77800331658647
    G13.18+0.06: 1.7426684781184258
    G13.66-0.60: 2.050893367850886
    G14.11-0.57: 0.4213659117392063
    G14.19-0.19: 3.0914972380702115
    G14.49-0.14: 0.5203058400694469
    G14.63-0.58: 0.7159602280790358
    G15.03-0.67: 2.1151044891765216
    G18.61-0.07: 0.6989085703534818
    G18.73-0.23: 2.341561030185213
    G18.89-0.47: 1.1824614382740015
    G19.88-0.54: 5.43307241666044
    G22.37+0.45: 0.8920116613769251
    G23.21-0.38: 6.836653617571014
    G28.56-0.24: 0.0440146014079702
    G28.86+0.07: 1.4210884372776502
    G30.85-0.08: 0.5707347171006769
    G31.41+0.31: 12.47776135190902
    G34.26+0.15: 22.863445215418928
    G34.40+0.23: 3.531757873095086
    G34.41+0.23: 7.573594043835107
    G34.82+0.35: 0.5068421046687672
    G35.20-0.74: 3.735886344534172
    G37.55+0.20: 2.1481844033314563
    G49.49-0.39: 59.72041757348511
    G53.14+0.07: 2.1621252984074437
    G59.78+0.07: 0.9953421793454204
  '381':
    G08.71-0.41: 0.6722454928574747
    G10.45-0.02: 0.9251788351499816
    G10.47+0.03: 18.155676944590947
    G10.62-0.38: 26.31767222497715
    G12.81-0.20: 6.359416798755954
    G13.18+0.06: 5.105986039174396
    G13.66-0.60: 3.5815529807897217
    G14.11-0.57: 1.4017924550811132
    G14.19-0.19: 7.297072065038203
    G14.49-0.14: 1.440371736841573
    G14.63-0.58: 3.008997339860133
    G15.03-0.67: 5.842826943986542
    G18.61-0.07: 1.8679581479443947
    G18.73-0.23: 4.779917207406943
    G18.89-0.47: 3.8325153184359935
    G19.88-0.54: 10.285222566660105
    G22.37+0.45: 2.6850343064569504
    G23.21-0.38: 9.739053502995969
    G28.56-0.24: 0.9680884829289998
    G28.86+0.07: 2.7304343388726755
    G30.85-0.08: 1.7951582288444188
    G31.41+0.31: 15.666989012027273
    G34.26+0.15: 27.670510995076157
    G34.40+0.23: 8.838480648374599
    G34.41+0.23: 12.543875121955834
    G34.82+0.35: 0.912504455201019
    G35.20-0.74: 8.213603364738846
    G37.55+0.20: 2.427780024280013
    G49.49-0.39: 71.80734717626261
    G53.14+0.07: 4.9231567416211615
    G59.78+0.07: 2.24196075698127
  '86':
    G08.71-0.41: 5.770415738358244
    G10.45-0.02: 3.585944537913708
    G10.47+0.03: 19.61767281185442
    G10.62-0.38: 17.24610289004276
    G12.81-0.20: 8.431310860817968
    G13.18+0.06: 10.867003106610996
    G13.66-0.60: 8.534961548014586
    G14.11-0.57: 3.616973808521042
    G14.19-0.19: 15.749893351960294
    G14.49-0.14: 6.365582995343145
    G14.63-0.58: 5.623662981046849
    G15.03-0.67: 4.060482703151656
    G18.61-0.07: 4.924693385096532
    G18.73-0.23: 10.095062635647638
    G18.89-0.47: 13.902280291349596
    G19.88-0.54: 7.349704071521969
    G22.37+0.45: 8.934333184369493
    G23.21-0.38: 10.589381466381129
    G28.56-0.24: 7.718943160772005
    G28.86+0.07: 3.744479978305277
    G30.85-0.08: 3.600081120187853
    G31.41+0.31: 10.334408688143643
    G34.26+0.15: 17.148782015804713
    G34.40+0.23: 14.72159545617476
    G34.41+0.23: 17.151716147853218
    G34.82+0.35: 2.3557453894152305
    G35.20-0.74: 11.441136922618975
    G37.55+0.20: 5.218930784342653
    G49.49-0.39: 40.047671620058914
    G53.14+0.07: 3.998167013285548
    G59.78+0.07: 2.88330892529155
  '87':
    G08.71-0.41: 1.3070165152749178
    G10.45-0.02: 1.8754428022340208
    G10.47+0.03: 14.654313712932716
    G10.62-0.38: 10.36368884187998
    G12.81-0.20: 4.037590978170881
    G13.18+0.06: 3.998842732080239
    G13.66-0.60: 3.965098726041119
    G14.11-0.57: 1.0980674058983302
    G14.19-0.19: 5.9799020315281455
    G14.49-0.14: 1.94278246727516
    G14.63-0.58: 1.7565446299584897
    G15.03-0.67: 1.9599610541713943
    G18.61-0.07: 1.4529504405718856
    G18.73-0.23: 3.946558643004384
    G18.89-0.47: 3.87746848292546
    G19.88-0.54: 2.725369130194149
    G22.37+0.45: 2.2334635415722928
    G23.21-0.38: 5.3215576604812105
    G28.56-0.24: 2.00135317412325
    G28.86+0.07: 1.6724447166657392
    G30.85-0.08: 1.0384292597032818
    G31.41+0.31: 7.634235223264329
    G34.26+0.15: 11.79339165343781
    G34.40+0.23: 4.893358882791944
    G34.41+0.23: 6.975706242571226
    G34.82+0.35: 0.6233212838879257
    G35.20-0.74: 4.120487302502623
    G37.55+0.20: 2.3647012203938407
    G49.49-0.39: 30.638278402401056
    G53.14+0.07: 1.2755372740021984
    G59.78+0.07: 1.051817414971558
  '88':
    G08.71-0.41: 0.2555605423861423
    G10.45-0.02: 0.5654586375969508
    G10.47+0.03: 10.621236220690491
    G10.62-0.38: 6.590559744325095
    G12.81-0.20: 2.2391932908285748
    G13.18+0.06: 1.043058241991689
    G13.66-0.60: 1.388687476672136
    G14.11-0.57: 0.2400487988472261
    G14.19-0.19: 1.3734457205383674
    G14.49-0.14: 0.1719527527259724
    G14.63-0.58: 0.6227318765911359
    G15.03-0.67: 0.9484169569327036
    G18.61-0.07: 0.2409772077030058
    G18.73-0.23: 1.43132088364319
    G18.89-0.47: 1.240400088439042
    G19.88-0.54: 0.940430197609148
    G22.37+0.45: 0.4850629557476013
    G23.21-0.38: 1.900341099965392
    G28.56-0.24: 0.3811019169488633
    G28.86+0.07: 0.8361276696375168
    G30.85-0.08: 0.2353506461831835
    G31.41+0.31: 6.015384520155143
    G34.26+0.15: 7.861370828025846
    G34.40+0.23: 1.3090143608612992
    G34.41+0.23: 3.094135827250596
    G34.82+0.35: 0.1891164402500769
    G35.20-0.74: 1.7277506260977236
    G37.55+0.20: 0.934902424028627
    G49.49-0.39: 19.50272354704647
    G53.14+0.07: 0.4187857086494883
    G59.78+0.07: 0.4564477452483653
probability_threshold: 0.37
ratio_limits: {
  '87-86': [ 0, 1.7 ],
  '88-86': [ 0, 1.7 ],
  '88-87': [ 0, 1.7 ],
  '257-256': [ 0, 15 ],
  '381-380': [ 0, 4 ],
}
ratios_to_include:
- 87-86
- 88-86
- 88-87
- 257-256
- 381-380
recompute_kde: true
simulated_ratio_realizations: 1000
use_model_for_inference: PLACEHOLDER
+71 −0
retrain: True
use_model_for_training: constant_abundance_p15_q05

overrides: {
  'tdust_grid_type': 'linear',
  'tdust_limits': [ 10, 35 ],
  'tdust_step': 2.5,
  'nh2_grid_type': 'log',
  'nh2_limits': [ 1e3, 6.5e7 ],
  'nh2_step': 1.316074013
}

#model_type: 'XGBoost'
#model_parameters: {
#  'n_estimators': 300,
#  'max_depth': 4,
#  'learning_rate': 0.02,
#  'subsample': 0.5
#}

use_validation: True

model_type: 'XGBoost_gridsearch'
model_parameters: {
  'param_grid':
    {
      'n_estimators': [ 300, 320, 50 ],
      'max_depth': [ 5, 7, 2 ],
      'learning_rate': [ 0.05, 0.1, 0.03 ],
      'subsample': [ 1, 2, 1 ],
      'lambda': [ 10, 20, 10 ],
      'colsample_bynode': [ 0.75, 0.8, 0.1 ],
#      'colsample_bylevel': [ 0.5, 1, 0.1 ],
      'colsample_bytree': [ 0.75, 0.8, 0.2 ],
      'min_child_weight': [ 100, 200, 100 ]
    },
  'param_gridsearch':
    {
      'cv': 5,
      'n_jobs': 16
    },
}
#model_parameters: {
#  'param_grid':
#    {
#      'n_estimators': [ 400, 900, 100 ],
#      'max_depth': [ 3, 7, 2 ],
#      'learning_rate': [ 0.01, 0.06, 0.02 ],
#      'subsample': [ 0.3, 0.9, 0.2 ]
#    },
#  'param_gridsearch':
#    {
#      'cv': 3,
#      'n_jobs': 16
#    },
#}

#model_type: 'auto_skl'
#model_parameters: {
#  'time_left_for_this_task': 3600,
#  'per_run_time_limit': 600,
#  'n_jobs': 12
#}

#model_type: 'RandomForest'
#model_parameters: {
#  'n_estimators': 20,
#  'criterion': 'friedman_mse',
#  'max_samples': 0.5
#}
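
As a reading aid, here is a minimal sketch of how this training configuration could be consumed, under two assumptions: 'linear' grids step additively while 'log' grids step by a multiplicative factor, and each param_grid triplet is a (start, stop, step) specification for np.arange (suggested by the ranges above). The expand_grid and run_gridsearch helpers are illustrative, not part of the repository, and XGBRegressor stands in for whatever estimator the pipeline actually builds:

import numpy as np
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor


def expand_grid(grid_type, limits, step):
    # Assumption: linear grids step additively, log grids multiply by a
    # constant factor until the upper limit is exceeded.
    if grid_type == 'linear':
        return np.arange(limits[0], limits[1] + step, step)
    values = [limits[0]]
    while values[-1] * step <= limits[1]:
        values.append(values[-1] * step)
    return np.array(values)


tdust_grid = expand_grid('linear', [10, 35], 2.5)         # 10, 12.5, ..., 35
nh2_grid = expand_grid('log', [1e3, 6.5e7], 1.316074013)  # about 41 log-spaced points


def run_gridsearch(model_parameters, x_train, y_train):
    # Assumption: each triplet is (start, stop, step) for np.arange, so e.g.
    # [300, 320, 50] -> [300] and [0.05, 0.1, 0.03] -> [0.05, 0.08].
    param_grid = {key: list(np.arange(*triplet))
                  for key, triplet in model_parameters['param_grid'].items()}
    search = GridSearchCV(estimator=XGBRegressor(),
                          param_grid=param_grid,
                          **model_parameters['param_gridsearch'])
    search.fit(x_train, y_train)
    return search.best_estimator_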
Original line number Diff line number Diff line
# add a db_credentials.yml file with your credentials. The structure should be:
#DB_USER: db_user B64-encoded
#DB_PASS: db_pass B64-encoded
#DB_HOST: db_host
#DB_NAME: db_name
#DB_PORT: db_port
 No newline at end of file
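
A minimal sketch of how such a file could be generated, assuming plain base64 for the user and password fields; the write_credentials helper and the output path are illustrative:

import base64
import yaml


def write_credentials(user, password, host, name, port):
    # Only the user and password are base64-encoded, per the template above.
    credentials = {
        'DB_USER': base64.b64encode(user.encode()).decode(),
        'DB_PASS': base64.b64encode(password.encode()).decode(),
        'DB_HOST': host,
        'DB_NAME': name,
        'DB_PORT': port,
    }
    with open('credentials/db_credentials.yml', 'w') as outfile:
        yaml.dump(credentials, outfile)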

etl/main.py

0 → 100644
+384 −0
import glob
import os
import uuid
import sqlalchemy
import argparse
import sys
from multiprocessing import Pool
from stg.stg_build_db_structure import init_db, TmpExecutionQueue
from itertools import product, chain
from typing import Union, Tuple, Iterator
from assets.commons import (cleanup_directory,
                            setup_logger,
                            validate_parameter)
from assets.commons.parsing import parse_input_main
from assets.commons.db_utils import upsert, get_pg_engine
from stg.stg_radmc_input_generator import main as stg_main
from mdl.mdl_execute_radmc_command import main as execute_radmc_script
from prs.prs_compute_integrated_fluxes_and_ratios import main as prs_main
from prs.prs_inspect_results import main as prs_inspection_main


def compute_full_grid(tdust: float,
                      nh2: float,
                      line: int,
                      density_keyword: str,
                      dust_temperature_keyword: str) -> Tuple[float, float, int, str]:
    """
        Compute the full grid for a given dust temperature, hydrogen density, and line identifier, and return the results.

        :param tdust: The dust temperature.
        :param nh2: The H2 number density.
        :param line: The line identifier for the RADMC-3D observation.
        :param density_keyword: The keyword for the density in the grid configuration.
        :param dust_temperature_keyword: The keyword for the dust temperature in the grid configuration.
        :return: A tuple containing the dust temperature, H2 number density, line identifier, and the name of the
            resulting FITS file.
    """
    scratch_dir = os.path.join('mdl', 'scratches', str(uuid.uuid4()))
    stg_overrides = {
        'grid': {
            dust_temperature_keyword: tdust,
            density_keyword: nh2,
        }
    }
    overrides = {
        'grid_lines': stg_overrides,
        'model': {
            'radmc_observation': {
                'iline': line
            }
        }
    }
    tarname = stg_main(override_config=overrides,
                       path_radmc_files=scratch_dir,
                       run_id=run_id)
    cube_fits_name = execute_radmc_script(grid_zipfile=tarname,
                                          override_config=overrides,
                                          radmc_input_path=scratch_dir,
                                          run_id=run_id)
    return tdust, nh2, line, cube_fits_name


def initialize_queue(engine: sqlalchemy.engine,
                     run_id: str,
                     run_arguments: Iterator):
    """
        Initialize the execution queue for a specific run ID with given run arguments if not already initialized.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param run_id: The unique identifier for the run.
        :param run_arguments: An iterator of run arguments, each containing dust temperature, density, line,
            density keyword, and dust temperature keyword.
        :return: None. Inserts entries into the execution queue if the queue is not already initialized.
    """
    is_initialized = engine.execute(f"select count(*) from tmp_execution_queue where run_id='{run_id}'").first()[0] != 0
    if is_initialized is False:
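        # Upserting on the full parameter tuple keeps queue initialization
        # idempotent: re-running the same run_id never duplicates entries.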
        for arguments in run_arguments:
            raw_insert_entry = {'run_id': run_id,
                                'dust_temperature': arguments[0],
                                'density': arguments[1],
                                'line': arguments[2],
                                'density_keyword': arguments[3],
                                'dust_temperature_keyword': arguments[4],
                                'done': False}
            upsert(
                table_object=TmpExecutionQueue,
                row_dict=raw_insert_entry,
                conflict_keys=[
                    TmpExecutionQueue.run_id,
                    TmpExecutionQueue.dust_temperature,
                    TmpExecutionQueue.density,
                    TmpExecutionQueue.line,
                    TmpExecutionQueue.density_keyword,
                    TmpExecutionQueue.dust_temperature_keyword
                ],
                engine=engine
            )


def get_run_pars(engine: sqlalchemy.engine,
                 run_id: str):
    """
        Fetch the next pending row from the execution queue for the given run ID and mark it as done.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param run_id: The unique identifier for the run.
        :return: The row corresponding to the next pending task in the execution queue, or None if there are no
            more pending tasks.
    """
    sql_query = sqlalchemy.text(f"""UPDATE tmp_execution_queue 
                SET done = true 
                WHERE row_id = (SELECT row_id 
                                   FROM tmp_execution_queue
                                   WHERE (run_id = '{run_id}')
                                        AND (done is false)
                                        AND pg_try_advisory_xact_lock(row_id) 
                                   LIMIT 1 FOR UPDATE) 
                RETURNING *""")
    return engine.execution_options(autocommit=True).execute(sql_query).first()


def verify_run(engine: sqlalchemy.engine,
               run_id: str):
    """
        Verify the completion status of a run, resetting tasks marked as done but lacking a FITS cube back to pending.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param run_id: The unique identifier for the run.
        :return: True if all completed tasks for the run have associated FITS cube names, indicating completion,
            otherwise False.
    """
    sql_query = sqlalchemy.text(f"""UPDATE tmp_execution_queue 
                SET done = false
                WHERE row_id in (SELECT row_id 
                                   FROM tmp_execution_queue
                                   WHERE run_id = '{run_id}'
                                        AND (done is true)
                                        AND (fits_cube_name is null))""")
    engine.execution_options(autocommit=True).execute(sql_query)
    _remaining_models = compute_remaining_models(run_id=run_id)
    return _remaining_models == 0


def insert_fits_name(engine: sqlalchemy.engine,
                     row_id: int,
                     fits_cube_name: str):
    """
        Insert the FITS cube name into the row of the execution queue with the specified row ID.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param row_id: The unique identifier for the row in the execution queue.
        :param fits_cube_name: The name of the FITS cube associated with the row.
        :return: None. Updates the row in the execution queue with the FITS cube name.
    """
    sql_query = sqlalchemy.text(f"""UPDATE tmp_execution_queue 
                SET fits_cube_name = '{fits_cube_name}'
                WHERE row_id = {row_id}""")
    engine.execution_options(autocommit=True).execute(sql_query)


def compute_grid_elements(run_id: str):
    """
        Compute grid elements for a given run ID by initializing the execution queue with parallel arguments.

        :param run_id: The unique identifier for the run.
        :return: None. Initializes the execution queue for the specified run ID.
    """
    init_db()
    parallel_args, _ = get_parallel_args_and_nprocesses()
    engine = get_pg_engine(logger=logger)
    initialize_queue(engine=engine,
                     run_id=run_id,
                     run_arguments=parallel_args)
    engine.dispose()


def get_parallel_args_and_nprocesses() -> Tuple[Iterator, int]:
    """
        Get parallel computation arguments and the number of processes for computation.

        :return: A tuple containing an iterator of parallel arguments and the number of processes to use.
    """
    _tdust_model_type, _model_type, dust_temperatures, densities, line_pairs, n_processes, _ = parse_input_main()
    line_set = set(chain.from_iterable(line_pairs))
    density_keyword = 'central_density' if _model_type == 'homogeneous' else 'density_at_reference'
    dust_temperature_keyword = 'dust_temperature' if _tdust_model_type == 'isothermal' else 'dust_temperature_at_reference'
    parallel_args = product(dust_temperatures, densities, line_set, [density_keyword], [dust_temperature_keyword])
    return parallel_args, n_processes


def compute_model(run_id: str):
    """
        Compute a model associated with a given run ID from parameters retrieved from the execution queue.

        :param run_id: The unique identifier for the run.
        :return: None. Computes a model and updates the database with the associated FITS cube name if parameters are
            available in the execution queue.
    """
    engine = get_pg_engine(logger=logger)
    parameters_set = get_run_pars(engine=engine,
                                  run_id=run_id)
    engine.dispose()
    if parameters_set is not None:
        _, _, _, fits_cube_name = compute_full_grid(tdust=parameters_set[2],
                                                    nh2=parameters_set[3],
                                                    line=parameters_set[4],
                                                    density_keyword=parameters_set[5],
                                                    dust_temperature_keyword=parameters_set[6])
        engine = get_pg_engine(logger=logger)
        insert_fits_name(engine=engine,
                         row_id=parameters_set[0],
                         fits_cube_name=fits_cube_name)
        engine.dispose()
    else:
        logger.info('All models were completed.')


def initialize_run():
    """
        Initialize a new run by generating a run ID if not provided, computing grid elements for the run,
        and saving the run ID to a file for future reference.

        :return: The generated or provided run ID.
    """
    if args.run_id is not None:
        run_id = args.run_id
    else:
        logger.info('Generating new run_id')
        run_id = str(uuid.uuid4())
    compute_grid_elements(run_id=run_id)
    sys.stdout.write(run_id)
    with open('run_id.txt', 'w') as run_id_file:
        run_id_file.write(f'{run_id}\n')
    return run_id


def compute_remaining_models(run_id: Union[None, str] = None) -> int:
    """
        Compute the number of pending models for a given run ID.

        :param run_id: Optional. The unique identifier for the run. If None, it defaults to the value of the
            'run_id' environment variable.
        :return: The number of remaining models.
    """
    _run_id = validate_parameter(run_id, default=os.getenv('run_id'))
    logger.info(f'Counting remaining models for run_id: {_run_id}')
    sql_query = sqlalchemy.text(f"""SELECT count(*)
                                    FROM tmp_execution_queue
                                    WHERE (run_id = '{_run_id}')
                                        AND ((done is false)
                                        OR ((done is true) AND (fits_cube_name is null)))""")
    engine = get_pg_engine(logger=logger)
    n_models = engine.execution_options(autocommit=True).execute(sql_query).first()[0]
    engine.dispose()
    sys.stdout.write(str(n_models))
    return n_models


def get_results(engine: sqlalchemy.engine,
                run_id: str):
    """
        Retrieve the results of a given run ID from the execution queue.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param run_id: The unique identifier for the run.
        :return: A list of tuples containing the dust temperature, density, line, and FITS cube name for each model
            associated with the run ID.
    """
    sql_query = sqlalchemy.text(f"""SELECT dust_temperature
                                           , density
                                           , line
                                           , fits_cube_name
                                    FROM tmp_execution_queue
                                    WHERE run_id = '{run_id}'""")
    return engine.execution_options(autocommit=True).execute(sql_query).all()


def cleanup_tmp_table(run_id: str,
                      engine: sqlalchemy.engine):
    """
        Cleanup the temporary execution queue for a given run ID.

        :param run_id: The unique identifier for the run.
        :param engine: The SQLAlchemy engine used to interact with the database.
        :return: None. Deletes all rows from the execution queue associated with the specified run ID.
    """
    sql_query = sqlalchemy.text(f"""DELETE
                                    FROM tmp_execution_queue
                                    WHERE run_id = '{run_id}'""")
    return engine.execution_options(autocommit=True).execute(sql_query)


def main_presentation_step(run_id: str,
                           cleanup_scratches: bool = True,
                           results_dict: Union[dict, None] = None) -> bool:
    _tdust_model_type, _model_type, dust_temperatures, densities, line_pairs, n_processes, run_type = parse_input_main()

    for folder in ('data', 'figures', 'trained_model'):
        os.makedirs(os.path.join(
            'prs',
            'output',
            'run_type',
            run_type,
            folder
        ), exist_ok=True)

    engine = get_pg_engine(logger=logger)
    _results_dict = validate_parameter(results_dict,
                                       default=get_results(engine=engine,
                                                           run_id=run_id))

    results_map = {}
    for (tdust, nh2, line, cube_fits_name) in _results_dict:
        results_map[f'{str(nh2)}_{str(tdust)}_{line}'] = cube_fits_name

    for line_pair in line_pairs:
        for tdust, nh2 in product(dust_temperatures, densities):
            prs_main(cube_fits_list=[results_map[f'{str(nh2)}_{str(tdust)}_{line_pair[0]}'],
                                     results_map[f'{str(nh2)}_{str(tdust)}_{line_pair[1]}']],
                     run_id=run_id,
                     engine=engine)

    if cleanup_scratches is True:
        scratches_dirs = glob.glob(os.path.join('mdl', 'scratches', '*'))
        for scratches in scratches_dirs:
            cleanup_directory(directory=scratches, logger=logger)

    prs_inspection_main(run_id=run_id,
                        is_isothermal=_tdust_model_type == 'isothermal',
                        is_homogeneous=_model_type == 'homogeneous',
                        engine=engine,
                        run_type=run_type)
    _run_success = verify_run(run_id=run_id,
                              engine=engine)
    if _run_success is True:
        cleanup_tmp_table(run_id=run_id,
                          engine=engine)
    engine.dispose()
    return _run_success


def process_models(distributed: bool = False) -> Tuple[Union[None, dict], int]:
    """
        Process models either in a distributed environment or on a single machine (with multiprocessing).

        :param distributed: A boolean flag indicating whether to process models in parallel. Defaults to False.
        :return: A tuple containing results (if processed in parallel) and the number of remaining models.
    """
    if distributed is True:
        compute_model(run_id=run_id)
        results = None
        remaining_models = compute_remaining_models(run_id)
    else:
        parallel_args, n_processes = get_parallel_args_and_nprocesses()
        with Pool(n_processes) as pool:
            results = pool.starmap(compute_full_grid, parallel_args)
        remaining_models = 0
    return results, remaining_models


logger = setup_logger(name='MAIN')
parser = argparse.ArgumentParser()
parser.add_argument('--run_id')
parser.add_argument('--cleanup_scratches')
parser.add_argument('--distributed')
args = parser.parse_args()

if __name__ == '__main__':
    run_id = initialize_run()
    assert run_id is not None
    _distributed = validate_parameter(args.distributed, default='false').lower() == 'true'

    results, remaining_models = process_models(distributed=_distributed)
    if remaining_models == 0:
        logger.info('All grid points processed. Summarizing results.')
        _cleanup = validate_parameter(args.cleanup_scratches,
                                      default='true').lower() == 'true'
        _run_success = main_presentation_step(run_id=run_id,
                                              cleanup_scratches=_cleanup,
                                              results_dict=results)
        if (_run_success is True) or (_distributed is False):
            logger.info('The run completed successfully!')
        else:
            logger.error('The run was incomplete. I have reset the done flag in the database for incomplete models')
+16 −0
radmc_postprocessing:
    nphot: 1000000
    scattering_mode_max: 0
    iranfreqmode: 1
    tgas_eq_tdust: 1
    lines_slowlvg_as_alternative: 1

radmc_observation:
    inclination: 0
    position_angle: 0
    iline: 2
    width_kms: 10
    nchannels: 100
    npix: 101
    threads: 12
    image_size_pc: 2
+7 −0
2               Format number of this file
1               Nr of dust species
============================================================================
1               Way in which this dust species is read
0               0=Thermal grain
silicate        Extension of name of dustkappa_***.inp file
----------------------------------------------------------------------------
+249 −0
import os
import sys
from itertools import product
from typing import Union
import sqlalchemy
import shutil
from astropy import units as u
from datetime import datetime
from radmc3dPy import image
from stg.stg_build_db_structure import (LinePars,
                                        SpeciesAndPartners,
                                        ModelPars,
                                        StarsPars)
from assets.commons import (load_config_file,
                            validate_parameter,
                            setup_logger,
                            compute_unique_hash_filename,
                            get_value_if_specified,
                            cleanup_directory)
from assets.commons.parsing import read_abundance_variation_schema
from assets.commons.db_utils import upsert, get_pg_engine
from assets.constants import (radmc_options_mapping,
                              radmc_lines_mode_mapping)

logger = setup_logger(name='MDL')


def save_cube_as_fits(cube_out_name: Union[str, None] = None,
                      cube_out_path: Union[str, None] = None,
                      path_radmc_files: Union[str, None] = None):
    """
    Convert the RADMC output to fits
    :param cube_out_name: outfile name
    :param cube_out_path: outfile path
    :param path_radmc_files: path where the radmc input/output files are stored
    """
    _cube_out_name = validate_parameter(cube_out_name, default='test_cube.fits')
    _cube_out_path = validate_parameter(cube_out_path, default=os.path.join('prs', 'fits', 'cubes'))
    _path_radmc_files = validate_parameter(path_radmc_files, default=os.path.join('mdl', 'radmc_files'))
    imdata = image.readImage(fname=os.path.join(_path_radmc_files, 'image.out'))
    output_name = os.path.join(_cube_out_path, _cube_out_name)
    if os.path.isfile(output_name):
        os.remove(output_name)
    imdata.writeFits(fname=output_name)


def check_config(config: dict) -> bool:
    """
    Check if all mandatory keys are present in the configuration dictionary.
    :param config: the configuration dictionary
    :return: True if all keys are present, False otherwise
    """
    mandatory_keys = {
        'inclination',
        'position_angle',
    }
    return set(mandatory_keys).difference(set(config.keys())) == set()


def get_command_options(config: dict) -> set:
    """
    Parse the radmc3d command options from the configuration dictionary
    :param config: the configuration dictionary
    :return: a set of option strings to append to the radmc3d image command
    """
    options_list = []
    for key in config["radmc_observation"]:
        try:
            options_list.append(f'{radmc_options_mapping[key]} {config["radmc_observation"][key]}')
        except KeyError:
            pass
    return set(options_list)


def populate_model_table(config_mdl: dict,
                         grid_zipfile: str,
                         cube_filename: str,
                         engine: sqlalchemy.engine,
                         executed_on: datetime.timestamp,
                         run_id: str):
    """
    Populate the model_parameters table in the DB
    :param config_mdl: the model configuration
    :param grid_zipfile: the name of the grid tarfile
    :param cube_filename: the name of the fits cube
    :param engine: the SQLAlchemy engine
    :param executed_on: the timestamp of execution
    :param run_id: the run unique identifier
    """
    model_pars_dict = {
        'zipped_grid_name': grid_zipfile,
        'fits_cube_name': cube_filename,
        'nphotons': config_mdl['radmc_postprocessing']['nphot'],
        'scattering_mode_max': int(config_mdl['radmc_postprocessing']['scattering_mode_max']),
        'iranfreqmode': config_mdl['radmc_postprocessing']['iranfreqmode'],
        'tgas_eq_tdust': config_mdl['radmc_postprocessing']['tgas_eq_tdust'],
        'inclination': config_mdl['radmc_observation']['inclination'],
        'position_angle': config_mdl['radmc_observation']['position_angle'],
        'imolspec': get_value_if_specified(config_mdl['radmc_observation'], 'imolspec'),
        'iline': get_value_if_specified(config_mdl['radmc_observation'], 'iline'),
        'width_kms': get_value_if_specified(config_mdl['radmc_observation'], 'width_kms'),
        'nchannels': get_value_if_specified(config_mdl['radmc_observation'], 'nchannels'),
        'npix': get_value_if_specified(config_mdl['radmc_observation'], 'npix'),
        'created_on': executed_on,
        'run_id': run_id
    }
    upsert(
        table_object=ModelPars,
        row_dict=model_pars_dict,
        conflict_keys=[ModelPars.fits_cube_name, ModelPars.run_id],
        engine=engine
    )


def populate_species_and_partner_table(config_lines: dict,
                                       engine: sqlalchemy.engine,
                                       executed_on: datetime.timestamp,
                                       grid_zipfile: str,
                                       run_id: str):
    """
    Populate the species_and_partners table in the DB
    :param config_lines: the line configuration
    :param engine: the SQLAlchemy engine
    :param executed_on: the timestamp of execution
    :param grid_zipfile: the name of the grid tarfile
    :param run_id: the run unique identifier
    """
    hot_core_specs = read_abundance_variation_schema(line_config=config_lines)
    for (species, collision_partner) in product(config_lines['species_to_include'], config_lines['collision_partners']):
        species_partner_dict = {
            'zipped_grid_name': f'{grid_zipfile}',
            'species_to_include': species,
            'molecular_abundance': config_lines['molecular_abundances'][species],
            'threshold': hot_core_specs[species]['threshold'],
            'abundance_jump': hot_core_specs[species]['abundance_jump'],
            'collision_partner': collision_partner,
            'molecular_abundance_collision_partner': config_lines['molecular_abundances'][collision_partner],
            'created_on': executed_on,
            'run_id': run_id
        }
        upsert(
            table_object=SpeciesAndPartners,
            row_dict=species_partner_dict,
            conflict_keys=[SpeciesAndPartners.zipped_grid_name,
                           SpeciesAndPartners.species_to_include,
                           SpeciesAndPartners.collision_partner,
                           SpeciesAndPartners.run_id],
            engine=engine
        )


def populate_line_table(config_lines: dict,
                        engine: sqlalchemy.engine,
                        executed_on: datetime.timestamp,
                        grid_zipfile: str,
                        run_id: str):
    """
    Populate the lines table in the DB
    :param config_lines: the dictionary containing the line configuration
    :param engine: the SQLAlchemy engine to use
    :param executed_on: the timestamp of execution, to add to the record
    :param grid_zipfile: the grid tarfile name, to be used as key
    :param run_id: the run unique identifier
    """
    line_pars_dict = {
        'zipped_grid_name': f'{grid_zipfile}',
        'lines_mode': config_lines['lines_mode'],
        'created_on': executed_on,
        'run_id': run_id
    }
    upsert(
        table_object=LinePars,
        row_dict=line_pars_dict,
        conflict_keys=[LinePars.zipped_grid_name, LinePars.run_id],
        engine=engine
    )


def main(grid_zipfile: str,
         run_id: str,
         override_config: Union[dict, None] = None,
         radmc_input_path: Union[str, None] = None,
         engine: sqlalchemy.engine = None) -> str:
    # This is necessary because lines_mode is needed in both the lines.inp and radmc3d.inp files.
    # The main input file is split from the rest because some parameters can be changed
    # independently of the grid used for the modeling. The mdl hash should depend on all the mdl parameters, not a subset.
    _override_config = validate_parameter(override_config, default={'grid_lines': {}, 'model': {}})
    executed_on = datetime.now()
    _radmc_input_path = validate_parameter(radmc_input_path, default=os.path.join('mdl', 'radmc_files'))
    config_stg = load_config_file(os.path.join('stg', 'config', 'config.yml'),
                                  override_config=_override_config['grid_lines'])
    config_lines = config_stg['lines']
    config_mdl = load_config_file(os.path.join('mdl', 'config', 'config.yml'),
                                  override_config=_override_config['model'])
    assert check_config(config=config_mdl['radmc_observation'])
    with open(os.path.join('mdl', 'radmc3d_postprocessing.sh'), 'w') as outfile:
        outfile.write(f'cd {_radmc_input_path}\n')
        options_set = get_command_options(config_mdl)
        radmc_command = f'radmc3d image {" ".join(options_set)}'
        outfile.write(radmc_command)

    config_full = config_mdl.copy()
    config_full.update(config_stg)
    cube_filename = f'{compute_unique_hash_filename(config=config_full)}.fits'
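    # Hashing the full (grid + model) configuration yields a deterministic
    # file name, so identical configurations reuse the cached cube below.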

    # Execute radmc if not done already
    if not os.path.isfile(os.path.join('prs', 'fits', 'cubes', cube_filename)):
        logger.debug(f'Executing command: {radmc_command}')
        execution_dir = os.getcwd()
        os.chdir(_radmc_input_path)
        os.system(radmc_command)
        os.chdir(execution_dir)
        logger.debug(f'Checking presence of file: {os.path.join(_radmc_input_path, "image.out")}')
        assert os.path.isfile(os.path.join(_radmc_input_path, 'image.out'))

        save_cube_as_fits(cube_out_name=cube_filename,
                          cube_out_path=os.path.join('prs', 'fits', 'cubes'),
                          path_radmc_files=radmc_input_path)
    else:
        logger.info('Computation performed already! Skipping...')

    if engine is None:
        engine = get_pg_engine(logger=logger, engine_kwargs={'pool_size': 2})

    populate_line_table(config_lines=config_lines,
                        engine=engine,
                        executed_on=executed_on,
                        grid_zipfile=grid_zipfile,
                        run_id=run_id)

    populate_species_and_partner_table(config_lines=config_lines,
                                       engine=engine,
                                       executed_on=executed_on,
                                       grid_zipfile=grid_zipfile,
                                       run_id=run_id)

    populate_model_table(config_mdl=config_mdl,
                         grid_zipfile=grid_zipfile,
                         cube_filename=cube_filename,
                         engine=engine,
                         executed_on=executed_on,
                         run_id=run_id)
    engine.dispose()
    return cube_filename


if __name__ == '__main__':
    main(grid_zipfile='459295aa894dffa8c521e606d14dbb6927638a2c.zip',
         run_id='test_run')
+2 −0
Here SAK will store the radmc files for each run.
 No newline at end of file
+2 −0
Folder that contains the scratch files used by SAK to avoid concurrency issues when running in parallel.
 No newline at end of file
+7 −0
flux_computation:
    central_frequency: 230.000
    central_frequency_units: "GHz"
    integration_limits: "all"
    moment_zero_aggregation_function: 'sum'
    aggregation_function: 'weighted_mean'
 No newline at end of file
+2 −0
In this directory you will find the postprocessed fits cubes.
 No newline at end of file
+2 −0
In this directory you will find the fits files representing the model grid.
 No newline at end of file
+2 −0
In this directory you will find the moment zero images of the lines.
 No newline at end of file
+2 −0
In this directory you will find the images of the line ratios.
 No newline at end of file
+2 −0
Here you will find the figure referring to the LTE column density approximation comparison.
 No newline at end of file

etl/prs/output/readme

0 → 100644
+2 −0
In this directory you will find the final (summarized) output of the run(s).
 No newline at end of file
+2 −0
Here you will find the results of the run, in the folder corresponding to the run type.
 No newline at end of file
+49 −0
import pandas as pd
import yaml
import os

df = pd.read_csv(os.path.join('assets', 'data', 'ch3oh_data_top35.csv'))

config_dict = {
    'simulated_ratio_realizations': 1000,
    'recompute_kde': True,
    'probability_threshold': 0.37,
    'best_bandwidths': {
        '87-86': 0.1,
        '88-86': 0.1,
        '88-87': 0.1,
        '257-256': 0.4,
        '381-380': 0.2,
    },
    'ratio_limits': {
        '87-86': [0, 1.7],
        '88-86': [0, 1.7],
        '88-87': [0, 1.7],
        '257-256': [0, 15],
        '381-380': [0, 4],
    },
    'use_model_for_inference': 'PLACEHOLDER',
    'ratios_to_include': [
        "87-86",
        "88-86",
        "88-87",
        "257-256",
        "381-380"
    ]
}

intensities_dict = {}
uncertainties_dict = {}

data_dict = df.to_dict(orient='records')
for line_id in ['86', '87', '88', '257', '256', '381', '380']:
    intensities_dict[line_id] = {}
    uncertainties_dict[line_id] = {}
    for row in data_dict:
        intensities_dict[line_id][str(row['source_name'])] = row[f'area_{line_id}']
        uncertainties_dict[line_id][str(row['source_name'])] = row[f'area_unc_{line_id}']
config_dict['measured_integrated_intensities'] = intensities_dict
config_dict['integrated_intensities_uncertainties'] = uncertainties_dict

with open('config/density_inference_input.yml', 'w') as outfile:
    yaml.dump(config_dict, outfile)
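
For intuition on how the intensities and uncertainties above might be consumed, here is a minimal sketch that turns a measured line pair into simulated ratio realizations; the independent-Gaussian sampling convention is an assumption, not necessarily what the inference code does:

import numpy as np


def simulate_ratios(intensity_num, sigma_num, intensity_den, sigma_den,
                    n_realizations=1000):
    # Draw independent Gaussian realizations of the two integrated intensities
    # and form their ratio, propagating the uncertainties into a ratio sample.
    rng = np.random.default_rng()
    numerator = rng.normal(intensity_num, sigma_num, n_realizations)
    denominator = rng.normal(intensity_den, sigma_den, n_realizations)
    return numerator / denominator


# '87-86' ratio realizations for G08.71-0.41, using the measured intensities
# and uncertainties listed in the configuration blocks above.
ratio_samples = simulate_ratios(1.307, 0.112, 5.770, 0.114)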
Original line number Diff line number Diff line
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle
from scipy.interpolate import splrep, BSpline, RegularGridInterpolator
from scipy.optimize import curve_fit
from assets.commons import (get_data,
                            load_config_file,
                            setup_logger,
                            validate_parameter)
from typing import Union, List


def approx(x, x_shift, a, b, exponent, scale=1):
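    # Empirical curve approximating the density-ratio relation: arctanh maps
    # the bounded ratio interval onto the real line before the power-law shaping.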
    return (np.abs(np.arctanh(2 * (x - x_shift) / scale) + a)) ** exponent + b


def main(ratios_to_fit: Union[List[str], None] = None):
    config = load_config_file(config_file_path=os.path.join('config', 'config.yml'))
    _ratio_to_fit = validate_parameter(
        param_to_validate=ratios_to_fit,
        default=['-'.join(lines_ratio) for lines_ratio in config['overrides']['lines_to_process']])
    data = get_data(limit_rows=None,
                    use_model_for_inference=config['run_type'])
    logger.info(f'Using {config["run_type"]} to fit data...')
    logger.info(f'Data contains {data.shape[0]} rows')

    bw = {
        '87-86': 0.1,
        '88-87': 0.1,
        '88-86': 0.1,
        '257-256': 0.4,
        '381-380': 0.2}

    x0 = {
        '87-86': ((0.55, 2.3, 3.75, 0.9),),
        '88-87': ((0.59, 2.3, 3.9, 1.05, 0.96),),
        '88-86': ((0.55, 2.5, 4.3, 0.83, 1.08),),
        '257-256': ((5, 2.5, 2.45, 0.75, 8), (5, 2.5, 3.33, 0.75, -8)),
        '381-380': ((2.04, 2.5, 3.1, 0.6, 2.1), (2.18, 2.5, 3.7, 0.79, -2.4)),
    }
    min_ratio = {
        '87-86': 0.08,
        '88-87': 0.08,
        '88-86': 0.03,
        '257-256': 1,
        '381-380': 1}
    max_ratio = {
        '87-86': 1.04,
        '88-87': 1.02,
        '88-86': 1.07,
        '257-256': 7.5,
        '381-380': 2.9}

    for ratio_string in _ratio_to_fit:
        _column_to_fit = f'ratio_{ratio_string}'
        data.sort_values(by=_column_to_fit, inplace=True)
        plt.clf()
        plt.scatter(data[_column_to_fit], data['avg_nh2'], marker='+', alpha=0.1)
        x_reg = np.linspace(min_ratio[ratio_string], max_ratio[ratio_string], 100)

        data_clean = data[[_column_to_fit, 'avg_nh2']].loc[data[_column_to_fit] <= max_ratio[ratio_string]].copy()
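        # Evaluate the trained KDE at each (ratio, density) sample and keep only
        # points with probability density above mean - std, rejecting sparse
        # outliers before overplotting the fitted curves.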
        kde = pickle.load(
            open(f'prs/output/run_type/{config["run_type"]}/trained_model/ratio_density_kde_{ratio_string}.pickle',
                 'rb'))
        rgi = RegularGridInterpolator((kde['y'] * bw[ratio_string], kde['x'] * 0.2), kde['values'].reshape(200, 200).T)
        density = rgi(data_clean[[_column_to_fit, 'avg_nh2']])
        data_clean = data_clean.loc[(density > density.mean() - density.std())]
        plt.scatter(data_clean[_column_to_fit], data_clean['avg_nh2'], marker='+', alpha=0.1, color='red')
        for components in x0[ratio_string]:
            plt.plot(x_reg, approx(x_reg, *components), color='cyan')
        plt.show()


if __name__ == '__main__':
    logger = setup_logger(name='POLYFIT')
    main()
+25 −0
from assets.utils import get_poc_results
from scipy.stats import anderson_ksamp
from assets.commons import setup_logger
import numpy as np


logger = setup_logger(name='PRS - AD test')

df_full = get_poc_results(line_fit_filename='ch3oh_data_top35.csv')

df_full['best_fit'] /= 1e5
df_full['best_fit'] = df_full['best_fit'].round(1)
df_full['mass'] = (df_full['mass'] / 100).round(1)
df_full['distance'] = (df_full['distance']).round(1)
df_full['diameter'] = (df_full['diameter']).round(1)
df_full = df_full[(df_full['mass'] < 100) & (df_full['mass'] > 3)]
# df_full = df_full[df_full['class'] != 'HII']
# df_full = df_full[df_full['class'].isin(['IRb', 'IRw'])]

ad_results = {}
for column in ['mass', 'distance', 'best_fit', 'diameter']:
    array_list = [d[column].values for _, d in df_full.groupby(['class'])]
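    # anderson_ksamp returns (statistic, critical_values, significance_level);
    # index 2 is the approximate p value, capped by scipy to the 0.001-0.25 range.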
    ad_results[column] = anderson_ksamp(array_list)[2]

logger.info(f'The p values of the AD test are: {ad_results}')
Original line number Diff line number Diff line
import os
import sqlalchemy
import numpy as np
import astropy.units as u
from datetime import datetime
from astropy.io import fits
from astropy import constants
from typing import Union, List
from assets.commons import (validate_parameter,
                            get_value_if_specified,
                            load_config_file,
                            setup_logger,
                            compute_unique_hash_filename)
from assets.commons.db_utils import upsert, get_pg_engine
from assets.constants import aggregation_function_mapping
from stg.stg_build_db_structure import (MomentZeroMaps,
                                        RatioMaps)

logger = setup_logger(name='PRS')


def populate_mom_zero_table(config_prs: dict,
                            fits_cube_name: str,
                            moment_zero_map_fits: str,
                            aggregated_moment_zero: float,
                            executed_on: datetime.timestamp,
                            engine: sqlalchemy.engine,
                            run_id: str):
    """
    Insert record in DB for the moment zero table
    :param config_prs: configuration of the presentation layer
    :param fits_cube_name: name of the fits cube
    :param moment_zero_map_fits: filename of the moment zero map
    :param aggregated_moment_zero: the aggregated value of the moment zero
    :param executed_on: timestamp of execution
    :param engine: SQLAlchemy engine to use
    :param run_id: the run unique identifier
    """
    _integration_limit_low = get_value_if_specified(config_prs, 'integration_limits')[0] if get_value_if_specified(
        config_prs, 'integration_limits') != 'all' else None
    _integration_limit_high = get_value_if_specified(config_prs, 'integration_limits')[1] if get_value_if_specified(
        config_prs, 'integration_limits') != 'all' else None
    moment_zero_dict = {
        'mom_zero_name': f'{moment_zero_map_fits}',
        'fits_cube_name': f'{fits_cube_name}',
        'integration_limit_low': _integration_limit_low,
        'integration_limit_high': _integration_limit_high,
        'aggregated_moment_zero': aggregated_moment_zero,
        'aggregation_function': config_prs['moment_zero_aggregation_function'],
        'created_on': executed_on,
        'run_id': run_id,
    }
    upsert(
        table_object=MomentZeroMaps,
        row_dict=moment_zero_dict,
        conflict_keys=[MomentZeroMaps.mom_zero_name,
                       MomentZeroMaps.run_id],
        engine=engine
    )


def populate_line_ratios_table(config_prs: dict,
                               moment_zero_map_fits_list: List[str],
                               ratio_map_name: str,
                               aggregated_ratio: float,
                               engine: sqlalchemy.engine,
                               executed_on: datetime.timestamp,
                               run_id: str):
    """
    Insert record in DB for the line ratios table
    :param config_prs: configuration of the presentation layer
    :param moment_zero_map_fits_list: list of the moment 0 maps to use to compute the ratio
    :param ratio_map_name: filename of the ratio map
    :param aggregated_ratio: aggregated value of the ratio
    :param engine: SQLAlchemy engine to use
    :param executed_on: timestamp of execution
    :param run_id: the run unique identifier
    """
    _integration_limit_low = get_value_if_specified(config_prs, 'integration_limits')[0] if get_value_if_specified(
        config_prs, 'integration_limits') != 'all' else None
    _integration_limit_high = get_value_if_specified(config_prs, 'integration_limits')[1] if get_value_if_specified(
        config_prs, 'integration_limits') != 'all' else None
    assert len(moment_zero_map_fits_list) == 2
    line_ratio_dict = {
        'ratio_map_name': f'{ratio_map_name}',
        'mom_zero_name_1': f'{moment_zero_map_fits_list[0]}',
        'mom_zero_name_2': f'{moment_zero_map_fits_list[1]}',
        'aggregated_ratio': aggregated_ratio,
        'aggregation_function': config_prs['aggregation_function'],
        'created_on': executed_on,
        'run_id': run_id,
    }
    upsert(
        table_object=RatioMaps,
        row_dict=line_ratio_dict,
        conflict_keys=[RatioMaps.ratio_map_name,
                       RatioMaps.run_id],
        engine=engine
    )


def compute_moment_zero(cube: str,
                        config: dict,
                        cube_path: Union[str, None] = None,
                        moment_zero_path: Union[str, None] = None,
                        moment_zero_fits_name: Union[str, None] = None,
                        hdu_idx: int = 0) -> float:
    """
    Compute moment zero map, given a cube
    :param cube: filename of the fits cube
    :param config: configuration of the presentation layer
    :param cube_path: path where the fits cube is to be found
    :param moment_zero_path: path where the moment 0 map is to be saved
    :param moment_zero_fits_name: filename for the moment zero map
    :param hdu_idx: index of the HDU to extract the data and the header
    :return: the aggregated value of the moment zero, according to the function specified in the configuration
    """
    _cube_path = validate_parameter(cube_path, default=os.path.join('prs', 'fits', 'cubes'))
    _moment_zero_path = validate_parameter(moment_zero_path, default=os.path.join('prs', 'fits', 'moments'))
    _moment_zero_fits_name = validate_parameter(moment_zero_fits_name, default='test_mom0.fits')

    fitsfile = open_fits_file_duck_typing(fitsfile=cube, fits_path=_cube_path)
    header = fitsfile[hdu_idx].header.copy()
    data = fitsfile[hdu_idx].data
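    # If the spectral axis is in Hz, convert the channel width to km/s via the
    # Doppler relation dv = -c * dnu / nu0, with CRVAL3 as reference frequency.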
    conversion = header['CUNIT3'].strip() == 'HZ'
    spectral_unit = 'km/s' if conversion is True else 'Hz'
    conversion_factor = 1 if conversion is False else (-constants.c * header['CDELT3'] / header['CRVAL3']).to('km/s').value
    mom0 = (data.sum(axis=0) * abs(conversion_factor))
    keywords_to_delete = ['NAXIS3', 'CRPIX3', 'CDELT3', 'CRVAL3', 'CUNIT3', 'CTYPE3']
    header['BUNIT'] += f' {spectral_unit}'
    header['BTYPE'] = 'MOMENT0'
    header['NAXIS'] = 2
    for key in keywords_to_delete:
        del header[key]
    fits.writeto(os.path.join(_moment_zero_path, _moment_zero_fits_name), data=mom0, header=header, overwrite=True)
    try:
        return aggregation_function_mapping[config['moment_zero_aggregation_function']](mom0)
    except KeyError:
        logger.warning(
            'Moment zero aggregation function not set or misconfigured. Trying to use the one for the ratio...')
        return aggregation_function_mapping[config['aggregation_function']](mom0)


def open_fits_file_duck_typing(fitsfile: Union[str, fits.PrimaryHDU],
                               fits_path: str = None) -> fits.PrimaryHDU:
    _fits_path = validate_parameter(fits_path, default='.')
    try:
        hdu = fits.open(os.path.join(_fits_path, fitsfile))
    except TypeError:
        hdu = fitsfile
    return hdu


def compute_image_ratios(fits1: str,
                         fits2: str,
                         config: dict,
                         fits_path: Union[str, None] = None,
                         ratio_fits_path: Union[str, None] = None,
                         ratio_fits_name: Union[str, None] = None,
                         hdu1_idx: int = 0,
                         hdu2_idx: int = 0):
    """
    Compute the ratio of the images and save it to fits. The ratio is computed as fits1/fits2
    :param fits1: filename of the first moment 0 map
    :param fits2: filename of the second moment 0 map
    :param config: configuration of the presentation layer
    :param fits_path: path where the fits files are to be found
    :param ratio_fits_path: path where the ratio map is to be saved
    :param ratio_fits_name: filename for the ratio map
    :param hdu1_idx: index of the HDU to extract the data and the header, for the first fits file
    :param hdu2_idx: index of the HDU to extract the data and the header, for the second fits file
    :return: the aggregated value of the ratio, according to the function specified in the configuration
    """
    _fits_path = validate_parameter(fits_path, default=os.path.join('prs', 'fits', 'moments'))
    _ratio_fits_path = validate_parameter(ratio_fits_path, default=os.path.join('prs', 'fits', 'ratios'))
    _ratio_fits_name = validate_parameter(ratio_fits_name, default='ratio.fits')
    hdu1 = open_fits_file_duck_typing(fitsfile=fits1,
                                      fits_path=_fits_path)
    hdu2 = open_fits_file_duck_typing(fitsfile=fits2,
                                      fits_path=_fits_path)
    ratio_image_data = hdu1[hdu1_idx].data / hdu2[hdu2_idx].data
    if config['aggregation_function'] in ('mean', 'sum'):
        aggregated_ratio = aggregation_function_mapping[config['aggregation_function']](ratio_image_data)
    elif config['aggregation_function'] == 'weighted_mean':
        aggregated_ratio = np.nansum(hdu1[hdu1_idx].data) / np.nansum(hdu2[hdu2_idx].data)
    else:
        raise NotImplementedError(f'Aggregation function {config["aggregation_function"]!r} is not supported')
    output_header = hdu1[hdu1_idx].header.copy()
    output_header['BUNIT'] = ''
    output_header['BTYPE'] = 'INT.RATIO'
    fits.writeto(os.path.join(_ratio_fits_path, _ratio_fits_name),
                 data=ratio_image_data,
                 header=output_header,
                 overwrite=True)
    return aggregated_ratio
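
# Note on the 'weighted_mean' branch above: dividing the summed maps is, up to
# NaN handling, the mom0_2-weighted mean of the pixel-wise ratio R = mom0_1 / mom0_2,
#     sum(R * mom0_2) / sum(mom0_2) = sum(mom0_1) / sum(mom0_2),
# which damps the contribution of low-signal pixels to the aggregated ratio.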


def main(cube_fits_list: List[str],
         run_id: str,
         mom0_out_cube1: Union[str, None] = None,
         mom0_out_cube2: Union[str, None] = None,
         engine: Union[sqlalchemy.engine.Engine, None] = None) -> str:
    """
    Compute the moment zero maps of the two cubes and their ratio, and persist the results
    :return: the filename of the ratio map, named after the hash of the configuration
    """
    assert len(cube_fits_list) == 2, 'Exactly two cubes are required to compute a line ratio'
    _mom0_out_cube1 = validate_parameter(mom0_out_cube1, default=cube_fits_list[0].replace('.fits', '_mom0.fits'))
    _mom0_out_cube2 = validate_parameter(mom0_out_cube2, default=cube_fits_list[1].replace('.fits', '_mom0.fits'))
    config_prs = load_config_file(os.path.join('prs', 'config', 'config.yml'))['flux_computation']
    executed_on = datetime.now()
    if engine is None:
        engine = get_pg_engine(logger=logger)
    config_prs.update({
        'cube_fits_list': cube_fits_list,
        'mom0_image_list': [_mom0_out_cube1, _mom0_out_cube2]
    })
    ratio_fits_name = f'{compute_unique_hash_filename(config=config_prs)}.fits'
    aggregated_mom0_1 = compute_moment_zero(cube=cube_fits_list[0],
                                            config=config_prs,
                                            moment_zero_fits_name=_mom0_out_cube1)
    aggregated_mom0_2 = compute_moment_zero(cube=cube_fits_list[1],
                                            config=config_prs,
                                            moment_zero_fits_name=_mom0_out_cube2)
    aggregated_image_ratio = compute_image_ratios(fits1=_mom0_out_cube1,
                                                  fits2=_mom0_out_cube2,
                                                  config=config_prs,
                                                  ratio_fits_name=ratio_fits_name)
    populate_mom_zero_table(config_prs=config_prs,
                            fits_cube_name=cube_fits_list[0],
                            moment_zero_map_fits=_mom0_out_cube1,
                            aggregated_moment_zero=aggregated_mom0_1,
                            executed_on=executed_on,
                            engine=engine,
                            run_id=run_id)
    populate_mom_zero_table(config_prs=config_prs,
                            fits_cube_name=cube_fits_list[1],
                            aggregated_moment_zero=aggregated_mom0_2,
                            moment_zero_map_fits=_mom0_out_cube2,
                            executed_on=executed_on,
                            engine=engine,
                            run_id=run_id)
    populate_line_ratios_table(config_prs=config_prs,
                               moment_zero_map_fits_list=[_mom0_out_cube1, _mom0_out_cube2],
                               ratio_map_name=ratio_fits_name,
                               aggregated_ratio=aggregated_image_ratio,
                               engine=engine,
                               executed_on=executed_on,
                               run_id=run_id)
    return ratio_fits_name
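
# Orchestration sketch (placeholder filenames and credentials; assumes a
# reachable Postgres instance, such as the one exposed on port 31000 by the
# docker-compose setup). Passing an explicit engine skips get_pg_engine():
#
#     >>> from sqlalchemy import create_engine
#     >>> test_engine = create_engine('postgresql+psycopg2://user:pass@localhost:31000/postgres')
#     >>> ratio_fits = main(cube_fits_list=['cube_a.fits', 'cube_b.fits'],
#     ...                   run_id='demo_run',
#     ...                   engine=test_engine)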


if __name__ == '__main__':
    main(cube_fits_list=['test_cube.fits', 'test_cube.fits'],
         mom0_out_cube1='test_cube_mom0.fits',
         mom0_out_cube2='test_cube_mom0_1.fits',
         run_id='test_run')

etl/requirements.txt

0 → 100644
+15 −0
Original line number Diff line number Diff line
numpy==1.23.4
PyYAML==6.0
astropy==5.1
matplotlib==3.6.2
scipy==1.9.3
tqdm==4.64.1
xarray==2022.10.0
sqlalchemy==1.4.42
psycopg2-binary==2.9.5
seaborn==0.12.2
scikit-learn==1.3.0
xgboost==2.0.0
keras==2.12.0
 No newline at end of file

etl/reset_output.sh

0 → 100644
+17 −0
Original line number Diff line number Diff line
#!/bin/bash
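# Clears all generated products: FITS files under prs/fits (cubes, moments,
# ratios, grids), PNG plots under prs/output, zipped archives under
# stg/archive, and full_dataset*.csv. Paths are relative, so run this from
# the etl/ directory; the script asks for confirmation before deleting.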

read -p "This will delete all of the results, are you sure? " -n 1 -r
echo

if [[ $REPLY =~ ^[Yy]$ ]]
then
    folder_list=("prs/fits/cubes" "prs/fits/moments" "prs/fits/ratios" "prs/fits/grids")
    for folder_name in "${folder_list[@]}"
    do
        rm -f "$folder_name"/*.fits
    done
    rm -f prs/output/*.png
    rm -f stg/archive/*.zip
    rm -f full_dataset*.csv
fi
+6 −0
Original line number Diff line number Diff line
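# NOTE (assumption): DB_USER and DB_PASS below appear to be base64-encoded
# placeholders (e.g. `echo cGlwcG8= | base64 -d`); whether they are decoded
# at load time depends on the credential-handling code.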
DB_USER: cGlwcG8=
DB_PASS: cGx1dG8=
DB_HOST: localhost
DB_NAME: postgres
DB_PORT: 5432
 No newline at end of file