Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: andrea.giannetti/swiss_army_knife_stable
Available revisions: main, v0.1, v0.2, v0.2.1, v0.3, v0.3.1, v0.3.2, v0.3.3

Commits on Source (6)
Showing with 1226 additions and 0 deletions
/etl/credentials/db_credentials*.yml
/etl/stg/archive/*
/etl/prs/fits/cubes/*
/etl/prs/fits/moments/*
/etl/prs/fits/ratios/*
/etl/prs/fits/grids/*
/etl/mdl/radmc_files/*
/etl/mdl/scratches/*
/etl/prs/output/*
/etl/*.sif
\ No newline at end of file
/etl/tests/test_files/config.yml
/etl/trn/trn_convert_grid_to_polaris.py
/etl/posteriors.yml
/etl/run_id.txt
/etl/test_sak.html
/etl/prs/output/
/etl/prs/fits/
/etl/stg/archive/
/etl/assets/internal_files/
/etl/credentials/db_credentials.yml
/etl/credentials/db_credentials_local.yml
/etl/mdl/polaris_input_file.cmd
/etl/mdl/radmc3d_postprocessing.sh
/etl/mdl/scratches/
FROM python:3.9-slim

# STEP 1) Setting up environment
# apt packages
RUN apt-get update && \
    apt-get --yes upgrade && \
    apt-get install --yes libgomp1 && \
    apt-get install --yes postgresql-client && \
    apt-get install --yes --reinstall build-essential && \
    apt-get install --yes curl vim less nano git && \
    apt-get install --yes zip unzip && \
    apt-get clean

WORKDIR /usr/src/etl

RUN python -m pip install --upgrade pip
COPY etl/requirements.txt .
RUN pip install -r requirements.txt

# RADMC-3D is installed with its home under /sak_user; HOME is set per-command,
# since a bare `RUN HOME=/sak_user` would not persist across layers
RUN mkdir /sak_user
COPY documentation/radmc3d_install.sh .
RUN chmod 755 radmc3d_install.sh
RUN HOME=/sak_user ./radmc3d_install.sh
ENV PATH="/sak_user/bin:${PATH}"
ENV PYTHONPATH="/sak_user/bin/python:${PYTHONPATH}"
RUN chmod 755 /sak_user/bin/radmc3d

# STEP 2) Bundling app
COPY etl .
CMD ["python", "main.py"]
\ No newline at end of file
The Swiss Army Knife project aims to assess whether CH3OH can be used as an effective volume density probe, and in which regime.
Important resources are listed in the [documentation.md file](documentation/documentation.md).
\ No newline at end of file
Bootstrap: docker
From: postgres:14.1-alpine

%environment
    export POSTGRES_DB=$DB_NAME
    export POSTGRES_USER=$DB_USER
    export POSTGRES_PASSWORD=$DB_PASS
# Refresh the local image (-f avoids an error if the file is absent)
rm -f swiss_army_knife_latest.sif
singularity pull --disable-cache docker://git.ia2.inaf.it:5050/andrea.giannetti/swiss_army_knife
version: '3.8'

services:
  db:
    image: postgres:14.1-alpine
    container_name: db_container_sak
    restart: always
    environment:
      - POSTGRES_DB=$DB_NAME
      - POSTGRES_USER=$DB_USER
      - POSTGRES_PASSWORD=$DB_PASS
    ports:
      - '31000:5432'
    volumes:
      - db:/var/lib/postgresql/data
    networks:
      - sak_network
  etl:
    build:
      context: .  # build context added: compose requires it when dockerfile is specified
      dockerfile: Dockerfile
    container_name: 'etl_sak'
    image: 'sak_etl'
    depends_on:
      - db
    stdin_open: true
    tty: true
    networks:
      - sak_network
    volumes:
      - ./etl:/usr/src/etl

volumes:
  db:
    driver: local

networks:
  sak_network:
    name: 'sak_app_network'
\ No newline at end of file
This diff is collapsed.
#!/bin/bash
# Set the PYCHARM_HOME and INSTALL_DEPENDENCIES environment variables before
# running, if the defaults below are not appropriate
PYCHARM_HOME=${PYCHARM_HOME:=$HOME}
INSTALL_DEPENDENCIES="${INSTALL_DEPENDENCIES:=true}"
echo "$PYCHARM_HOME"
cd "$PYCHARM_HOME" || exit 1

export RADMC_HOME=$PYCHARM_HOME/radmc3d-2.0
if [[ ! -d "$RADMC_HOME" ]]
then
    echo RADMC not found, cloning into "$(pwd)"
    git clone https://github.com/dullemond/radmc3d-2.0.git
fi

if $INSTALL_DEPENDENCIES
then
    apt-get install -y gfortran
fi

cd "$RADMC_HOME/src"
make
# Answer "y" to the install prompts; printf interprets the newlines, unlike a plain echo
printf 'y\ny\n' | make install

# Add these exports to the .bashrc configuration file (or equivalent)
export PATH=$HOME/bin:$PATH
export PYTHONPATH=$HOME/bin/python:$PYTHONPATH

cd "$RADMC_HOME/python/radmc3dPy"
python setup.py install
import glob
import logging
import sys
import yaml
import numpy as np
import shutil
import os
import urllib.request
import json
import pandas as pd
from hashlib import sha1
from typing import (List,
                    Union,
                    Tuple)
from astropy import units as u

from assets.constants import leiden_url_mapping


def setup_logger(name: str,
                 log_level: str = None) -> logging.Logger:
    """
    Configure the default logger
    :param name: logger name
    :param log_level: logging level, as defined by the logging module
    :return: the logger object
    """
    log_level = 'DEBUG' if log_level is None else log_level
    logger = logging.getLogger(name)
    logger.setLevel(log_level)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(log_level)
    formatter = logging.Formatter("%(asctime)s - %(name)-7s - %(levelname)s - %(message)s",
                                  datefmt="%Y-%m-%d %H:%M:%S")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger


def validate_parameter(param_to_validate,
                       default):
    """
    Substitute a default value if the parameter is set to None
    :param param_to_validate: input to validate
    :param default: default value
    :return: the default value if the parameter is set to None, else the passed value
    """
    return param_to_validate if param_to_validate is not None else default


def load_config_file(config_file_path: str,
                     override_config: Union[dict, None] = None) -> dict:
    """
    Load the information in the YAML configuration file into a python dictionary
    :param config_file_path: path to the configuration file
    :param override_config: parameters of the input file to override (e.g. for grid creation)
    :return: a dictionary with the parsed information
    """
    _override_config = validate_parameter(override_config, default={})
    with open(config_file_path) as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)
    for key in _override_config:
        if key not in config.keys():
            config[key] = {}
        for subkey in _override_config[key]:
            try:
                config[key][subkey] = _override_config[key][subkey]
            except KeyError:
                config[key] = {subkey: _override_config[key][subkey]}
    try:
        if config['density_powerlaw_idx'] == 0:
            config['central_density'] = config['density_at_reference']
            config['density_at_reference'] = None
    except KeyError:
        pass
    try:
        if config['dust_temperature_powerlaw_idx'] == 0:
            config['dust_temperature'] = config['dust_temperature_at_reference']
            config['dust_temperature_at_reference'] = None
    except KeyError:
        pass
    return config
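
# Illustrative sketch (hypothetical path and keys), showing the override merge:
# values in override_config replace those parsed from the YAML file, and keys
# absent from the file are created rather than raising.
#   >>> overrides = {'grid': {'power_law_density': -1.2}}
#   >>> config = load_config_file('config/config.yml', override_config=overrides)
#   >>> config['grid']['power_law_density']
#   -1.2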
def get_moldata(species_names: list,
                logger: logging.Logger,
                path: Union[str, None] = None,
                use_cache: bool = False):
    """
    Download the molecular data from the Leiden molecular database; check whether the molecule is mapped in
    leiden_url_mapping
    :param species_names: the names of the species for which to download data
    :param logger: logger for output printing
    :param path: path where the files must be saved
    :param use_cache: use molecular input files in cache, if possible
    """
    _path = validate_parameter(path, default=os.path.join('mdl', 'radmc_files'))
    for species in species_names:
        molecular_file_name = f'molecule_{species}.inp'
        if use_cache is True:
            shutil.copy(os.path.join('mdl', 'data', molecular_file_name),
                        os.path.join(_path, molecular_file_name))
        if not os.path.isfile(os.path.join(_path, molecular_file_name)):
            logger.info(f'Downloading file molecule_{species}.inp...')
            data = urllib.request.urlopen(leiden_url_mapping[species]).read().decode()
            with open(os.path.join(_path, f'molecule_{species}.inp'), 'w') as outfile:
                outfile.writelines(data)
        else:
            logger.info(f'File molecule_{species}.inp found, skipping download...')


def compute_unique_hash_filename(config: dict) -> str:
    """
    Compute a unique and reproducible filename given a configuration dictionary
    :param config: the configuration dictionary to hash
    :return: a unique hash of the dictionary, to use as a key and filename
    """
    hashed_dict = sha1(json.dumps(config, sort_keys=True).encode())
    return hashed_dict.hexdigest()
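
# Example: the sort_keys serialisation makes the hash independent of key order,
# so logically identical configurations map to the same archive filename:
#   >>> compute_unique_hash_filename({'a': 1, 'b': 2}) == compute_unique_hash_filename({'b': 2, 'a': 1})
#   True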
def make_archive(output_filename: str,
                 source_dir: str,
                 archive_path: Union[str, None] = None):
    """
    Compress all files in a directory into a .zip file
    :param output_filename: the output filename
    :param source_dir: the directory to compress
    :param archive_path: path to store the compressed-grid archives
    """
    _archive_path = validate_parameter(archive_path,
                                       default=os.path.join('stg', 'archive'))
    filename_root, filename_format = output_filename.rsplit('.', maxsplit=1)
    try:
        os.remove(os.path.join(_archive_path, output_filename))
    except FileNotFoundError:
        pass
    shutil.make_archive(base_name=os.path.join(_archive_path, filename_root),
                        format=filename_format,
                        root_dir=source_dir)


def cleanup_directory(directory: str,
                      logger: logging.Logger):
    """
    Remove all files from the specified directory.
    :param directory: directory to clean up
    :param logger: logger to use
    """
    if os.path.isdir(directory):
        file_list = glob.glob(f'{directory}/*')
        for filename in file_list:
            if os.path.isfile(filename) is True:
                logger.debug(f'Removing file {filename}')
                os.remove(filename)
            else:
                logger.debug(f'Skipping {filename}')


def convert_frequency_to_wavelength(frequency: u.Quantity,
                                    output_units: u.Unit):
    """
    Convert a frequency to the equivalent wavelength, using astropy spectral equivalencies
    :param frequency: the frequency to convert, as an astropy Quantity
    :param output_units: the desired wavelength units
    :return: the wavelength, in the requested units
    """
    return frequency.to(output_units, equivalencies=u.spectral())
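
# Example: 96.7 GHz (roughly the CH3OH 2-1 band) converts to about 3.1 mm:
#   >>> convert_frequency_to_wavelength(96.7 * u.GHz, u.mm)
#   <Quantity 3.10023... mm>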
def get_value_if_specified(parameters_dict: dict,
                           key: str):
    """
    Return the value of a key in a dictionary, or None if the key is missing
    :param parameters_dict: the dictionary to query
    :param key: the key to look up
    :return: the value, or None if the key is not present
    """
    try:
        return parameters_dict[key]
    except KeyError:
        return None


def get_postprocessed_data(limit_rows: Union[None, int] = None,
                           use_model_for_inference: Union[None, str] = None) -> Tuple[List[List[str]], pd.DataFrame]:
    """
    Retrieve data and line pairs from the main config file
    :param use_model_for_inference: the prs/output/run_type folder from which to get the data for inference;
        defaults to the fiducial model (constant_abundance_p15_q05)
    :param limit_rows: the number of rows to use from the original dataset; useful to run tests and limit
        computation time
    :return: the line pairs list and the dataset
    """
    _use_model_for_inference = validate_parameter(
        use_model_for_inference,
        default='constant_abundance_p15_q05'
    )
    _data_file = os.path.join('prs', 'output', 'run_type', _use_model_for_inference, 'data', 'full_dataset.csv')
    config = load_config_file(os.path.join('config', 'config.yml'))
    line_pairs = config['overrides']['lines_to_process']
    results_df_dict = {
        'ratios_and_los_averages': add_px_index_column(_data_file)
    }
    if limit_rows is not None:
        data = results_df_dict['ratios_and_los_averages'].head(limit_rows)
    else:
        data = results_df_dict['ratios_and_los_averages']
    return line_pairs, data
def prepare_matrix(filename: str,
                   columns: list,
                   use_model_for_inference: Union[None, str] = None) -> pd.DataFrame:
    """
    Retrieve and prepare the data matrix from a specified file and columns.
    :param filename: the name of the file to read the data from.
    :param columns: the list of columns to extract from the dataframe.
    :param use_model_for_inference: the folder within prs/output/run_type from which to get the data for inference;
        defaults to the fiducial model ('constant_abundance_p15_q05') if None is provided.
    :return: a pandas DataFrame containing the specified columns from the file, with the 'nh2' and 'tdust' columns
        rounded to one decimal place and converted to string type.
    """
    _use_model_for_inference = validate_parameter(
        use_model_for_inference,
        default='constant_abundance_p15_q05'
    )
    df = pd.read_csv(os.path.join('prs', 'output', 'run_type', _use_model_for_inference, 'data', filename))
    df['nh2'] = pd.Series(df['nh2'].round(1), dtype='string')
    df['tdust'] = pd.Series(df['tdust'].round(1), dtype='string')
    return df[columns]


def get_data(limit_rows: Union[int, None] = None,
             use_model_for_inference: Union[None, str] = None,
             log_columns: Union[None, List] = None):
    """
    Retrieve and preprocess the dataset.
    :param limit_rows: the number of rows to use from the original dataset; useful for running tests and limiting
        computation time. Defaults to None, which uses all rows.
    :param use_model_for_inference: the folder within prs/output/run_type from which to get the data for inference;
        defaults to the fiducial model ('constant_abundance_p15_q05') if None is provided.
    :param log_columns: the list of columns to apply a logarithmic transformation to. Defaults to
        ['log_nh2', 'log_tdust', 'avg_nh2', 'avg_tdust', 'molecule_column_density', 'std_nh2'] if None is provided.
    :return: a pandas DataFrame containing the merged and processed data from multiple sources, with the specified
        columns logarithmically transformed.
    """
    _use_model_for_inference = validate_parameter(
        use_model_for_inference,
        default='constant_abundance_p15_q05'
    )
    _log_columns = validate_parameter(
        log_columns,
        default=['log_nh2', 'log_tdust', 'avg_nh2', 'avg_tdust', 'molecule_column_density', 'std_nh2']
    )
    _, data = get_postprocessed_data(limit_rows=limit_rows,
                                     use_model_for_inference=_use_model_for_inference)
    data['nh2'] = pd.Series(data['nh2'].round(1), dtype='string')
    data['tdust'] = pd.Series(data['tdust'].round(1), dtype='string')
    df87 = prepare_matrix(filename='full_dataset_NH2_lines_87-86.csv',
                          columns=['px_index', 'nh2', 'tdust', 'mom_zero_87', 'mom_zero_86',
                                   'molecule_column_density'],
                          use_model_for_inference=_use_model_for_inference)
    df88 = prepare_matrix(filename='full_dataset_NH2_lines_88-86.csv',
                          columns=['px_index', 'nh2', 'tdust', 'mom_zero_88'],
                          use_model_for_inference=_use_model_for_inference)
    df257 = prepare_matrix(filename='full_dataset_NH2_lines_257-256.csv',
                           columns=['px_index', 'nh2', 'tdust', 'mom_zero_257', 'mom_zero_256'],
                           use_model_for_inference=_use_model_for_inference)
    df381 = prepare_matrix(filename='full_dataset_NH2_lines_381-380.csv',
                           columns=['px_index', 'nh2', 'tdust', 'mom_zero_381', 'mom_zero_380'],
                           use_model_for_inference=_use_model_for_inference)
    merged = data.merge(df87, on=['px_index', 'nh2', 'tdust'])
    merged = merged.merge(df88, on=['px_index', 'nh2', 'tdust'])
    merged = merged.merge(df257, on=['px_index', 'nh2', 'tdust'])
    merged = merged.merge(df381, on=['px_index', 'nh2', 'tdust'])
    # These are transformed later on, to allow splitting
    merged['nh2'] = pd.to_numeric(merged['nh2'])
    merged['tdust'] = pd.to_numeric(merged['tdust'])
    merged['log_nh2'] = pd.to_numeric(merged['nh2'])
    merged['log_tdust'] = pd.to_numeric(merged['tdust'])
    for column in _log_columns:
        merged[column] = np.log10(merged[column])
    npixels = 101
    refpix = 50
    merged['px_distance'] = np.sqrt(
        ((merged['px_index'] / npixels).astype(int) - refpix) ** 2 + ((merged['px_index'] % npixels) - refpix) ** 2)
    merged['core_radius_px'] = (1e8 / merged['nh2']) ** -(2 / 3) * 0.5 * 101 / 2
    return merged
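
# Geometry note for px_distance above: px_index is a flat index into a
# npixels x npixels (101 x 101) map, so row = px_index // 101 and
# col = px_index % 101, and the distance is measured from the reference
# pixel (50, 50). A quick check with hypothetical values:
#   >>> px_index = np.array([50 * 101 + 50, 0])   # centre and corner pixels
#   >>> np.sqrt((px_index // 101 - 50) ** 2 + (px_index % 101 - 50) ** 2)
#   array([ 0.        , 70.71067812])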
def add_px_index_column(filename: str) -> pd.DataFrame:
    """
    Reset the index to add a pixel index column to the data
    :param filename: the filename of the csv file that contains the data
    :return: a pandas dataframe with the px_index column
    """
    df = pd.read_csv(filename, index_col=0)
    if 'px_index' not in df.columns:
        df = df.reset_index().rename(columns={'index': 'px_index'})
    return df
import base64
import logging
import os
from contextlib import closing
from typing import Union, List

import sqlalchemy as sqla
import yaml
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session

from assets.commons import validate_parameter


def get_credentials(logger: logging.Logger,
                    credentials_filename: Union[None, str] = None,
                    set_envs: bool = False) -> Union[None, dict]:
    """
    Retrieve credentials, if available, for logging in to the requested service.
    :param logger: the logger to use.
    :param credentials_filename: the name of the credentials file to be used. If None, uses the default.
    :param set_envs: whether to set the credentials as environment variables.
    :return: a dictionary with username and password, as retrieved from the credentials file, or None.
    """
    _credentials_filename = credentials_filename if credentials_filename is not None \
        else os.path.join('credentials', 'credentials.yml')
    try:
        with open(_credentials_filename) as credentials_file:
            credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)
        credentials['DB_USER'] = base64.b64decode(credentials['DB_USER'].encode()).decode()
        credentials['DB_PASS'] = base64.b64decode(credentials['DB_PASS'].encode()).decode()
        if set_envs is True:
            for key in credentials:
                os.environ[key] = str(credentials[key])
        return credentials
    except FileNotFoundError:
        logger.info('Credentials not found!')
        return None
def upsert(table_object: sqla.orm.decl_api.DeclarativeMeta,
           row_dict: dict,
           conflict_keys: List[sqla.Column],
           engine: sqla.engine):
    """
    Upsert the row into the specified table, according to the indicated conflict columns
    :param table_object: the table into which the row must be inserted
    :param row_dict: the dictionary representing the row to insert
    :param conflict_keys: the conflict columns list, representing the primary key of the table
    :param engine: the SQLAlchemy engine to use
    """
    statement = insert(table_object).values(row_dict)
    statement = statement.on_conflict_do_update(
        index_elements=conflict_keys, set_=row_dict)
    with closing(Session(engine)) as session:
        session.execute(statement)
        session.commit()
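
# Usage sketch (hypothetical ModelRun declarative table with composite primary
# key (run_id, line_pair)); on conflict, the existing row is updated in place:
#   upsert(table_object=ModelRun,
#          row_dict={'run_id': 'abc123', 'line_pair': '87-86', 'ratio': 1.4},
#          conflict_keys=[ModelRun.run_id, ModelRun.line_pair],
#          engine=engine)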
def get_pg_engine(logger: logging.Logger, engine_kwargs: Union[dict, None] = None) -> sqla.engine.Engine:
    """
    Return the SQLAlchemy engine, given the credentials in the external file
    :param logger: the logger to use
    :param engine_kwargs: optional keyword arguments passed to sqlalchemy.create_engine
    :return: the SQLAlchemy engine
    """
    _kwargs = validate_parameter(engine_kwargs, default={})
    credentials = get_credentials(logger=logger,
                                  credentials_filename=os.path.join('credentials', 'db_credentials.yml'))
    url = (f'postgresql://{credentials["DB_USER"]}:{credentials["DB_PASS"]}'
           f'@{credentials["DB_HOST"]}:{credentials["DB_PORT"]}/{credentials["DB_NAME"]}')
    engine = sqla.create_engine(url, **_kwargs)
    return engine
import logging
import os
from typing import Union, List

import numpy as np
from astropy import units as u
from astropy.constants import m_p  # proton mass, used in the total-mass estimate below

from assets.commons import validate_parameter, load_config_file, setup_logger
from assets.commons.parsing import get_grid_properties
from assets.constants import mean_molecular_mass, radmc_grid_map, radmc_coord_system_map


def compute_power_law_radial_profile(
        central_value: float,
        power_law_index: float,
        distance_matrix: np.array,
        maximum_radius: Union[float, None] = None,
        value_at_reference: Union[float, None] = None,
        distance_reference: Union[float, u.Quantity] = 1.0,
        fill_reference_pixel: bool = True) -> np.array:
    """
    Compute a power-law distribution over the specified grid
    :param central_value: value in the centre of the grid
    :param power_law_index: index of the power law used to scale the profile
    :param distance_matrix: the matrix of distances from the reference point
    :param maximum_radius: the maximum radius to populate with gas within the grid, in the same unit as the distance
        matrix and distance reference
    :param value_at_reference: value of the profile at the reference distance; defaults to the central value if not
        provided
    :param distance_reference: value of the reference distance
    :param fill_reference_pixel: whether to fill the reference point with central_value and set this to the maximum
        in the grid
    :return: the array with the power-law profile
    """
    _value_at_reference = validate_parameter(value_at_reference, default=central_value)
    _distance_matrix = np.where(distance_matrix == 0, np.nanmin(distance_matrix[distance_matrix > 0]),
                                distance_matrix) if fill_reference_pixel is True else distance_matrix
    try:
        _distance_reference = distance_reference.to(u.cm).value
    except AttributeError:
        _distance_reference = distance_reference
    profile = _value_at_reference * (_distance_matrix / _distance_reference) ** power_law_index
    # If the routine fills the 0-distance point (the centre), it fixes the profile making the central value the maximum
    if (fill_reference_pixel is True) and (power_law_index != 0):
        profile = np.where(profile > central_value, central_value, profile)
    if maximum_radius is not None:
        profile = np.where(distance_matrix > maximum_radius + maximum_radius / 1e5, 0, profile)
    return profile
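
# Example sketch: an n ∝ r^-1.5 profile on a toy distance array (in cm),
# pinned to 1e6 cm^-3 at 0.1 pc (~3.086e17 cm). The r = 0 point is filled
# using the smallest positive distance, and the profile is capped at
# central_value:
#   >>> r = np.array([0., 1.0e17, 3.086e17, 1.0e18])
#   >>> compute_power_law_radial_profile(central_value=1e8, power_law_index=-1.5,
#   ...                                  distance_matrix=r, value_at_reference=1e6,
#   ...                                  distance_reference=3.086e17)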
def compute_cartesian_coordinate_grid(indices: np.array,
                                      physical_px_size: np.array) -> np.array:
    """
    Compute the physical coordinate grid, for a regular grid
    :param indices: the indices of the grid pixels
    :param physical_px_size: the array of physical pixel sizes or the pixel size, as astropy Quantities
    :return: the numpy array with the physical grid coordinates
    """
    try:
        _physical_px_size_cm = [px_size.to(u.cm).value for px_size in physical_px_size]
    except TypeError:
        _physical_px_size_cm = physical_px_size.to(u.cm).value
    return (indices.T * _physical_px_size_cm).T


def get_centered_indices(grid_metadata: dict) -> np.array:
    """
    Recompute the indices using as reference the reference pixel in the configuration file
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape and the reference pixel
    :return: a numpy array of indices, with respect to the reference
    """
    return (np.indices(grid_metadata['grid_shape']).T - grid_metadata['grid_refpix']).T


def get_grid_edges(grid_metadata: dict) -> np.array:
    """
    Compute the physical coordinates at the grid boundaries
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape, the reference pixel, and
        the physical pixel size
    :return: a numpy array with the coordinates of the edges, per axis
    """
    grid_edges = []
    for axis_idx in range(len(grid_metadata['grid_shape'])):
        indices = np.arange(grid_metadata['grid_shape'][axis_idx] + 1) - grid_metadata['grid_refpix'][axis_idx] - 0.5
        # valid for regular grids
        grid_edges.append(compute_cartesian_coordinate_grid(indices=indices,
                                                            physical_px_size=grid_metadata['physical_px_size'][
                                                                axis_idx]))
    # Transposed for consistent flattening
    return np.array(grid_edges).T


def get_distance_matrix(grid_metadata: dict,
                        indices: np.array) -> np.array:
    """
    Compute the physical distance from the reference pixel, in cm
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape and the physical pixel size
    :param indices: numpy array of the grid pixel indices
    :return: a numpy array with the euclidean distance from the reference pixel
    """
    distance_matrix = np.zeros(grid_metadata['grid_shape'])
    for axis_idx in range(len(grid_metadata['grid_shape'])):
        distance_matrix += (indices[axis_idx, ...] * grid_metadata['physical_px_size'][axis_idx].to(u.cm).value) ** 2
    return np.sqrt(distance_matrix)


def get_physical_px_size(grid_metadata: dict) -> List[u.Quantity]:
    """
    Compute the physical pixel size
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape, the grid size, and the
        grid size units
    :return: a list of physical pixel sizes, per axis
    """
    physical_px_size = []
    for axis_idx in range(len(grid_metadata['grid_shape'])):
        physical_px_size.append(
            (grid_metadata['grid_size'][axis_idx] * u.Unit(grid_metadata['grid_size_units'][axis_idx])).to(u.cm) /
            grid_metadata['grid_shape'][axis_idx])
    return physical_px_size
def compute_analytic_profile(central_value: float,
                             power_law_index: float,
                             maximum_radius: float,
                             value_at_reference: float,
                             distance_reference: float) -> np.array:
    """
    Compute the analytic radial profile based on a power-law model.
    :param central_value: central value of the profile, where the power law could be undefined.
    :param power_law_index: power-law index determining the shape of the profile.
    :param maximum_radius: maximum radius within which to compute the profile; values outside this radius are
        set to zero.
    :param value_at_reference: value of the profile at the reference distance.
    :param distance_reference: reference distance at which the profile value is specified.
    :return: the computed radial profile, as a numpy array.
    Example:
        profile = compute_analytic_profile(central_value=10.0,
                                           power_law_index=-1.5,
                                           maximum_radius=5.0,
                                           value_at_reference=5.0,
                                           distance_reference=1.0)
    """
    config = load_config_file(config_file_path=os.path.join('stg', 'config', 'config.yml'))
    grid_metadata = extract_grid_metadata(config=config)
    distance_matrix = get_distance_matrix(grid_metadata=grid_metadata,
                                          indices=get_centered_indices(grid_metadata=grid_metadata))
    profile = compute_power_law_radial_profile(central_value=central_value,
                                               power_law_index=power_law_index,
                                               distance_matrix=distance_matrix,
                                               maximum_radius=maximum_radius,
                                               value_at_reference=value_at_reference,
                                               distance_reference=distance_reference)
    return profile


def compute_los_average_weighted_profile(profile: np.array,
                                         weights: Union[float, np.array]) -> np.array:
    """
    Compute the line-of-sight (LOS) average weighted profile.
    This function computes the weighted average of the profile along the LOS, summing the products of each profile
    value and its corresponding weight along the LOS axis, and dividing by the sum of the weights.
    Note:
        - If a profile value is 0, it is treated as missing data and excluded from the computation.
        - If a weight is 0, the corresponding profile value is effectively excluded from the computation.
    :param profile: array containing the profiles along the different lines of sight.
    :param weights: array containing the weights corresponding to each profile value, or a single weight value if
        uniform weighting is desired.
    :return: the LOS average weighted profile.
    """
    return np.nansum(np.where(profile == 0, np.nan, profile * weights), axis=2) / \
        np.nansum(np.where(profile == 0, np.nan, weights), axis=2)
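
# Example: zeros in the profile are treated as empty cells along the LOS
# (axis 2) and excluded from both numerator and denominator:
#   >>> profile = np.zeros((1, 1, 3)); profile[0, 0] = [10., 20., 0.]
#   >>> compute_los_average_weighted_profile(profile, np.array([1., 3., 5.]))
#   array([[17.5]])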
def compute_density_and_temperature_avg_profiles(density_profile: np.array,
                                                 temperature_profile: np.array,
                                                 molecular_abundance: float,
                                                 threshold: float,
                                                 abundance_jump: Union[float, int],
                                                 logger: Union[logging.Logger, None] = None) -> tuple:
    """
    Compute the average density and temperature profiles along the line of sight, along with related quantities.
    This function computes the average density and temperature profiles along the LOS, together with the molecular
    column density and the standard deviation of the species' number density. It first computes the molecular number
    density grid from the given density, temperature, abundance, temperature threshold, and abundance jump (to
    simulate hot-core-like abundance profiles). It then averages the density and temperature profiles along the LOS,
    using the molecular number density grid as weights, and finally computes the molecular column density and its
    standard deviation.
    :param density_profile: array containing the density profiles.
    :param temperature_profile: array containing the temperature profiles.
    :param molecular_abundance: molecular abundance, used to compute the molecular number density.
    :param threshold: temperature threshold above which the abundance is adjusted in hot cores, where the molecule is
        potentially evaporated from dust grains.
    :param abundance_jump: abundance jump in hot cores.
    :param logger: logger object for logging messages. If not specified, creates a generic logger for reporting the
        total mass.
    :return: a tuple with the average density profile, the average temperature profile, the molecular column density,
        and the standard deviation of the molecular number density.
    """
    _logger = validate_parameter(logger, setup_logger('GENERIC'))
    config = load_config_file(config_file_path=os.path.join('stg', 'config', 'config.yml'))
    grid_metadata = extract_grid_metadata(config=config)
    molecule_grid = compute_molecular_number_density_hot_core(gas_number_density_profile=density_profile,
                                                              abundance=molecular_abundance,
                                                              temperature_profile=temperature_profile,
                                                              threshold=threshold,
                                                              abundance_jump=abundance_jump)
    avg_density_profile = compute_los_average_weighted_profile(profile=density_profile,
                                                               weights=molecule_grid)
    mass = (np.nansum(density_profile * u.cm ** -3 * grid_metadata['physical_px_size'][0]
                      * grid_metadata['physical_px_size'][1]
                      * grid_metadata['physical_px_size'][2]) * mean_molecular_mass * m_p).to(u.M_sun)
    _logger.debug(f'Total mass: {mass}')
    avg_temperature_profile = compute_los_average_weighted_profile(profile=temperature_profile,
                                                                   weights=molecule_grid)
    return (avg_density_profile,
            avg_temperature_profile,
            np.nansum(molecule_grid, axis=2) * grid_metadata['physical_px_size'][2].value,
            np.nanstd(molecule_grid, axis=2))


def compute_molecular_number_density_hot_core(gas_number_density_profile: np.array,
                                              abundance: float,
                                              temperature_profile: np.array,
                                              threshold: float,
                                              abundance_jump: Union[float, int]) -> np.array:
    """
    Compute the molecular number density, using a step-function abundance that changes above a specific temperature,
    to simulate evaporation
    :param gas_number_density_profile: the gas number density profile array
    :param abundance: the gas abundance of the species
    :param temperature_profile: the temperature profile of the source
    :param threshold: the temperature threshold at which the species evaporates
    :param abundance_jump: the factor describing the abundance variation with respect to the base level
    :return: the array of molecular gas density computed using a step-function profile
    """
    return np.where(temperature_profile < threshold,
                    gas_number_density_profile * abundance,
                    gas_number_density_profile * abundance * abundance_jump)
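
# Example: with a 90 K threshold and a jump factor of 100, the abundance
# switches from X to 100 X only in cells at or above the threshold:
#   >>> n = np.full(4, 1e6); T = np.array([10., 30., 80., 120.])
#   >>> compute_molecular_number_density_hot_core(n, 1e-9, T, 90., 100.)
#   array([1.e-03, 1.e-03, 1.e-03, 1.e-01])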
def extract_grid_metadata(config: dict) -> dict:
    """
    Enrich the grid metadata from the information in the configuration file
    :param config: configuration dictionary
    :return: a dictionary with the metadata
    """
    grid_config = config['grid']
    grid_metadata = grid_config.copy()
    grid_metadata['grid_type'] = radmc_grid_map[grid_config['grid_type']]
    grid_metadata['coordinate_system'] = radmc_coord_system_map[grid_config['coordinate_system']]
    grid_properties_keywords = ['shape', 'refpix', 'size', 'size_units']
    for key in grid_properties_keywords:
        grid_metadata[f'grid_{key}'] = get_grid_properties(grid_config=grid_config,
                                                           keyword=key)
    grid_metadata['physical_px_size'] = get_physical_px_size(grid_metadata)
    grid_metadata['centered_indices'] = get_centered_indices(grid_metadata)
    grid_metadata['coordinate_grid'] = compute_cartesian_coordinate_grid(indices=grid_metadata['centered_indices'],
                                                                         physical_px_size=grid_metadata[
                                                                             'physical_px_size'])
    grid_metadata['distance_matrix'] = get_distance_matrix(grid_metadata, grid_metadata['centered_indices'])
    grid_metadata['grid_edges'] = get_grid_edges(grid_metadata)
    grid_metadata['continuum_lambdas'] = 100
    return grid_metadata
import os
from typing import Tuple

import numpy as np

from assets.commons import (load_config_file,
                            validate_parameter)


def parse_grid_overrides(par_name: str,
                         config: dict) -> np.array:
    """
    Parse the global configuration for constructing the grid, computing the parameter values for which the grid has
    to be computed.
    :param par_name: parameter to parse
    :param config: configuration dictionary from which to extract the grid properties
    :return: the array of values to be considered in the grid
    """
    config_overrides = config['overrides']
    if config_overrides[f'{par_name}_grid_type'] == 'linear':
        grid_values = np.arange(float(config_overrides[f'{par_name}_limits'][0]),
                                float(config_overrides[f'{par_name}_limits'][1]),
                                float(config_overrides[f'{par_name}_step']))
    else:
        _step = float(config_overrides[f'{par_name}_step']) \
            if float(config_overrides[f'{par_name}_step']) > 1 \
            else 10 ** float(config_overrides[f'{par_name}_step'])
        grid_values = 10 ** np.arange(np.log10(float(config_overrides[f'{par_name}_limits'][0])),
                                      np.log10(float(config_overrides[f'{par_name}_limits'][1])),
                                      np.log10(_step))
    return np.around(grid_values, 2)
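
# Example (hypothetical overrides block): in a 'log' grid, a step of 0.5 is
# interpreted as half-dex spacing, since steps <= 1 are taken as log10 values:
#   >>> cfg = {'overrides': {'gas_density_grid_type': 'log',
#   ...                      'gas_density_limits': [1e4, 1e7],
#   ...                      'gas_density_step': 0.5}}
#   >>> parse_grid_overrides('gas_density', cfg)
#   # -> approximately [1e4, 3.16e4, 1e5, 3.16e5, 1e6, 3.16e6] (upper limit excluded)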
def get_grid_properties(grid_config: dict,
                        keyword: str) -> list:
    """
    Compute the grid axes properties using the grid configuration dictionary. If fewer than 3 axes are provided,
    fill the missing ones with the first axis.
    :param grid_config: the dictionary containing the grid configuration metadata
    :param keyword: the keyword to read
    :return: a list of the requested keyword values over the three spatial axes
    """
    grid_property_list = [grid_config['dim1'][keyword]]
    for axis_idx in range(2):
        try:
            grid_property_list.append(grid_config[f'dim{axis_idx + 2}'][keyword])
        except KeyError:
            grid_property_list.append(grid_config['dim1'][keyword])
    return grid_property_list


def parse_input_main() -> Tuple[str, str, np.array, np.array, list, int, str]:
    """
    Parse the stg and mdl configuration files, as well as the main one, to apply overrides, compute the arrays of
    temperatures and number densities, and determine the type of models defined in the grid (isothermal/uniform
    density vs. power-law profiles)
    :return: a tuple containing the dust profile type (isothermal/heated), the density profile (homogeneous/spherical),
        the distinct values of dust temperatures and densities in the grid, the line pairs used to compute ratios,
        the number of processes to be used in case of multiprocessing, and the model run_type string
    """
    default_config_name = 'config.yml'
    stg_config = load_config_file(os.path.join('stg', 'config', default_config_name))
    pl_density_idx = float(stg_config['grid']['density_powerlaw_idx'])
    pl_dust_temperature_idx = float(stg_config['grid']['dust_temperature_powerlaw_idx'])
    _model_type = 'spherical' if pl_density_idx != 0 else 'homogeneous'
    _tdust_model_type = 'heated' if pl_dust_temperature_idx != 0 else 'isothermal'
    config = load_config_file(os.path.join('config', default_config_name))
    # grid definition
    dust_temperatures = parse_grid_overrides(par_name='dust_temperature',
                                             config=config)
    densities = parse_grid_overrides(par_name='gas_density',
                                     config=config)
    line_pairs = config['overrides']['lines_to_process']
    n_processes = validate_parameter(config['computation']['threads'], default=10)
    return _tdust_model_type, _model_type, dust_temperatures, densities, line_pairs, n_processes, config['run_type']


def read_abundance_variation_schema(line_config: dict) -> dict:
    """
    Read and fill the abundance variation schema, defined as a step function
    :param line_config: the dictionary of the line configurations, for the abundances
    :return: the dictionary of the abundance variations per species, with the abundance jump factor and the
        temperature threshold at which evaporation happens
    """
    species_list = list(line_config['molecular_abundances'].keys())
    results_dict = {}
    for species in species_list:
        results_dict[species] = {
            'threshold': 20,
            'abundance_jump': 1
        }
        if 'hot_core_specs' in line_config.keys():
            if species in line_config['hot_core_specs']:
                results_dict[species] = line_config['hot_core_specs'][species]
    return results_dict
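
# Example: species without a hot_core_specs entry fall back to the defaults
# (threshold 20 K, abundance_jump 1, i.e. no jump):
#   >>> read_abundance_variation_schema(
#   ...     {'molecular_abundances': {'e-ch3oh': 1e-9},
#   ...      'hot_core_specs': {'e-ch3oh': {'threshold': 90, 'abundance_jump': 100}}})
#   {'e-ch3oh': {'threshold': 90, 'abundance_jump': 100}}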
This diff is collapsed.
import numpy as np

from .physical_constants import mean_molecular_mass

polaris_grid_map = {
    'spherical': 30
}

polaris_data_map = {
    "gas_number_density": 0,
    "dust_temperature": 2,
    "gas_temperature": 3,
    "molecular_abundance_mass_ratio": 17
}

radmc_grid_map = {
    'regular': 0,
    'spherical': 100
}

radmc_coord_system_map = {
    'cartesian': 0,
    'polar': 100
}

radmc_input_headers = {
    'amr_grid.inp': ['iformat', 'grid_type', 'coordinate_system', 'grid_info', 'active_axes', 'ncells_per_axis'],
    'dust_density.inp': ['iformat', 'ncells', 'dust_species'],
    'dust_temperature.dat': ['iformat', 'ncells', 'dust_species'],
    'microturbulence.inp': ['iformat', 'ncells'],
    'gas_velocity.inp': ['iformat', 'ncells'],
    'gas_temperature.inp': ['iformat', 'ncells'],
    'wavelength_micron.inp': ['continuum_lambdas'],
    'numberdens_mol.inp': ['iformat', 'ncells'],
    'escprob_lengthscale.inp': ['iformat', 'ncells'],
    'stars.inp': ['iformat', 'nstars', 'continuum_lambdas', 'stars_properties', 'lambdas']
}

leiden_url_mapping = {
    'co': 'https://home.strw.leidenuniv.nl/~moldata/datafiles/co.dat',
    'e-ch3oh': 'https://home.strw.leidenuniv.nl/~moldata/datafiles/e-ch3oh.dat',
    'a-ch3oh': 'https://home.strw.leidenuniv.nl/~moldata/datafiles/ch3oh_a.dat'
}

radmc_lines_mode_mapping = {
    'lte': 1,
    'user_defined_populations': 2,
    'lvg': 3,
    'optically_thin_non_lte': 4,
}

radmc_options_mapping = {
    'inclination': 'incl',
    'position_angle': 'phi',
    'imolspec': 'imolspec',
    'iline': 'iline',
    'width_kms': 'widthkms',
    'nchannels': 'linenlam',
    'npix': 'npix',
    'image_size_pc': 'sizepc',
    'threads': 'setthreads'
}

aggregation_function_mapping = {
    'mean': np.nanmean,
    'sum': np.nansum,
    'median': np.nanmedian
}

line_ratio_mapping = {
    '87-86': r'$I(2_{0}-1_{0})/I(2_{-1}-1_{-1})$',
    '88-86': r'$I(2_{1}-1_{1})/I(2_{-1}-1_{-1})$',
    '88-87': r'$I(2_{1}-1_{1})/I(2_{0}-1_{0})$',
    '257-256': r'$I(5_{-1}-4_{-1})/I(5_{0}-4_{0})$',
    '381-380': r'$I(7_{-1}-6_{-1})/I(7_{0}-6_{0})$'
}
config_overrides = {
    'isothermal_p15': {
        'grid': {
            'power_law_temperature': 0
        }
    },
    'isothermal_p12': {
        'grid': {
            'power_law_density': -1.2,
            'power_law_temperature': 0
        }
    },
    'isothermal_p18': {
        'grid': {
            'power_law_density': -1.8,
            'power_law_temperature': 0
        }
    },
    'constant_abundance_p15_q05': {},
    'constant_abundance_p12_q05': {
        'grid': {
            'power_law_density': -1.2,
        }
    },
    'constant_abundance_p18_q05': {
        'grid': {
            'power_law_density': -1.8,
        }
    },
    'constant_abundance_p15_q05_x01': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-10
            }
        }
    },
    'constant_abundance_p15_q05_x10': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-8
            }
        }
    },
    'hot_core_p15_q05': {
        'lines': {
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_p12_q05': {
        'grid': {
            'power_law_density': -1.2,
        },
        'lines': {
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_p18_q05': {
        'grid': {
            'power_law_density': -1.8,
        },
        'lines': {
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_x10': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-8
            },
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_x01': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-10
            },
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'double_microturbulence': {},
}
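
# Sketch (hypothetical usage): these run_type keys are meant to be applied as
# override_config when loading a configuration, e.g.
#   config = load_config_file('stg/config/config.yml',
#                             override_config=config_overrides['hot_core_p12_q05'])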
mean_molecular_mass = 2.34
speed_of_light = 2.99792458e10  # cm/s
This diff is collapsed.
This diff is collapsed.
# select
# agal_name
# , round("N_H2_peak" / (2 * r_fwhm * 3.0856e18)) as volume_density
# from top100_sed_data tsd
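# Example of the formula above (hypothetical numbers): N_H2_peak = 1e23 cm^-2
# with r_fwhm = 0.25 pc gives 1e23 / (2 * 0.25 * 3.0856e18) ≈ 6.5e4 cm^-3,
# comparable to the volume_density values listed below (in cm^-3)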
"agal_name","volume_density"
AGAL006.216-00.609,89795.0
AGAL008.671-00.356,190969.0
AGAL008.684-00.367,68138.0
AGAL008.706-00.414,27785.0
AGAL008.831-00.027,NaN
AGAL010.444-00.017,22907.0
AGAL010.472+00.027,366225.0
AGAL010.624-00.384,299726.0
AGAL012.804-00.199,375774.0
AGAL013.178+00.059,68826.0
AGAL013.658-00.599,52684.0
AGAL014.114-00.574,75908.0
AGAL014.194-00.194,78815.0
AGAL014.492-00.139,74017.0
AGAL014.632-00.577,127991.0
AGAL015.029-00.669,209966.0
AGAL018.606-00.074,43782.0
AGAL018.734-00.226,19630.0
AGAL018.888-00.474,56893.0
AGAL019.609-00.234,92787.0
AGAL019.882-00.534,136234.0
AGAL022.376+00.447,82445.0
AGAL023.206-00.377,163688.0
AGAL024.416+00.101,15957.0
AGAL024.629+00.172,15443.0
AGAL024.651-00.169,7869.0
AGAL024.789+00.082,108737.0
AGAL028.564-00.236,37289.0
AGAL028.861+00.066,19217.0
AGAL030.818-00.056,386655.0
AGAL030.848-00.081,23220.0
AGAL030.893+00.139,49987.0
AGAL031.412+00.307,389457.0
AGAL034.258+00.154,1700013.0
AGAL034.401+00.226,218025.0
AGAL034.411+00.234,495365.0
AGAL034.821+00.351,68475.0
AGAL035.197-00.742,228801.0
AGAL035.579+00.007,8844.0
AGAL037.554+00.201,27842.0
AGAL043.166+00.011,221873.0
AGAL049.489-00.389,864473.0
AGAL053.141+00.069,184095.0
AGAL059.782+00.066,94354.0
AGAL301.136-00.226,259527.0
AGAL305.192-00.006,44264.0
AGAL305.209+00.206,183284.0
AGAL305.562+00.014,50295.0
AGAL305.794-00.096,30607.0
AGAL309.384-00.134,34424.0
AGAL310.014+00.387,38275.0
AGAL313.576+00.324,56581.0
AGAL316.641-00.087,119766.0
AGAL317.867-00.151,159134.0
AGAL318.779-00.137,34706.0
AGAL320.881-00.397,16371.0
AGAL326.661+00.519,93292.0
AGAL326.987-00.032,74912.0
AGAL327.119+00.509,29492.0
AGAL327.293-00.579,1286938.0
AGAL327.393+00.199,36543.0
AGAL328.809+00.632,350565.0
AGAL329.029-00.206,34454.0
AGAL329.066-00.307,14237.0
AGAL330.879-00.367,218474.0
AGAL330.954-00.182,322707.0
AGAL331.709+00.582,23291.0
AGAL332.094-00.421,83157.0
AGAL332.826-00.549,407894.0
AGAL333.134-00.431,200007.0
AGAL333.284-00.387,117459.0
AGAL333.314+00.106,53337.0
AGAL333.604-00.212,210139.0
AGAL333.656+00.059,21760.0
AGAL335.789+00.174,135994.0
AGAL336.958-00.224,28640.0
AGAL337.176-00.032,11767.0
AGAL337.258-00.101,40725.0
AGAL337.286+00.007,21253.0
AGAL337.406-00.402,266919.0
AGAL337.704-00.054,76912.0
AGAL337.916-00.477,386876.0
AGAL338.066+00.044,15655.0
AGAL338.786+00.476,38848.0
AGAL338.926+00.554,154575.0
AGAL339.623-00.122,40057.0
AGAL340.374-00.391,66279.0
AGAL340.746-01.001,43744.0
AGAL340.784-00.097,30196.0
AGAL341.217-00.212,77596.0
AGAL342.484+00.182,20980.0
AGAL343.128-00.062,280086.0
AGAL343.756-00.164,319987.0
AGAL344.227-00.569,609039.0
AGAL345.003-00.224,303461.0
AGAL345.488+00.314,225021.0
AGAL345.504+00.347,168516.0
AGAL345.718+00.817,90728.0
AGAL351.131+00.771,43470.0
AGAL351.161+00.697,743236.0
AGAL351.244+00.669,238178.0
AGAL351.416+00.646,2489024.0
AGAL351.444+00.659,1201754.0
AGAL351.496+00.691,78226.0
AGAL351.571+00.762,61074.0
AGAL351.581-00.352,234338.0
AGAL351.774-00.537,2754463.0
AGAL353.066+00.452,153199.0
AGAL353.409-00.361,202379.0
AGAL353.417-00.079,8376.0
AGAL354.944-00.537,46271.0
import os

import pandas as pd
import yaml

from assets.constants import config_overrides
from typing import (Union,
                    Tuple)
from assets.commons import validate_parameter


def get_poc_results(line_fit_filename: str) -> pd.DataFrame:
    """
    Merge the line-fit classification table with the volume density fit results of the fiducial model
    :param line_fit_filename: the name of the CSV file with the line fits and the source classification
    :return: the merged dataframe, with a human-readable class_phys column
    """
    df_class = pd.read_csv(os.path.join('assets', 'data', line_fit_filename))
    with open(os.path.join('prs', 'output', 'run_type', 'constant_abundance_p15_q05', 'volume_density_results.yml'),
              'r') as infile:
        fit_results = yaml.load(infile, Loader=yaml.FullLoader)
    df_results = pd.DataFrame.from_dict(fit_results, orient='index')
    df_full = (df_results.merge(df_class, left_index=True, right_on='source_name', how='inner', validate='1:1')
               .dropna(subset='best_fit'))
    df_full['class_phys'] = 'HII'
    df_full.loc[df_full['class'] == '70w', 'class_phys'] = 'Quiescent'
    df_full.loc[df_full['class'] == 'IRw', 'class_phys'] = 'Protostellar'
    df_full.loc[df_full['class'] == 'IRb', 'class_phys'] = 'MYSOs'
    return df_full


def get_profile_input(
        model_root_folder: str,
        species: Union[None, str] = None) -> Tuple[dict, dict]:
    """
    Extract the grid and abundance overrides for a given model folder, as defined in config_overrides
    :param model_root_folder: the model folder, whose basename must be a key of config_overrides
    :param species: the species for which to extract the abundance overrides; defaults to e-ch3oh
    :return: a tuple with the grid overrides and the abundance overrides dictionaries
    """
    overrides = config_overrides[model_root_folder.split(os.path.sep)[-1]]
    _species = validate_parameter(species, default='e-ch3oh')
    try:
        overrides_grid = overrides['grid']
    except KeyError:
        overrides_grid = {}
    try:
        overrides_lines = overrides['lines']
    except KeyError:
        overrides_lines = {}
    overrides_abundance = {}
    try:
        overrides_abundance['molecular_abundance'] = overrides_lines['molecular_abundances'][_species]
    except KeyError:
        pass
    for key in ['abundance_jump', 'threshold']:
        try:
            overrides_abundance[key] = overrides_lines['hot_core_specs'][_species][key]
        except KeyError:
            pass
    return overrides_grid, overrides_abundance
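
# Example: for the hot_core_p12_q05 model folder defined in config_overrides,
# the grid and abundance overrides are returned separately:
#   >>> get_profile_input(model_root_folder=os.path.join('some', 'path', 'hot_core_p12_q05'))
#   ({'power_law_density': -1.2}, {'abundance_jump': 100, 'threshold': 90})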