
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.


Files

.dockerignore

0 → 100644
+11 −0
/etl/credentials/db_credentials*.yml
/etl/stg/archive/*
/etl/prs/fits/cubes/*
/etl/prs/fits/moments/*
/etl/prs/fits/ratios/*
/etl/prs/fits/grids/*
/etl/mdl/radmc_files/*
/etl/mdl/scratches/*
/etl/prs/output/*
/etl/*.sif
 No newline at end of file

.gitignore

0 → 100644
+14 −0
/etl/tests/test_files/config.yml
/etl/trn/trn_convert_grid_to_polaris.py
/etl/posteriors.yml
/etl/run_id.txt
/etl/test_sak.html
/etl/prs/output/
/etl/prs/fits/
/etl/stg/archive/
/etl/assets/internal_files/
/etl/credentials/db_credentials.yml
/etl/credentials/db_credentials_local.yml
/etl/mdl/polaris_input_file.cmd
/etl/mdl/radmc3d_postprocessing.sh
/etl/mdl/scratches/

Dockerfile

0 → 100644
+34 −0
FROM python:3.9-slim


# STEP 1) Setting up environment
# apt packages
RUN apt-get update && \
    apt-get --yes upgrade && \
    apt-get install --yes libgomp1 && \
    apt-get install --yes postgresql-client && \
    apt-get install --yes --reinstall build-essential && \
    apt-get -y install curl vim less nano git && \
    apt-get -y install zip unzip && \
    apt-get clean

WORKDIR /usr/src/etl
RUN python -m pip install --upgrade pip
COPY etl/requirements.txt .
RUN pip install -r requirements.txt

RUN mkdir /sak_user
# RUN /usr/sbin/usermod -d /sak_user root
# each RUN starts a fresh shell, so a bare `RUN HOME=...` would not persist;
# set HOME via ENV so that the install script (and radmc3d) use /sak_user
ENV HOME=/sak_user
COPY documentation/radmc3d_install.sh .
RUN chmod 755 radmc3d_install.sh
RUN ./radmc3d_install.sh
ENV PATH="/sak_user/bin:${PATH}"
ENV PYTHONPATH="/sak_user/bin/python:${PYTHONPATH}"
RUN chmod 755 /sak_user/bin/radmc3d

# STEP 2) Bundling app
COPY etl .
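
# The stack is normally brought up via docker-compose.yaml (see below); a minimal
# manual sketch, assuming the build is run from the repository root:
#   docker build -t sak_etl .
#   docker run --rm -it sak_etl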

CMD python main.py
 No newline at end of file

README.md

0 → 100644
+4 −0
The Swiss Army Knife project aims to assess whether CH3OH can be used as an effective volume-density probe, and in which regime.

Important resources are listed in the [documentation.md file](documentation/documentation.md).
 No newline at end of file

apptainer.def

0 → 100644
+7 −0
Bootstrap: docker
From: postgres:14.1-alpine

%environment
export POSTGRES_DB=$DB_NAME
export POSTGRES_USER=$DB_USER
export POSTGRES_PASSWORD=$DB_PASS
+3 −0
rm swiss_army_knife_latest.sif
singularity pull --disable-cache docker://git.ia2.inaf.it:5050/andrea.giannetti/swiss_army_knife

docker-compose.yaml

0 → 100644
+37 −0
version: '3.8'
services:
  db:
    image: postgres:14.1-alpine
    container_name: db_container_sak
    restart: always
    environment:
      - POSTGRES_DB=$DB_NAME
      - POSTGRES_USER=$DB_USER
      - POSTGRES_PASSWORD=$DB_PASS
    ports:
      - '31000:5432'
    volumes:
      - db:/var/lib/postgresql/data
    networks:
      - sak_network

  etl:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: 'etl_sak'
    image: 'sak_etl'
    depends_on:
      - db
    stdin_open: true
    tty: true
    networks:
      - sak_network
    volumes:
      - ./etl:/usr/src/etl
volumes:
  db:
    driver: local
networks:
  sak_network:
    name: 'sak_app_network'
 No newline at end of file
+28 −0
#!/bin/bash
# Set the PYCHARM_HOME and INSTALL_DEPENDENCIES environment variables before running, if needed
PYCHARM_HOME=${PYCHARM_HOME:=$HOME}
INSTALL_DEPENDENCIES="${INSTALL_DEPENDENCIES:=true}"
echo "$PYCHARM_HOME"
cd "$PYCHARM_HOME"
export RADMC_HOME=$PYCHARM_HOME/radmc3d-2.0
if [[ ! -d "$RADMC_HOME" ]]
then
    echo RADMC not found, cloning into "$(pwd)"
    git clone https://github.com/dullemond/radmc3d-2.0.git
fi

if $INSTALL_DEPENDENCIES
then
    apt-get install -y gfortran
fi

cd $RADMC_HOME/src
make
echo "y\n y\n"| make install

# Add this to the .bashrc configuration file (or equivalent)
export PATH=$HOME/bin:$PATH
export PYTHONPATH=$HOME/bin/python:$PYTHONPATH

cd $RADMC_HOME/python/radmc3dPy
python setup.py install
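
# Example invocation (a sketch; both variables are optional and default as above):
#   PYCHARM_HOME=/sak_user INSTALL_DEPENDENCIES=false ./radmc3d_install.sh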
+291 −0
import glob
import logging
import sys
import yaml
import numpy as np
import shutil
import os
import urllib.request
import json
import pandas as pd
from hashlib import sha1
from typing import (List,
                    Union,
                    Tuple)
from astropy import units as u
from assets.constants import (leiden_url_mapping)


def setup_logger(name: str,
                 log_level: str = None) -> logging.Logger:
    """
    Configure default logger
    :param name: Logger name
    :param log_level: Logging levels, as defined by logging
    :return: the logger object
    """
    log_level = 'DEBUG' if log_level is None else log_level
    logger = logging.getLogger(name)
    logger.setLevel(log_level)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(log_level)
    formatter = logging.Formatter("%(asctime)s - %(name)-7s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger


def validate_parameter(param_to_validate,
                       default):
    """
    Substitute default value if the parameter is set to None
    :param param_to_validate: input to validate
    :param default: default value
    :return: default value if parameter is set to None, else the passed value
    """
    return param_to_validate if param_to_validate is not None else default


def load_config_file(config_file_path: str,
                     override_config: Union[dict, None] = None) -> dict:
    """
    Load the information in the YAML configuration file into a python dictionary
    :param config_file_path: path to the configuration file
    :param override_config: parameters of the input file to override (e.g. for grid creation)
    :return: a dictionary with the parsed information
    """
    _override_config = validate_parameter(override_config, default={})
    with open(config_file_path) as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)
    for key in _override_config:
        if key not in config.keys():
            config[key] = {}
        for subkey in _override_config[key]:
            try:
                config[key][subkey] = _override_config[key][subkey]
            except KeyError:
                config[key] = {subkey: _override_config[key][subkey]}

    try:
        if config['density_powerlaw_idx'] == 0:
            config['central_density'] = config['density_at_reference']
            config['density_at_reference'] = None
    except KeyError:
        pass

    try:
        if config['dust_temperature_powerlaw_idx'] == 0:
            config['dust_temperature'] = config['dust_temperature_at_reference']
            config['dust_temperature_at_reference'] = None
    except KeyError:
        pass
    return config
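
# Example of the override mechanism (hypothetical values): keys missing from the
# file are created, and nested keys are replaced one by one, e.g.
#   load_config_file('config/config.yml',
#                    override_config={'grid': {'power_law_density': -1.2}})
# leaves every other entry of config['grid'] untouched.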


def get_moldata(species_names: list,
                logger: logging.Logger,
                path: Union[str, None] = None,
                use_cache: bool = False):
    """
    Downloads the molecular data from the Leiden molecular database; check that the molecule is mapped in
        leiden_url_mapping
    :param species_names: the names of the species for which to download data
    :param logger: logger for output printing
    :param path: path where the files must be saved
    :param use_cache: use molecular input files in cache, if possible
    """
    _path = validate_parameter(path, default=os.path.join('mdl', 'radmc_files'))
    for species in species_names:
        molecular_file_name = f'molecule_{species}.inp'
        if use_cache is True:
            shutil.copy(os.path.join('mdl', 'data', molecular_file_name),
                        os.path.join(_path, molecular_file_name))
        if not os.path.isfile(os.path.join(_path, molecular_file_name)):
            logger.info(f'Downloading file molecule_{species}.inp...')
            data = urllib.request.urlopen(leiden_url_mapping[species]).read().decode()
            with open(os.path.join(_path, f'molecule_{species}.inp'), 'w') as outfile:
                outfile.writelines(data)
        else:
            logger.info(f'File molecule_{species}.inp found, skipping download...')


def compute_unique_hash_filename(config: dict) -> str:
    """
    Compute a unique and reproducible filename given a configuration dictionary
    :param config: the configuration dictionary to hash
    :return: a unique hash of the dictionary, to use as a key and filename
    """
    hashed_dict = sha1(json.dumps(config, sort_keys=True).encode())
    return f'{hashed_dict.hexdigest()}'
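
# Since the dictionary is serialized with sort_keys=True, the same configuration
# always yields the same filename regardless of key order, e.g.
#   compute_unique_hash_filename({'a': 1, 'b': 2}) == compute_unique_hash_filename({'b': 2, 'a': 1})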


def make_archive(output_filename: str,
                 source_dir: str,
                 archive_path: Union[str, None] = None):
    """
    Compresses all files in a directory into a .zip file
    :param output_filename: the output filename
    :param source_dir: the directory to compress
    :param archive_path: path to store the compressed-grid archives
    """
    _archive_path = validate_parameter(archive_path,
                                       default=os.path.join('stg', 'archive'))
    filename_root, filename_format = output_filename.rsplit('.', maxsplit=1)
    try:
        os.remove(os.path.join(_archive_path, output_filename))
    except FileNotFoundError:
        pass
    shutil.make_archive(base_name=os.path.join(_archive_path, filename_root),
                        format=filename_format,
                        root_dir=source_dir)


def cleanup_directory(directory: str,
                      logger: logging.Logger):
    """
    Removes all files from the specified directory.
    :param directory: directory to clean up
    :param logger: logger to use
    """
    if os.path.isdir(directory):
        file_list = glob.glob(f'{directory}/*')
        for filename in file_list:
            if os.path.isfile(filename) is True:
                logger.debug(f'Removing file {filename}')
                os.remove(filename)
            else:
                logger.debug(f'Skipping {filename}')


def convert_frequency_to_wavelength(frequency: u.Quantity,
                                    output_units: u.Unit):
    return frequency.to(output_units, equivalencies=u.spectral())


def get_value_if_specified(parameters_dict: dict,
                           key: str):
    try:
        return parameters_dict[key]
    except KeyError:
        return None


def get_postprocessed_data(limit_rows: Union[None, int] = None,
                           use_model_for_inference: Union[None, str] = None) -> Tuple[List[List[str]], pd.DataFrame]:
    """
    Retrieve data and line pairs from the main config file
    :param use_model_for_inference: the prs/output/run_type subfolder from which to read the data for inference;
        defaults to the fiducial model (constant_abundance_p15_q05)
    :param limit_rows: the number of rows to use from the original dataset; useful to run tests and limit
        computation time
    :return: the line pairs list and the dataset
    """
    _use_model_for_inference = validate_parameter(
        use_model_for_inference,
        default='constant_abundance_p15_q05'
    )
    _data_file = os.path.join('prs', 'output', 'run_type', _use_model_for_inference, 'data', 'full_dataset.csv')
    config = load_config_file(os.path.join('config', 'config.yml'))
    line_pairs = config['overrides']['lines_to_process']
    results_df_dict = {
        'ratios_and_los_averages': add_px_index_column(_data_file)
    }
    if limit_rows is not None:
        data = results_df_dict['ratios_and_los_averages'].head(limit_rows)
    else:
        data = results_df_dict['ratios_and_los_averages']
    return line_pairs, data


def prepare_matrix(filename: str,
                   columns: list,
                   use_model_for_inference: Union[None, str] = None) -> pd.DataFrame:
    """
    Retrieve and prepare the data matrix from a specified file and columns.

    :param filename: The name of the file to read the data from.
    :param columns: The list of columns to extract from the dataframe.
    :param use_model_for_inference: The folder within prs/output/run_type to get the data for inference;
        defaults to the fiducial model ('constant_abundance_p15_q05') if None is provided.
    :return: A pandas DataFrame containing the specified columns from the file with 'nh2' and 'tdust' columns
        rounded to one decimal place and converted to string type.
    """
    _use_model_for_inference = validate_parameter(
        use_model_for_inference,
        default='constant_abundance_p15_q05'
    )
    df = pd.read_csv(os.path.join('prs', 'output', 'run_type', _use_model_for_inference, 'data', filename))
    df['nh2'] = pd.Series(df['nh2'].round(1), dtype='string')
    df['tdust'] = pd.Series(df['tdust'].round(1), dtype='string')
    return df[columns]


def get_data(limit_rows: Union[int, None] = None,
             use_model_for_inference: Union[None, str] = None,
             log_columns: Union[None, List] = None):
    """
    Retrieve and preprocess dataset.

    :param limit_rows: The number of rows to use from the original dataset; useful for running tests and limiting
        computation time. Defaults to None, which uses all rows.
    :param use_model_for_inference: The folder within prs/output/run_type to get the data for inference;
        defaults to the fiducial model ('constant_abundance_p15_q05') if None is provided.
    :param log_columns: The list of columns to apply a logarithmic transformation to. Defaults to
        ['log_nh2', 'log_tdust', 'avg_nh2', 'avg_tdust', 'molecule_column_density', 'std_nh2'] if None is provided.
    :return: A pandas DataFrame containing the merged and processed data from multiple sources, with specified
        columns logarithmically transformed.
    """
    _use_model_for_inference = validate_parameter(
        use_model_for_inference,
        default='constant_abundance_p15_q05'
    )
    _log_columns = validate_parameter(
        log_columns,
        default=['log_nh2', 'log_tdust', 'avg_nh2', 'avg_tdust', 'molecule_column_density', 'std_nh2']
    )
    _, data = get_postprocessed_data(limit_rows=limit_rows,
                                     use_model_for_inference=_use_model_for_inference)
    data['nh2'] = pd.Series(data['nh2'].round(1), dtype='string')
    data['tdust'] = pd.Series(data['tdust'].round(1), dtype='string')
    df87 = prepare_matrix(filename='full_dataset_NH2_lines_87-86.csv',
                          columns=['px_index', 'nh2', 'tdust', 'mom_zero_87', 'mom_zero_86', 'molecule_column_density'],
                          use_model_for_inference=_use_model_for_inference)
    df88 = prepare_matrix(filename='full_dataset_NH2_lines_88-86.csv',
                          columns=['px_index', 'nh2', 'tdust', 'mom_zero_88'],
                          use_model_for_inference=_use_model_for_inference)
    df257 = prepare_matrix(filename='full_dataset_NH2_lines_257-256.csv',
                           columns=['px_index', 'nh2', 'tdust', 'mom_zero_257', 'mom_zero_256'],
                           use_model_for_inference=_use_model_for_inference)
    df381 = prepare_matrix(filename='full_dataset_NH2_lines_381-380.csv',
                           columns=['px_index', 'nh2', 'tdust', 'mom_zero_381', 'mom_zero_380'],
                           use_model_for_inference=_use_model_for_inference)
    merged = data.merge(df87, on=['px_index', 'nh2', 'tdust'])
    merged = merged.merge(df88, on=['px_index', 'nh2', 'tdust'])
    merged = merged.merge(df257, on=['px_index', 'nh2', 'tdust'])
    merged = merged.merge(df381, on=['px_index', 'nh2', 'tdust'])
    # These are transformed later on, to allow splitting
    merged['nh2'] = pd.to_numeric(merged['nh2'])
    merged['tdust'] = pd.to_numeric(merged['tdust'])
    merged['log_nh2'] = pd.to_numeric(merged['nh2'])
    merged['log_tdust'] = pd.to_numeric(merged['tdust'])
    for column in _log_columns:
        merged[column] = np.log10(merged[column])
    npixels = 101
    refpix = 50
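    # px_index is a flattened index on the npixels x npixels map: integer division
    # gives the row and the modulo gives the column, so this is the Euclidean
    # distance (in pixels) from the reference pixel at (refpix, refpix)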
    merged['px_distance'] = np.sqrt(
        ((merged['px_index'] / npixels).astype(int) - refpix) ** 2 + ((merged['px_index'] % npixels) - refpix) ** 2)
    merged['core_radius_px'] = (1e8 / merged['nh2']) ** -(2 / 3) * 0.5 * 101 / 2
    return merged


def add_px_index_column(filename: str) -> pd.DataFrame:
    """
    Reset index to add pixel index column to data
    :param filename: The filename of the csv file that contains the data
    :return: a pandas dataframe with the px_index column
    """
    df = pd.read_csv(filename, index_col=0)
    if 'px_index' not in df.columns:
        df = df.reset_index().rename(columns={'index': 'px_index'})
    return df
+71 −0
import base64
import logging
import os
from contextlib import closing
from typing import Union, List
import sqlalchemy as sqla
import yaml
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session
from assets.commons import validate_parameter


def get_credentials(logger: logging.Logger,
                    credentials_filename: Union[None, str] = None,
                    set_envs: bool = False) -> Union[None, dict]:
    """
    Retrieves credentials, if available, for logging in to the requested service.
    :param logger: the logger to use.
    :param credentials_filename: the name of the credentials file to be used. If None, uses the default.
    :param set_envs: whether to set the credentials as environment variables.
    :return: a dictionary with username and password, as retrieved from the credentials file or None.
    """
    _credentials_filename = credentials_filename if credentials_filename is not None \
        else os.path.join('credentials', 'credentials.yml')
    try:
        with open(_credentials_filename) as credentials_file:
            credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)
            credentials['DB_USER'] = base64.b64decode(credentials['DB_USER'].encode()).decode()
            credentials['DB_PASS'] = base64.b64decode(credentials['DB_PASS'].encode()).decode()
            if set_envs is True:
                for key in credentials:
                    os.environ[key] = str(credentials[key])

            return credentials
    except FileNotFoundError:
        logger.info('Credentials not found!')
        return None


def upsert(table_object: sqla.orm.decl_api.DeclarativeMeta,
           row_dict: dict,
           conflict_keys: List[sqla.Column],
           engine: sqla.engine):
    """
    Upsert the row into the specified table, according to the indicated conflict columns
    :param table_object: the table into which the row must be inserted
    :param row_dict: the dictionary representing the row to insert
    :param conflict_keys: the conflict columns list, representing the primary key of the table
    :param engine: the SQLAlchemy engine to use
    """
    statement = insert(table_object).values(row_dict)
    statement = statement.on_conflict_do_update(
        index_elements=conflict_keys, set_=row_dict)
    with closing(Session(engine)) as session:
        session.execute(statement)
        session.commit()
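
# Usage sketch (hypothetical table; `MyTable` is a declarative model whose
# primary key is `id`):
#   upsert(table_object=MyTable,
#          row_dict={'id': 1, 'value': 42},
#          conflict_keys=[MyTable.id],
#          engine=get_pg_engine(logger))
# Existing rows with the same `id` are updated in place, new ones are inserted.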


def get_pg_engine(logger: logging.Logger, engine_kwargs: Union[dict, None] = None) -> sqla.engine.Engine:
    """
    Return the SQLAlchemy engine, given the credentials in the external file
    :param logger: the logger to use
    :return: the SQLAlchemy engine
    """
    _kwargs = validate_parameter(engine_kwargs, default={})
    credentials = get_credentials(logger=logger,
                                  credentials_filename=os.path.join('credentials', 'db_credentials.yml'))
    url = f'postgresql://{credentials["DB_USER"]}:{credentials["DB_PASS"]}@{credentials["DB_HOST"]}:{credentials["DB_PORT"]}/{credentials["DB_NAME"]}'
    engine = sqla.create_engine(url, **_kwargs)
    return engine
+280 −0
import logging
import os
from typing import Union, List
import numpy as np
from astropy import units as u
from astropy.constants import m_p  # proton mass, used for the total-mass estimate
from assets.commons import validate_parameter, load_config_file, setup_logger
from assets.commons.parsing import get_grid_properties
from assets.constants import mean_molecular_mass, radmc_grid_map, radmc_coord_system_map


def compute_power_law_radial_profile(
        central_value: float,
        power_law_index: float,
        distance_matrix: np.array,
        maximum_radius: Union[float, None] = None,
        value_at_reference: Union[float, None] = None,
        distance_reference: Union[float, u.Quantity] = 1.0,
        fill_reference_pixel: bool = True) -> np.array:
    """
    Compute a power law distribution over the specified grid
    :param central_value: value in the center of the grid
    :param power_law_index: index of the power law used to scale the profile
    :param distance_matrix: the matrix of distances from the reference point
    :param maximum_radius: the maximum radius to populate with gas within the grid in the same unit as the distance
                           matrix and distance reference
    :param value_at_reference: value of the profile at the reference distance; defaults to central value if not provided
    :param distance_reference: value of the reference distance
    :param fill_reference_pixel: whether to fill the reference point with central_value and set this to the maximum in
        the grid
    :return: the array with the power-law profile
    """
    _value_at_reference = validate_parameter(value_at_reference, default=central_value)
    _distance_matrix = np.where(distance_matrix == 0, np.nanmin(distance_matrix[distance_matrix > 0]),
                                distance_matrix) if fill_reference_pixel is True else distance_matrix
    try:
        _distance_reference = distance_reference.to(u.cm).value
    except AttributeError:
        _distance_reference = distance_reference
    profile = _value_at_reference * (_distance_matrix / _distance_reference) ** power_law_index
    # If the routine fills the 0-distance point (the centre), it fixes the profile making the central value the maximum
    if (fill_reference_pixel is True) and (power_law_index != 0):
        profile = np.where(profile > central_value, central_value, profile)
    if maximum_radius is not None:
        profile = np.where(distance_matrix > maximum_radius + maximum_radius / 1e5, 0, profile)
    return profile
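
# Worked example (hypothetical numbers): with value_at_reference=1e5, a power-law
# index of -1.5 and distance_reference=1.0, a pixel at distance 4 gets
# 1e5 * 4 ** -1.5 = 1.25e4, while pixels beyond maximum_radius are set to 0.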


def compute_cartesian_coordinate_grid(indices: np.array,
                                      physical_px_size: np.array) -> np.array:
    """
    Compute the physical coordinate grid, for a regular grid
    :param indices: the indices of the grid pixels
    :param physical_px_size: the array of physical pixel sizes or the pixel size, as astropy.Quantities
    :return: the numpy.array with the physical grid coordinates
    """
    try:
        _physical_px_size_cm = [px_size.to(u.cm).value for px_size in physical_px_size]
    except TypeError:
        _physical_px_size_cm = physical_px_size.to(u.cm).value
    return (indices.T * _physical_px_size_cm).T


def get_centered_indices(grid_metadata: dict) -> np.array:
    """
    Recompute the indices using as reference the reference pixel in the configuration file
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape and the reference pixel
    :return: a numpy array of indices, wrt. the reference
    """
    return (np.indices(grid_metadata['grid_shape']).T - grid_metadata['grid_refpix']).T


def get_grid_edges(grid_metadata: dict) -> np.array:
    """
    Compute the physical coordinates at the grid boundaries
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape, the reference pixel, and the
        physical pixel size
    :return: a numpy array with the coordinates of the edges, per axis
    """
    grid_edges = []
    for axis_idx in range(len(grid_metadata['grid_shape'])):
        indices = np.arange(grid_metadata['grid_shape'][axis_idx] + 1) - grid_metadata['grid_refpix'][axis_idx] - 0.5
        # valid for regular grid
        grid_edges.append(compute_cartesian_coordinate_grid(indices=indices,
                                                            physical_px_size=grid_metadata['physical_px_size'][
                                                                axis_idx]))
    # Transposed for consistent flattening
    return np.array(grid_edges).T


def get_distance_matrix(grid_metadata: dict,
                        indices: np.array) -> np.array:
    """
    Compute physical distance from the reference pixel, in cm
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape, and the physical pixel size
    :param indices: numpy array of the grid pixel indices
    :return: a numpy array with the euclidean distance from the reference pixel
    """
    distance_matrix = np.zeros(grid_metadata['grid_shape'])
    for axis_idx in range(len(grid_metadata['grid_shape'])):
        distance_matrix += (indices[axis_idx, ...] * grid_metadata['physical_px_size'][axis_idx].to(u.cm).value) ** 2
    return np.sqrt(distance_matrix)


def get_physical_px_size(grid_metadata: dict) -> List[u.Quantity]:
    """
    Compute the physical pixel size
    :param grid_metadata: the dictionary with the grid metadata, to obtain the grid shape, the grid size, and the grid
        size units
    :return: a list of physical pixel sizes, per axis
    """
    physical_px_size = []
    for axis_idx in range(len(grid_metadata['grid_shape'])):
        physical_px_size.append(
            (grid_metadata['grid_size'][axis_idx] * u.Unit(grid_metadata['grid_size_units'][axis_idx])).to(u.cm) /
            grid_metadata['grid_shape'][axis_idx])
    return physical_px_size


def compute_analytic_profile(central_value: float,
                             power_law_index: float,
                             maximum_radius: float,
                             value_at_reference: float,
                             distance_reference: float) -> np.array:
    """
    Compute the analytic radial profile based on a power-law model.

    :param central_value: central value of the profile, where the power law could be undefined
    :param power_law_index: power-law index determining the shape of the profile
    :param maximum_radius: maximum radius within which to compute the profile; values outside this radius are
        set to zero
    :param value_at_reference: value of the profile at the reference distance
    :param distance_reference: reference distance at which the profile value is specified
    :return: the computed radial profile, as a numpy array

    Example:
        # Compute an analytic profile with given parameters
        profile = compute_analytic_profile(central_value=10.0,
                                            power_law_index=-1.5,
                                            maximum_radius=5.0,
                                            value_at_reference=5.0,
                                            distance_reference=1.0)
    """
    config = load_config_file(config_file_path=os.path.join('stg', 'config', 'config.yml'))
    grid_metadata = extract_grid_metadata(config=config)
    distance_matrix = get_distance_matrix(grid_metadata=grid_metadata,
                                          indices=get_centered_indices(grid_metadata=grid_metadata))
    profile = compute_power_law_radial_profile(central_value=central_value,
                                               power_law_index=power_law_index,
                                               distance_matrix=distance_matrix,
                                               maximum_radius=maximum_radius,
                                               value_at_reference=value_at_reference,
                                               distance_reference=distance_reference)
    return profile


def compute_los_average_weighted_profile(profile: np.array,
                                         weights: Union[float, np.array]) -> np.array:
    """
    Compute the line-of-sight (LOS) average weighted profile.
    This function computes the weighted average profile along the LOS, where each profile is weighted
    according to the provided weights. The computation is performed by summing the products of each
    profile value and its corresponding weight across all LOS, and then dividing by the sum of weights
    to obtain the average.

    Note:
    - If a profile value is 0, it is treated as missing data and not included in the computation.
    - If a weight is 0, the corresponding profile is effectively excluded from the computation.

    :param profile: array containing the profiles along different lines of sight (LOS)
    :param weights: array containing the weights corresponding to each profile in the input array,
        or a single weight value if uniform weighting is desired
    :return: the LOS average weighted profile
    """
    return np.nansum(np.where(profile == 0, np.nan, profile * weights), axis=2) / \
        np.nansum(np.where(profile == 0, np.nan, weights), axis=2)
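
# Small numeric sketch (hypothetical values): along one LOS with
# profile = [2, 0, 4] and weights = [1, 1, 3], the zero is treated as missing,
# so the result is (2*1 + 4*3) / (1 + 3) = 3.5.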


def compute_density_and_temperature_avg_profiles(density_profile: np.array,
                                                 temperature_profile: np.array,
                                                 molecular_abundance: float,
                                                 threshold: float,
                                                 abundance_jump: Union[float, int],
                                                 logger: Union[logging.Logger, None] = None) -> tuple:
    """
    Compute the average density and temperature profiles along the line-of-sight along with related quantities.
    This function computes the average density and temperature profiles along the LOS, along with related quantities
    such as molecular column density and the standard deviation of the species' number density. It first computes the
    molecular number density grid using the given density, temperature, abundance, temperature threshold, and abundance
    jump for simulating hot core-like abundance profiles. Then, it computes the average density and temperature profiles
    using the molecular number density grid as weights along the LOS. Finally, it calculates the
    molecular column density and its standard deviation.

    :param density_profile: array containing the density profiles
    :param temperature_profile: array containing the temperature profiles
    :param molecular_abundance: molecular abundance for computing the molecular number density
    :param threshold: threshold for adjusting the abundance in hot cores, where the molecule is potentially
        evaporated from dust grains
    :param abundance_jump: abundance jump in hot cores
    :param logger: logger object for messages; if not specified, a generic logger is created to report
        the total mass
    :return: a tuple with the average density profile, the average temperature profile, the molecular
        column density, and the standard deviation of the molecular number density
    """
    _logger = validate_parameter(logger, setup_logger('GENERIC'))
    config = load_config_file(config_file_path=os.path.join('stg', 'config', 'config.yml'))
    grid_metadata = extract_grid_metadata(config=config)
    molecule_grid = compute_molecular_number_density_hot_core(gas_number_density_profile=density_profile,
                                                              abundance=molecular_abundance,
                                                              temperature_profile=temperature_profile,
                                                              threshold=threshold,
                                                              abundance_jump=abundance_jump)
    avg_density_profile = compute_los_average_weighted_profile(profile=density_profile,
                                                               weights=molecule_grid)
    mass = (np.nansum(density_profile * u.cm ** -3 * grid_metadata['physical_px_size'][0]
                      * grid_metadata['physical_px_size'][1]
                      * grid_metadata['physical_px_size'][2]) * mean_molecular_mass * m_p).to(u.M_sun)
    _logger.debug(f'Total mass: {mass}')
    avg_temperature_profile = compute_los_average_weighted_profile(profile=temperature_profile,
                                                                   weights=molecule_grid)
    return (avg_density_profile,
            avg_temperature_profile,
            np.nansum(molecule_grid, axis=2) * grid_metadata['physical_px_size'][2].value,
            np.nanstd(molecule_grid, axis=2))


def compute_molecular_number_density_hot_core(gas_number_density_profile: np.array,
                                              abundance: float,
                                              temperature_profile: np.array,
                                              threshold: float,
                                              abundance_jump: Union[float, int]) -> np.array:
    """
    Compute the molecular number density, using a step function abundance, changing above a specific temperature to
    simulate evaporation
    :param gas_number_density_profile: the gas number density profile array
    :param abundance: the gas abundance of the species
    :param temperature_profile: the temperature profile of the source
    :param threshold: the threshold at which the species evaporate
    :param abundance_jump: the factor describing the abundance variation with respect to the base level
    :return: the array of molecular gas density computed using a step function profile
    """
    return np.where(temperature_profile < threshold,
                    gas_number_density_profile * abundance,
                    gas_number_density_profile * abundance * abundance_jump)
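
# Example, using the hot-core values adopted in assets.constants.config_overrides
# (threshold=90, abundance_jump=100): below 90 K the molecular number density is
# n(H2) * abundance; at or above 90 K it jumps to n(H2) * abundance * 100.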


def extract_grid_metadata(config: dict) -> dict:
    """
    Enrich grid metadata from the information in the configuration file
    :param config: configuration dictionary
    :return: a dictionary with the metadata
    """
    grid_config = config['grid']

    grid_metadata = grid_config.copy()
    grid_metadata['grid_type'] = radmc_grid_map[grid_config['grid_type']]
    grid_metadata['coordinate_system'] = radmc_coord_system_map[grid_config['coordinate_system']]
    grid_properties_keywords = ['shape', 'refpix', 'size', 'size_units']
    for key in grid_properties_keywords:
        grid_metadata[f'grid_{key}'] = get_grid_properties(grid_config=grid_config,
                                                           keyword=key)

    grid_metadata['physical_px_size'] = get_physical_px_size(grid_metadata)

    grid_metadata['centered_indices'] = get_centered_indices(grid_metadata)
    grid_metadata['coordinate_grid'] = compute_cartesian_coordinate_grid(indices=grid_metadata['centered_indices'],
                                                                         physical_px_size=grid_metadata[
                                                                             'physical_px_size'])

    grid_metadata['distance_matrix'] = get_distance_matrix(grid_metadata, grid_metadata['centered_indices'])

    grid_metadata['grid_edges'] = get_grid_edges(grid_metadata)
    grid_metadata['continuum_lambdas'] = 100
    return grid_metadata
+93 −0
import os
from typing import Tuple

import numpy as np

from assets.commons import (load_config_file,
                            validate_parameter)


def parse_grid_overrides(par_name: str,
                         config: dict) -> np.array:
    """
    Parse the global configuration for constructing the grid, computing the parameter values for which the grid has to
    be computed.
    :param par_name: parameter to parse
    :param config: configuration dictionary to extract the grid properties
    :return: the array of values to be considered in the grid
    """
    config_overrides = config['overrides']
    if config_overrides[f'{par_name}_grid_type'] == 'linear':
        grid_values = np.arange(float(config_overrides[f'{par_name}_limits'][0]),
                                float(config_overrides[f'{par_name}_limits'][1]),
                                float(config_overrides[f'{par_name}_step']))
    else:
        _step = float(config_overrides[f'{par_name}_step'])
        _step = _step if _step > 1 else 10 ** _step
        grid_values = 10 ** np.arange(np.log10(float(config_overrides[f'{par_name}_limits'][0])),
                                      np.log10(float(config_overrides[f'{par_name}_limits'][1])),
                                      np.log10(_step))
    return np.around(grid_values, 2)
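
# Example, using the values in etl/config/config.yml: the linear dust temperature
# grid gives np.arange(10, 35, 2.5) -> [10.0, 12.5, ..., 32.5] (the upper limit is
# excluded), while the log gas density grid spans 1e3-1e7 cm^-3 with a
# multiplicative step of 3 between consecutive values.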


def get_grid_properties(grid_config: dict,
                        keyword: str) -> list:
    """
    Compute the grid axes properties using the grid configuration dictionary. If less than 3 axes are provided, fill
        the missing ones with the first axis.
    :param grid_config: the dictionary containing the grid configuration metadata
    :param keyword: the keyword to read
    :return: a list of the requested keyword values over the three spatial axes
    """
    grid_property_list = [grid_config['dim1'][keyword]]
    for axis_idx in range(2):
        try:
            grid_property_list.append(grid_config[f'dim{axis_idx + 2}'][keyword])
        except KeyError:
            grid_property_list.append(grid_config['dim1'][keyword])
    return grid_property_list


def parse_input_main() -> Tuple[str, str, np.array, np.array, list, int, str]:
    """
    Parse the stg and mdl configuration files, as well as the main one, to apply overrides, compute the arrays of
    temperatures and number densities and the type of models defined in the grid (isothermal/uniform density vs.
    power-law profiles)
    :return: a tuple containing the dust profile type (isothermal/heated), the density profile (homogeneous/spherical),
        the distinct values of dust temperatures and densities in the grid, the line pairs used to compute ratios,
        the number of processes to be used in case of multiprocessing, and the model run_type string
    """
    default_config_name = 'config.yml'
    stg_config = load_config_file(os.path.join('stg', 'config', default_config_name))
    pl_density_idx = float(stg_config['grid']['density_powerlaw_idx'])
    pl_dust_temperature_idx = float(stg_config['grid']['dust_temperature_powerlaw_idx'])
    _model_type = 'spherical' if pl_density_idx != 0 else 'homogeneous'
    _tdust_model_type = 'heated' if pl_dust_temperature_idx != 0 else 'isothermal'
    config = load_config_file(os.path.join('config', default_config_name))
    # grid definition
    dust_temperatures = parse_grid_overrides(par_name='dust_temperature',
                                             config=config)
    densities = parse_grid_overrides(par_name='gas_density',
                                     config=config)
    line_pairs = config['overrides']['lines_to_process']
    n_processes = validate_parameter(config['computation']['threads'], default=10)
    return _tdust_model_type, _model_type, dust_temperatures, densities, line_pairs, n_processes, config['run_type']


def read_abundance_variation_schema(line_config: dict) -> dict:
    """
    Read and fill the abundance variation schema, defined as a step function
    :param line_config: the dictionary of the line configurations, for the abundances
    :return: the dictionary of the abundance variations per species, with the abundance jump factor and the temperature
        threshold at which evaporation happens
    """
    species_list = list(line_config['molecular_abundances'].keys())
    results_dict = {}
    for species in species_list:
        results_dict[species] = {
            'threshold': 20,
            'abundance_jump': 1
        }
        if 'hot_core_specs' in line_config.keys():
            if species in line_config['hot_core_specs']:
                results_dict[species] = line_config['hot_core_specs'][species]
    return results_dict
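
# Example: for a config without hot_core_specs every species gets the default
# {'threshold': 20, 'abundance_jump': 1} (i.e. no jump); with the hot_core_*
# model overrides, e-ch3oh gets {'threshold': 90, 'abundance_jump': 100}.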
+183 −0
import numpy as np
from .physical_constants import mean_molecular_mass

polaris_grid_map = {
    'spherical': 30
}

polaris_data_map = {
    "gas_number_density": 0,
    "dust_temperature": 2,
    "gas_temperature": 3,
    "molecular_abundance_mass_ratio": 17
}

radmc_grid_map = {
    'regular': 0,
    'spherical': 100
}

radmc_coord_system_map = {
    'cartesian': 0,
    'polar': 100
}

radmc_input_headers = {
    'amr_grid.inp': ['iformat', 'grid_type', 'coordinate_system', 'grid_info', 'active_axes', 'ncells_per_axis'],
    'dust_density.inp': ['iformat', 'ncells', 'dust_species'],
    'dust_temperature.dat': ['iformat', 'ncells', 'dust_species'],
    'microturbulence.inp': ['iformat', 'ncells'],
    'gas_velocity.inp': ['iformat', 'ncells'],
    'gas_temperature.inp': ['iformat', 'ncells'],
    'wavelength_micron.inp': ['continuum_lambdas'],
    'numberdens_mol.inp': ['iformat', 'ncells'],
    'escprob_lengthscale.inp': ['iformat', 'ncells'],
    'stars.inp': ['iformat', 'nstars', 'continuum_lambdas', 'stars_properties', 'lambdas']
}

leiden_url_mapping = {
    'co': 'https://home.strw.leidenuniv.nl/~moldata/datafiles/co.dat',
    'e-ch3oh': 'https://home.strw.leidenuniv.nl/~moldata/datafiles/e-ch3oh.dat',
    'a-ch3oh': 'https://home.strw.leidenuniv.nl/~moldata/datafiles/ch3oh_a.dat'
}

radmc_lines_mode_mapping = {
    'lte': 1,
    'user_defined_populations': 2,
    'lvg': 3,
    'optically_thin_non_lte': 4,
}

radmc_options_mapping = {
    'inclination': 'incl',
    'position_angle': 'phi',
    'imolspec': 'imolspec',
    'iline': 'iline',
    'width_kms': 'widthkms',
    'nchannels': 'linenlam',
    'npix': 'npix',
    'image_size_pc': 'sizepc',
    'threads': 'setthreads'
}

aggregation_function_mapping = {
    'mean': np.nanmean,
    'sum': np.nansum,
    'median': np.nanmedian
}

line_ratio_mapping = {
    '87-86': r'$I(2_{0}-1_{0})/I(2_{-1}-1_{-1})$',
    '88-86': r'$I(2_{1}-1_{1})/I(2_{-1}-1_{-1})$',
    '88-87': r'$I(2_{1}-1_{1})/I(2_{0}-1_{0})$',
    '257-256': r'$I(5_{-1}-4_{-1})/I(5_{0}-4_{0})$',
    '381-380': r'$I(7_{-1}-6_{-1})/I(7_{0}-6_{0})$'
}

config_overrides = {
    'isothermal_p15': {
        'grid': {
            'power_law_temperature': 0
        }
    },
    'isothermal_p12': {
        'grid': {
            'power_law_density': -1.2,
            'power_law_temperature': 0
        }
    },
    'isothermal_p18': {
        'grid': {
            'power_law_density': -1.8,
            'power_law_temperature': 0
        }
    },
    'constant_abundance_p15_q05': {},
    'constant_abundance_p12_q05': {
        'grid': {
            'power_law_density': -1.2,
        }
    },
    'constant_abundance_p18_q05': {
        'grid': {
            'power_law_density': -1.8,
        }
    },
    'constant_abundance_p15_q05_x01': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-10
            }
        }
    },
    'constant_abundance_p15_q05_x10': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-8
            }
        }
    },
    'hot_core_p15_q05': {
        'lines': {
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_p12_q05': {
        'grid': {
            'power_law_density': -1.2,
        },
        'lines': {
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_p18_q05': {
        'grid': {
            'power_law_density': -1.8,
        },
        'lines': {
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_x10': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-8
            },
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'hot_core_x01': {
        'lines': {
            'molecular_abundances': {
                "e-ch3oh": 1e-10
            },
            'hot_core_specs': {
                "e-ch3oh": {
                    "threshold": 90,
                    "abundance_jump": 100
                }
            }
        }
    },
    'double_microturbulence': {},
}
+116 −0
# select
# 	agal_name
# 	, round("N_H2_peak" / (2 * r_fwhm * 3.0856e18)) as volume_density
# from top100_sed_data tsd
"agal_name","volume_density"
AGAL006.216-00.609,89795.0
AGAL008.671-00.356,190969.0
AGAL008.684-00.367,68138.0
AGAL008.706-00.414,27785.0
AGAL008.831-00.027,NaN
AGAL010.444-00.017,22907.0
AGAL010.472+00.027,366225.0
AGAL010.624-00.384,299726.0
AGAL012.804-00.199,375774.0
AGAL013.178+00.059,68826.0
AGAL013.658-00.599,52684.0
AGAL014.114-00.574,75908.0
AGAL014.194-00.194,78815.0
AGAL014.492-00.139,74017.0
AGAL014.632-00.577,127991.0
AGAL015.029-00.669,209966.0
AGAL018.606-00.074,43782.0
AGAL018.734-00.226,19630.0
AGAL018.888-00.474,56893.0
AGAL019.609-00.234,92787.0
AGAL019.882-00.534,136234.0
AGAL022.376+00.447,82445.0
AGAL023.206-00.377,163688.0
AGAL024.416+00.101,15957.0
AGAL024.629+00.172,15443.0
AGAL024.651-00.169,7869.0
AGAL024.789+00.082,108737.0
AGAL028.564-00.236,37289.0
AGAL028.861+00.066,19217.0
AGAL030.818-00.056,386655.0
AGAL030.848-00.081,23220.0
AGAL030.893+00.139,49987.0
AGAL031.412+00.307,389457.0
AGAL034.258+00.154,1700013.0
AGAL034.401+00.226,218025.0
AGAL034.411+00.234,495365.0
AGAL034.821+00.351,68475.0
AGAL035.197-00.742,228801.0
AGAL035.579+00.007,8844.0
AGAL037.554+00.201,27842.0
AGAL043.166+00.011,221873.0
AGAL049.489-00.389,864473.0
AGAL053.141+00.069,184095.0
AGAL059.782+00.066,94354.0
AGAL301.136-00.226,259527.0
AGAL305.192-00.006,44264.0
AGAL305.209+00.206,183284.0
AGAL305.562+00.014,50295.0
AGAL305.794-00.096,30607.0
AGAL309.384-00.134,34424.0
AGAL310.014+00.387,38275.0
AGAL313.576+00.324,56581.0
AGAL316.641-00.087,119766.0
AGAL317.867-00.151,159134.0
AGAL318.779-00.137,34706.0
AGAL320.881-00.397,16371.0
AGAL326.661+00.519,93292.0
AGAL326.987-00.032,74912.0
AGAL327.119+00.509,29492.0
AGAL327.293-00.579,1286938.0
AGAL327.393+00.199,36543.0
AGAL328.809+00.632,350565.0
AGAL329.029-00.206,34454.0
AGAL329.066-00.307,14237.0
AGAL330.879-00.367,218474.0
AGAL330.954-00.182,322707.0
AGAL331.709+00.582,23291.0
AGAL332.094-00.421,83157.0
AGAL332.826-00.549,407894.0
AGAL333.134-00.431,200007.0
AGAL333.284-00.387,117459.0
AGAL333.314+00.106,53337.0
AGAL333.604-00.212,210139.0
AGAL333.656+00.059,21760.0
AGAL335.789+00.174,135994.0
AGAL336.958-00.224,28640.0
AGAL337.176-00.032,11767.0
AGAL337.258-00.101,40725.0
AGAL337.286+00.007,21253.0
AGAL337.406-00.402,266919.0
AGAL337.704-00.054,76912.0
AGAL337.916-00.477,386876.0
AGAL338.066+00.044,15655.0
AGAL338.786+00.476,38848.0
AGAL338.926+00.554,154575.0
AGAL339.623-00.122,40057.0
AGAL340.374-00.391,66279.0
AGAL340.746-01.001,43744.0
AGAL340.784-00.097,30196.0
AGAL341.217-00.212,77596.0
AGAL342.484+00.182,20980.0
AGAL343.128-00.062,280086.0
AGAL343.756-00.164,319987.0
AGAL344.227-00.569,609039.0
AGAL345.003-00.224,303461.0
AGAL345.488+00.314,225021.0
AGAL345.504+00.347,168516.0
AGAL345.718+00.817,90728.0
AGAL351.131+00.771,43470.0
AGAL351.161+00.697,743236.0
AGAL351.244+00.669,238178.0
AGAL351.416+00.646,2489024.0
AGAL351.444+00.659,1201754.0
AGAL351.496+00.691,78226.0
AGAL351.571+00.762,61074.0
AGAL351.581-00.352,234338.0
AGAL351.774-00.537,2754463.0
AGAL353.066+00.452,153199.0
AGAL353.409-00.361,202379.0
AGAL353.417-00.079,8376.0
AGAL354.944-00.537,46271.0

etl/assets/utils.py

0 → 100644
+52 −0
import os
import pandas as pd
import yaml
from assets.constants import config_overrides
from typing import (Union,
                    Tuple)
from assets.commons import validate_parameter


def get_poc_results(line_fit_filename: str) -> pd.DataFrame:
    df_class = pd.read_csv(os.path.join('assets', 'data', line_fit_filename))
    with open(os.path.join('prs', 'output', 'run_type', 'constant_abundance_p15_q05', 'volume_density_results.yml'),
              'r') as infile:
        fit_results = yaml.load(infile, Loader=yaml.FullLoader)
    df_results = pd.DataFrame.from_dict(fit_results, orient='index')
    df_full = (df_results.merge(df_class, left_index=True, right_on='source_name', how='inner', validate='1:1')
               .dropna(subset='best_fit'))
    df_full['class_phys'] = 'HII'
    df_full.loc[df_full['class'] == '70w', 'class_phys'] = 'Quiescent'
    df_full.loc[df_full['class'] == 'IRw', 'class_phys'] = 'Protostellar'
    df_full.loc[df_full['class'] == 'IRb', 'class_phys'] = 'MYSOs'
    return df_full


def get_profile_input(
        model_root_folder: str,
        species: Union[None, str] = None) -> Tuple[dict, dict]:
    overrides = config_overrides[model_root_folder.split(os.path.sep)[-1]]
    _species = validate_parameter(species, default='e-ch3oh')
    try:
        overrides_grid = overrides['grid']
    except KeyError:
        overrides_grid = {}
    try:
        overrides_lines = overrides['lines']
    except KeyError:
        overrides_lines = {}
    overrides_abundance = {}

    try:
        overrides_abundance['molecular_abundance'] = overrides_lines['molecular_abundances'][_species]
    except KeyError:
        pass

    for key in ['abundance_jump', 'threshold']:
        try:
            overrides_abundance[key] = overrides_lines['hot_core_specs'][_species][key]
        except KeyError:
            pass

    return overrides_grid, overrides_abundance

etl/config/config.yml

0 → 100644
+14 −0
run_type: constant_abundance_p15_q05

computation:
    threads: 10

overrides:
    dust_temperature_grid_type: 'linear'
    dust_temperature_limits: [10, 35]
    dust_temperature_step: 2.5
    gas_density_grid_type: 'log'
    gas_density_limits: [1e3, 1e7]
    gas_density_step: 3
    gas_density_unit: cm^-3
    lines_to_process: [['87', '86'], ['88', '87'], ['88', '86'], ['257', '256'], ['381', '380']]
+473 −0
best_bandwidths:
  257-256: 0.4
  381-380: 0.2
  87-86: 0.1
  88-86: 0.1
  88-87: 0.1
integrated_intensities_uncertainties:
  '256':
    G08.71-0.41: 0.3772371124870143
    G10.45-0.02: 0.3438149090944644
    G10.47+0.03: 4.67062615566759
    G10.62-0.38: 5.717801509967671
    G12.81-0.20: 2.088877254036277
    G13.18+0.06: 1.43381518306444
    G13.66-0.60: 1.1620189309770297
    G14.11-0.57: 0.3933488163719442
    G14.19-0.19: 1.9966333535808136
    G14.49-0.14: 0.5232957299744114
    G14.63-0.58: 0.6760059683722098
    G15.03-0.67: 1.3201864103978675
    G18.61-0.07: 0.449933406945426
    G18.73-0.23: 1.1925529389006952
    G18.89-0.47: 1.2420175138135696
    G19.88-0.54: 1.1775666735052563
    G22.37+0.45: 0.8397889239422417
    G23.21-0.38: 2.366268748274346
    G28.56-0.24: 0.7853900725084517
    G28.86+0.07: 0.5
    G30.85-0.08: 0.5698331975250233
    G31.41+0.31: 2.794406439049449
    G34.26+0.15: 5.571485634832507
    G34.40+0.23: 2.4222527304195376
    G34.41+0.23: 2.455950237377704
    G34.82+0.35: 0.1947198188044887
    G35.20-0.74: 1.9116488388286448
    G37.55+0.20: 0.7800057066048043
    G49.49-0.39: 13.79415939155848
    G53.14+0.07: 0.9832922403050368
    G59.78+0.07: 0.5
  '257':
    G08.71-0.41: 0.4136351969963581
    G10.45-0.02: 0.3567836016634692
    G10.47+0.03: 4.737191542189759
    G10.62-0.38: 5.828217319934533
    G12.81-0.20: 2.159742388795141
    G13.18+0.06: 1.528153771626887
    G13.66-0.60: 1.2364636458899043
    G14.11-0.57: 0.4315807681683414
    G14.19-0.19: 2.1114871424317903
    G14.49-0.14: 0.5631062582040194
    G14.63-0.58: 0.7362604233371383
    G15.03-0.67: 1.3883181061379544
    G18.61-0.07: 0.4918960784434188
    G18.73-0.23: 1.2489705007902534
    G18.89-0.47: 1.30997858860221
    G19.88-0.54: 1.2556873263733366
    G22.37+0.45: 0.9012219713131504
    G23.21-0.38: 2.4463347346958884
    G28.56-0.24: 0.8256372552866202
    G28.86+0.07: 0.5
    G30.85-0.08: 0.5939589107713851
    G31.41+0.31: 2.830674270184918
    G34.26+0.15: 5.689969110997285
    G34.40+0.23: 2.5501966207773785
    G34.41+0.23: 2.586820094338446
    G34.82+0.35: 0.2153653834587912
    G35.20-0.74: 2.009289929523078
    G37.55+0.20: 0.8302500355158099
    G49.49-0.39: 13.996345324993044
    G53.14+0.07: 1.037736129080711
    G59.78+0.07: 0.5
  '380':
    G08.71-0.41: 0.1719132347425028
    G10.45-0.02: 0.3350748768496078
    G10.47+0.03: 2.5238526974413102
    G10.62-0.38: 1.9730421303059005
    G12.81-0.20: 0.4938645969760634
    G13.18+0.06: 0.3130453732259515
    G13.66-0.60: 0.3896020198637059
    G14.11-0.57: 0.1604869867792384
    G14.19-0.19: 0.3718943617922549
    G14.49-0.14: 0.3980987437341203
    G14.63-0.58: 0.1942770443673398
    G15.03-0.67: 0.4226996779770496
    G18.61-0.07: 0.3104707669438686
    G18.73-0.23: 0.3983825447868095
    G18.89-0.47: 0.3283273384950035
    G19.88-0.54: 0.297825582498799
    G22.37+0.45: 0.2435883035421827
    G23.21-0.38: 0.4720878606853434
    G28.56-0.24: 0.3689771528703325
    G28.86+0.07: 0.2133053661117121
    G30.85-0.08: 0.4002321844656681
    G31.41+0.31: 1.7677726204007878
    G34.26+0.15: 2.2053525649662755
    G34.40+0.23: 0.3157737464671486
    G34.41+0.23: 0.552143182260122
    G34.82+0.35: 0.3762696495121889
    G35.20-0.74: 0.3277361642527309
    G37.55+0.20: 0.3505786313165484
    G49.49-0.39: 6.458962659096731
    G53.14+0.07: 0.2659096119306583
    G59.78+0.07: 0.1958691834258987
  '381':
    G08.71-0.41: 0.1912145881150111
    G10.45-0.02: 0.348675803268881
    G10.47+0.03: 2.527338233134208
    G10.62-0.38: 2.000038955895338
    G12.81-0.20: 0.5078276831867471
    G13.18+0.06: 0.3327236010567068
    G13.66-0.60: 0.4023699128083662
    G14.11-0.57: 0.1778607797165793
    G14.19-0.19: 0.3901217659238757
    G14.49-0.14: 0.410805519891472
    G14.63-0.58: 0.2096518477508132
    G15.03-0.67: 0.4404189229260346
    G18.61-0.07: 0.3250267274896626
    G18.73-0.23: 0.4111428033705079
    G18.89-0.47: 0.3562705793927619
    G19.88-0.54: 0.3054447929243074
    G22.37+0.45: 0.2565267403612208
    G23.21-0.38: 0.47971744770208
    G28.56-0.24: 0.3858032381571517
    G28.86+0.07: 0.2244369344927569
    G30.85-0.08: 0.4287274799425559
    G31.41+0.31: 1.7849749705573348
    G34.26+0.15: 2.2213861922198466
    G34.40+0.23: 0.3283727163284978
    G34.41+0.23: 0.5631980961359404
    G34.82+0.35: 0.3873761358129582
    G35.20-0.74: 0.3362751271550414
    G37.55+0.20: 0.3531542428043672
    G49.49-0.39: 6.498419983328799
    G53.14+0.07: 0.278825242987693
    G59.78+0.07: 0.2103971057794001
  '86':
    G08.71-0.41: 0.1141338092612959
    G10.45-0.02: 0.1513227343419588
    G10.47+0.03: 0.2436313155314365
    G10.62-0.38: 0.2543853719744313
    G12.81-0.20: 0.2120144583210657
    G13.18+0.06: 0.1444849558699236
    G13.66-0.60: 0.1880743486996585
    G14.11-0.57: 0.0991920101690233
    G14.19-0.19: 0.2591102799195194
    G14.49-0.14: 0.1932137455379076
    G14.63-0.58: 0.0920867956240891
    G15.03-0.67: 0.1519740165468335
    G18.61-0.07: 0.1336149687864388
    G18.73-0.23: 0.1544601974519147
    G18.89-0.47: 0.284814022368405
    G19.88-0.54: 0.1238378758928978
    G22.37+0.45: 0.1935045511182322
    G23.21-0.38: 0.1601721389776916
    G28.56-0.24: 0.1699145015331222
    G28.86+0.07: 0.1037452815160559
    G30.85-0.08: 0.1481775324641313
    G31.41+0.31: 0.2194502880473983
    G34.26+0.15: 0.1627530591328137
    G34.40+0.23: 0.1351065672329636
    G34.41+0.23: 0.2465062371409508
    G34.82+0.35: 0.0865664840020827
    G35.20-0.74: 0.1340658506805993
    G37.55+0.20: 0.1253184895008262
    G49.49-0.39: 0.2791842617996957
    G53.14+0.07: 0.0768660912353171
    G59.78+0.07: 0.0574168367111523
  '87':
    G08.71-0.41: 0.111937365388756
    G10.45-0.02: 0.1491063536449446
    G10.47+0.03: 0.2419807697773685
    G10.62-0.38: 0.2507814699869695
    G12.81-0.20: 0.2090805176901697
    G13.18+0.06: 0.1418278810377906
    G13.66-0.60: 0.1841371839860628
    G14.11-0.57: 0.097470593166154
    G14.19-0.19: 0.2519127822081297
    G14.49-0.14: 0.188762628907307
    G14.63-0.58: 0.0892839165361122
    G15.03-0.67: 0.1499962128329128
    G18.61-0.07: 0.1315931235851593
    G18.73-0.23: 0.1517892338894
    G18.89-0.47: 0.2784484436939946
    G19.88-0.54: 0.121808079112921
    G22.37+0.45: 0.1892876467707792
    G23.21-0.38: 0.1576533484208159
    G28.56-0.24: 0.1640239796382693
    G28.86+0.07: 0.1023581134022235
    G30.85-0.08: 0.145424188960802
    G31.41+0.31: 0.2170064162555401
    G34.26+0.15: 0.16145840413561
    G34.40+0.23: 0.1324170108062351
    G34.41+0.23: 0.2427485184738539
    G34.82+0.35: 0.0851539809088609
    G35.20-0.74: 0.1319126641393791
    G37.55+0.20: 0.122848111292009
    G49.49-0.39: 0.2773996831748604
    G53.14+0.07: 0.075643530717601
    G59.78+0.07: 0.0555749088316803
  '88':
    G08.71-0.41: 0.1118095857454675
    G10.45-0.02: 0.1483259532833787
    G10.47+0.03: 0.2409668716291754
    G10.62-0.38: 0.2495329007627918
    G12.81-0.20: 0.2084495919578603
    G13.18+0.06: 0.1414209323698791
    G13.66-0.60: 0.1831553534762108
    G14.11-0.57: 0.0972920392752167
    G14.19-0.19: 0.2507189304794556
    G14.49-0.14: 0.1882827587313301
    G14.63-0.58: 0.0890047760093749
    G15.03-0.67: 0.1495172793656858
    G18.61-0.07: 0.1313898333367211
    G18.73-0.23: 0.1513505926924855
    G18.89-0.47: 0.2779301291723239
    G19.88-0.54: 0.121507432969849
    G22.37+0.45: 0.1889958485649907
    G23.21-0.38: 0.1568877828888521
    G28.56-0.24: 0.1635890088005906
    G28.86+0.07: 0.1020862031904043
    G30.85-0.08: 0.1451689199961562
    G31.41+0.31: 0.2158655413134478
    G34.26+0.15: 0.1607951465382442
    G34.40+0.23: 0.1320891607520888
    G34.41+0.23: 0.2421195456012388
    G34.82+0.35: 0.0850472508528964
    G35.20-0.74: 0.1316317053747658
    G37.55+0.20: 0.1222906026734552
    G49.49-0.39: 0.2758738141000653
    G53.14+0.07: 0.0755106793427082
    G59.78+0.07: 0.0553352170005506
measured_integrated_intensities:
  '256':
    G08.71-0.41: 0.2225977131194544
    G10.45-0.02: 0.6483658492610417
    G10.47+0.03: 12.781079959863508
    G10.62-0.38: 14.026812217900536
    G12.81-0.20: 4.594177385112016
    G13.18+0.06: 2.2560621827614065
    G13.66-0.60: 2.2460391103183417
    G14.11-0.57: 0.4209070251778891
    G14.19-0.19: 3.080405790891515
    G14.49-0.14: 0.7458559310555399
    G14.63-0.58: 1.2815176351639286
    G15.03-0.67: 2.5627713663667446
    G18.61-0.07: 0.6336927890231682
    G18.73-0.23: 1.7197328016314029
    G18.89-0.47: 2.044797825251944
    G19.88-0.54: 2.689820011325686
    G22.37+0.45: 1.0252721670025875
    G23.21-0.38: 5.128448315345341
    G28.56-0.24: 0.3548360822544717
    G28.86+0.07: 0.1
    G30.85-0.08: 0.5697324971999391
    G31.41+0.31: 8.11815427336179
    G34.26+0.15: 14.800657803071704
    G34.40+0.23: 3.986394686990907
    G34.41+0.23: 5.298745541451453
    G34.82+0.35: 0.2244740966733627
    G35.20-0.74: 3.557121906245741
    G37.55+0.20: 1.45314493723405
    G49.49-0.39: 36.47299209277803
    G53.14+0.07: 1.769951086391564
    G59.78+0.07: 0.1
  '257':
    G08.71-0.41: 1.6896248544507602
    G10.45-0.02: 1.6172560812119725
    G10.47+0.03: 17.90790753460651
    G10.62-0.38: 23.51945555648909
    G12.81-0.20: 9.576422834532323
    G13.18+0.06: 6.614500597410293
    G13.66-0.60: 5.099529057565239
    G14.11-0.57: 2.328801003580471
    G14.19-0.19: 8.648952235029466
    G14.49-0.14: 2.4117084959115784
    G14.63-0.58: 4.22783491577442
    G15.03-0.67: 6.482321394374812
    G18.61-0.07: 2.119319392382522
    G18.73-0.23: 4.953380016604151
    G18.89-0.47: 5.763854872422209
    G19.88-0.54: 6.538781173715317
    G22.37+0.45: 3.680142077328448
    G23.21-0.38: 9.457619850247136
    G28.56-0.24: 2.68014308698633
    G28.86+0.07: 0.1
    G30.85-0.08: 2.087101701007916
    G31.41+0.31: 10.657245054323353
    G34.26+0.15: 23.76415857208871
    G34.40+0.23: 10.868969226850243
    G34.41+0.23: 11.460979034617344
    G34.82+0.35: 1.0392490902135514
    G35.20-0.74: 9.236714367808798
    G37.55+0.20: 3.44395156070276
    G49.49-0.39: 53.88508148720575
    G53.14+0.07: 4.882683930286012
    G59.78+0.07: 0.1
  '380':
    G08.71-0.41: 0.1764976739921008
    G10.45-0.02: 0.3956084197099886
    G10.47+0.03: 17.205686694830824
    G10.62-0.38: 16.829857896249838
    G12.81-0.20: 3.77800331658647
    G13.18+0.06: 1.7426684781184258
    G13.66-0.60: 2.050893367850886
    G14.11-0.57: 0.4213659117392063
    G14.19-0.19: 3.0914972380702115
    G14.49-0.14: 0.5203058400694469
    G14.63-0.58: 0.7159602280790358
    G15.03-0.67: 2.1151044891765216
    G18.61-0.07: 0.6989085703534818
    G18.73-0.23: 2.341561030185213
    G18.89-0.47: 1.1824614382740015
    G19.88-0.54: 5.43307241666044
    G22.37+0.45: 0.8920116613769251
    G23.21-0.38: 6.836653617571014
    G28.56-0.24: 0.0440146014079702
    G28.86+0.07: 1.4210884372776502
    G30.85-0.08: 0.5707347171006769
    G31.41+0.31: 12.47776135190902
    G34.26+0.15: 22.863445215418928
    G34.40+0.23: 3.531757873095086
    G34.41+0.23: 7.573594043835107
    G34.82+0.35: 0.5068421046687672
    G35.20-0.74: 3.735886344534172
    G37.55+0.20: 2.1481844033314563
    G49.49-0.39: 59.72041757348511
    G53.14+0.07: 2.1621252984074437
    G59.78+0.07: 0.9953421793454204
  '381':
    G08.71-0.41: 0.6722454928574747
    G10.45-0.02: 0.9251788351499816
    G10.47+0.03: 18.155676944590947
    G10.62-0.38: 26.31767222497715
    G12.81-0.20: 6.359416798755954
    G13.18+0.06: 5.105986039174396
    G13.66-0.60: 3.5815529807897217
    G14.11-0.57: 1.4017924550811132
    G14.19-0.19: 7.297072065038203
    G14.49-0.14: 1.440371736841573
    G14.63-0.58: 3.008997339860133
    G15.03-0.67: 5.842826943986542
    G18.61-0.07: 1.8679581479443947
    G18.73-0.23: 4.779917207406943
    G18.89-0.47: 3.8325153184359935
    G19.88-0.54: 10.285222566660105
    G22.37+0.45: 2.6850343064569504
    G23.21-0.38: 9.739053502995969
    G28.56-0.24: 0.9680884829289998
    G28.86+0.07: 2.7304343388726755
    G30.85-0.08: 1.7951582288444188
    G31.41+0.31: 15.666989012027273
    G34.26+0.15: 27.670510995076157
    G34.40+0.23: 8.838480648374599
    G34.41+0.23: 12.543875121955834
    G34.82+0.35: 0.912504455201019
    G35.20-0.74: 8.213603364738846
    G37.55+0.20: 2.427780024280013
    G49.49-0.39: 71.80734717626261
    G53.14+0.07: 4.9231567416211615
    G59.78+0.07: 2.24196075698127
  '86':
    G08.71-0.41: 5.770415738358244
    G10.45-0.02: 3.585944537913708
    G10.47+0.03: 19.61767281185442
    G10.62-0.38: 17.24610289004276
    G12.81-0.20: 8.431310860817968
    G13.18+0.06: 10.867003106610996
    G13.66-0.60: 8.534961548014586
    G14.11-0.57: 3.616973808521042
    G14.19-0.19: 15.749893351960294
    G14.49-0.14: 6.365582995343145
    G14.63-0.58: 5.623662981046849
    G15.03-0.67: 4.060482703151656
    G18.61-0.07: 4.924693385096532
    G18.73-0.23: 10.095062635647638
    G18.89-0.47: 13.902280291349596
    G19.88-0.54: 7.349704071521969
    G22.37+0.45: 8.934333184369493
    G23.21-0.38: 10.589381466381129
    G28.56-0.24: 7.718943160772005
    G28.86+0.07: 3.744479978305277
    G30.85-0.08: 3.600081120187853
    G31.41+0.31: 10.334408688143643
    G34.26+0.15: 17.148782015804713
    G34.40+0.23: 14.72159545617476
    G34.41+0.23: 17.151716147853218
    G34.82+0.35: 2.3557453894152305
    G35.20-0.74: 11.441136922618975
    G37.55+0.20: 5.218930784342653
    G49.49-0.39: 40.047671620058914
    G53.14+0.07: 3.998167013285548
    G59.78+0.07: 2.88330892529155
  '87':
    G08.71-0.41: 1.3070165152749178
    G10.45-0.02: 1.8754428022340208
    G10.47+0.03: 14.654313712932716
    G10.62-0.38: 10.36368884187998
    G12.81-0.20: 4.037590978170881
    G13.18+0.06: 3.998842732080239
    G13.66-0.60: 3.965098726041119
    G14.11-0.57: 1.0980674058983302
    G14.19-0.19: 5.9799020315281455
    G14.49-0.14: 1.94278246727516
    G14.63-0.58: 1.7565446299584897
    G15.03-0.67: 1.9599610541713943
    G18.61-0.07: 1.4529504405718856
    G18.73-0.23: 3.946558643004384
    G18.89-0.47: 3.87746848292546
    G19.88-0.54: 2.725369130194149
    G22.37+0.45: 2.2334635415722928
    G23.21-0.38: 5.3215576604812105
    G28.56-0.24: 2.00135317412325
    G28.86+0.07: 1.6724447166657392
    G30.85-0.08: 1.0384292597032818
    G31.41+0.31: 7.634235223264329
    G34.26+0.15: 11.79339165343781
    G34.40+0.23: 4.893358882791944
    G34.41+0.23: 6.975706242571226
    G34.82+0.35: 0.6233212838879257
    G35.20-0.74: 4.120487302502623
    G37.55+0.20: 2.3647012203938407
    G49.49-0.39: 30.638278402401056
    G53.14+0.07: 1.2755372740021984
    G59.78+0.07: 1.051817414971558
  '88':
    G08.71-0.41: 0.2555605423861423
    G10.45-0.02: 0.5654586375969508
    G10.47+0.03: 10.621236220690491
    G10.62-0.38: 6.590559744325095
    G12.81-0.20: 2.2391932908285748
    G13.18+0.06: 1.043058241991689
    G13.66-0.60: 1.388687476672136
    G14.11-0.57: 0.2400487988472261
    G14.19-0.19: 1.3734457205383674
    G14.49-0.14: 0.1719527527259724
    G14.63-0.58: 0.6227318765911359
    G15.03-0.67: 0.9484169569327036
    G18.61-0.07: 0.2409772077030058
    G18.73-0.23: 1.43132088364319
    G18.89-0.47: 1.240400088439042
    G19.88-0.54: 0.940430197609148
    G22.37+0.45: 0.4850629557476013
    G23.21-0.38: 1.900341099965392
    G28.56-0.24: 0.3811019169488633
    G28.86+0.07: 0.8361276696375168
    G30.85-0.08: 0.2353506461831835
    G31.41+0.31: 6.015384520155143
    G34.26+0.15: 7.861370828025846
    G34.40+0.23: 1.3090143608612992
    G34.41+0.23: 3.094135827250596
    G34.82+0.35: 0.1891164402500769
    G35.20-0.74: 1.7277506260977236
    G37.55+0.20: 0.934902424028627
    G49.49-0.39: 19.50272354704647
    G53.14+0.07: 0.4187857086494883
    G59.78+0.07: 0.4564477452483653
probability_threshold: 0.37
ratio_limits: {
  '87-86': [ 0, 1.7 ],
  '88-86': [ 0, 1.7 ],
  '88-87': [ 0, 1.7 ],
  '257-256': [ 0, 15 ],
  '381-380': [ 0, 4 ],
}
ratios_to_include:
- 87-86
- 88-86
- 88-87
- 257-256
- 381-380
recompute_kde: true
simulated_ratio_realizations: 1000
use_model_for_inference: PLACEHOLDER
+71 −0
retrain: True
use_model_for_training: constant_abundance_p15_q05

overrides: {
  'tdust_grid_type': 'linear',
  'tdust_limits': [ 10, 35 ],
  'tdust_step': 2.5,
  'nh2_grid_type': 'log',
  'nh2_limits': [ 1e3, 6.5e7 ],
  'nh2_step': 1.316074013
}

#model_type: 'XGBoost'
#model_parameters: {
#  'n_estimators': 300,
#  'max_depth': 4,
#  'learning_rate': 0.02,
#  'subsample': 0.5
#}

use_validation: True

model_type: 'XGBoost_gridsearch'
model_parameters: {
  'param_grid':
    {
      'n_estimators': [ 300, 320, 50 ],
      'max_depth': [ 5, 7, 2 ],
      'learning_rate': [ 0.05, 0.1, 0.03 ],
      'subsample': [ 1, 2, 1 ],
      'lambda': [ 10, 20, 10 ],
      'colsample_bynode': [ 0.75, 0.8, 0.1 ],
#      'colsample_bylevel': [ 0.5, 1, 0.1 ],
      'colsample_bytree': [ 0.75, 0.8, 0.2 ],
      'min_child_weight': [ 100, 200, 100 ]
    },
  'param_gridsearch':
    {
      'cv': 5,
      'n_jobs': 16
    },
}
#model_parameters: {
#  'param_grid':
#    {
#      'n_estimators': [ 400, 900, 100 ],
#      'max_depth': [ 3, 7, 2 ],
#      'learning_rate': [ 0.01, 0.06, 0.02 ],
#      'subsample': [ 0.3, 0.9, 0.2 ]
#    },
#  'param_gridsearch':
#    {
#      'cv': 3,
#      'n_jobs': 16
#    },
#}

#model_type: 'auto_skl'
#model_parameters: {
#  'time_left_for_this_task': 3600,
#  'per_run_time_limit': 600,
#  'n_jobs': 12
#}

#model_type: 'RandomForest'
#model_parameters: {
#  'n_estimators': 20,
#  'criterion': 'friedman_mse',
#  'max_samples': 0.5
#}
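
As a reading aid, here is a minimal sketch of how this training configuration could be consumed, under two assumptions: 'linear' grids step additively while 'log' grids step by a multiplicative factor, and each param_grid triplet is a (start, stop, step) specification for np.arange (suggested by the ranges above). The expand_grid and run_gridsearch helpers are illustrative, not part of the repository, and XGBRegressor stands in for whatever estimator the pipeline actually builds:

import numpy as np
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor


def expand_grid(grid_type, limits, step):
    # Assumption: linear grids step additively, log grids multiply by a
    # constant factor until the upper limit is exceeded.
    if grid_type == 'linear':
        return np.arange(limits[0], limits[1] + step, step)
    values = [limits[0]]
    while values[-1] * step <= limits[1]:
        values.append(values[-1] * step)
    return np.array(values)


tdust_grid = expand_grid('linear', [10, 35], 2.5)         # 10, 12.5, ..., 35
nh2_grid = expand_grid('log', [1e3, 6.5e7], 1.316074013)  # about 41 log-spaced points


def run_gridsearch(model_parameters, x_train, y_train):
    # Assumption: each triplet is (start, stop, step) for np.arange, so e.g.
    # [300, 320, 50] -> [300] and [0.05, 0.1, 0.03] -> [0.05, 0.08].
    param_grid = {key: list(np.arange(*triplet))
                  for key, triplet in model_parameters['param_grid'].items()}
    search = GridSearchCV(estimator=XGBRegressor(),
                          param_grid=param_grid,
                          **model_parameters['param_gridsearch'])
    search.fit(x_train, y_train)
    return search.best_estimator_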
Original line number Diff line number Diff line
# add a db_credentials.yml file with your credentials. The structure should be:
#DB_USER: db_user B64-encoded
#DB_PASS: db_pass B64-encoded
#DB_HOST: db_host
#DB_NAME: db_name
#DB_PORT: db_port
 No newline at end of file
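
A minimal sketch of how such a file could be generated, assuming plain base64 for the user and password fields; the write_credentials helper and the output path are illustrative:

import base64
import yaml


def write_credentials(user, password, host, name, port):
    # Only the user and password are base64-encoded, per the template above.
    credentials = {
        'DB_USER': base64.b64encode(user.encode()).decode(),
        'DB_PASS': base64.b64encode(password.encode()).decode(),
        'DB_HOST': host,
        'DB_NAME': name,
        'DB_PORT': port,
    }
    with open('credentials/db_credentials.yml', 'w') as outfile:
        yaml.dump(credentials, outfile)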

etl/main.py

0 → 100644
+384 −0
import glob
import os
import uuid
import sqlalchemy
import argparse
import sys
from multiprocessing import Pool
from stg.stg_build_db_structure import init_db, TmpExecutionQueue
from itertools import product, chain
from typing import Union, Tuple, Iterator
from assets.commons import (cleanup_directory,
                            setup_logger,
                            validate_parameter)
from assets.commons.parsing import parse_input_main
from assets.commons.db_utils import upsert, get_pg_engine
from stg.stg_radmc_input_generator import main as stg_main
from mdl.mdl_execute_radmc_command import main as execute_radmc_script
from prs.prs_compute_integrated_fluxes_and_ratios import main as prs_main
from prs.prs_inspect_results import main as prs_inspection_main


def compute_full_grid(tdust: float,
                      nh2: float,
                      line: int,
                      density_keyword: str,
                      dust_temperature_keyword: str) -> Tuple[float, float, int, str]:
    """
        Compute the full grid for a given dust temperature, hydrogen density, and line identifier, and return the results.

        :param tdust: The dust temperature.
        :param nh2: The H2 number density.
        :param line: The line identifier for the RADMC-3D observation.
        :param density_keyword: The keyword for the density in the grid configuration.
        :param dust_temperature_keyword: The keyword for the dust temperature in the grid configuration.
        :return: A tuple containing the dust temperature, H2 number density, line identifier, and the name of the
            resulting FITS file.
    """
    scratch_dir = os.path.join('mdl', 'scratches', str(uuid.uuid4()))
    stg_overrides = {
        'grid': {
            dust_temperature_keyword: tdust,
            density_keyword: nh2,
        }
    }
    overrides = {
        'grid_lines': stg_overrides,
        'model': {
            'radmc_observation': {
                'iline': line
            }
        }
    }
    tarname = stg_main(override_config=overrides,
                       path_radmc_files=scratch_dir,
                       run_id=run_id)
    cube_fits_name = execute_radmc_script(grid_zipfile=tarname,
                                          override_config=overrides,
                                          radmc_input_path=scratch_dir,
                                          run_id=run_id)
    return tdust, nh2, line, cube_fits_name


def initialize_queue(engine: sqlalchemy.engine,
                     run_id: str,
                     run_arguments: Iterator):
    """
        Initialize the execution queue for a specific run ID with given run arguments if not already initialized.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param run_id: The unique identifier for the run.
        :param run_arguments: An iterator of run arguments, each containing dust temperature, density, line,
            density keyword, and dust temperature keyword.
        :return: None. Inserts entries into the execution queue if the queue is not already initialized.
    """
    is_initialized = engine.execute(f"select count(*) from tmp_execution_queue where run_id='{run_id}'").first()[0] != 0
    if is_initialized is False:
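        # Upserting on the full parameter tuple keeps queue initialization
        # idempotent: re-running the same run_id never duplicates entries.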
        for arguments in run_arguments:
            raw_insert_entry = {'run_id': run_id,
                                'dust_temperature': arguments[0],
                                'density': arguments[1],
                                'line': arguments[2],
                                'density_keyword': arguments[3],
                                'dust_temperature_keyword': arguments[4],
                                'done': False}
            upsert(
                table_object=TmpExecutionQueue,
                row_dict=raw_insert_entry,
                conflict_keys=[
                    TmpExecutionQueue.run_id,
                    TmpExecutionQueue.dust_temperature,
                    TmpExecutionQueue.density,
                    TmpExecutionQueue.line,
                    TmpExecutionQueue.density_keyword,
                    TmpExecutionQueue.dust_temperature_keyword
                ],
                engine=engine
            )


def get_run_pars(engine: sqlalchemy.engine,
                 run_id: str):
    """
        Fetch the next pending row from the execution queue for the given run ID and mark it as done.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param run_id: The unique identifier for the run.
        :return: The row corresponding to the next pending task in the execution queue, or None if there are no
            more pending tasks.
    """
    sql_query = sqlalchemy.text(f"""UPDATE tmp_execution_queue 
                SET done = true 
                WHERE row_id = (SELECT row_id 
                                   FROM tmp_execution_queue
                                   WHERE (run_id = '{run_id}')
                                        AND (done is false)
                                        AND pg_try_advisory_xact_lock(row_id) 
                                   LIMIT 1 FOR UPDATE) 
                RETURNING *""")
    return engine.execution_options(autocommit=True).execute(sql_query).first()


def verify_run(engine: sqlalchemy.engine,
               run_id: str):
    """
        Verify the completion status of a run, resetting tasks marked as done but lacking a FITS cube back to pending.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param run_id: The unique identifier for the run.
        :return: True if all completed tasks for the run have associated FITS cube names, indicating completion,
            otherwise False.
    """
    sql_query = sqlalchemy.text(f"""UPDATE tmp_execution_queue 
                SET done = false
                WHERE row_id in (SELECT row_id 
                                   FROM tmp_execution_queue
                                   WHERE run_id = '{run_id}'
                                        AND (done is true)
                                        AND (fits_cube_name is null))""")
    engine.execution_options(autocommit=True).execute(sql_query)
    _remaining_models = compute_remaining_models(run_id=run_id)
    return _remaining_models == 0


def insert_fits_name(engine: sqlalchemy.engine,
                     row_id: int,
                     fits_cube_name: str):
    """
        Insert the FITS cube name into the row of the execution queue with the specified row ID.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param row_id: The unique identifier for the row in the execution queue.
        :param fits_cube_name: The name of the FITS cube associated with the row.
        :return: None. Updates the row in the execution queue with the FITS cube name.
    """
    sql_query = sqlalchemy.text(f"""UPDATE tmp_execution_queue 
                SET fits_cube_name = '{fits_cube_name}'
                WHERE row_id = {row_id}""")
    engine.execution_options(autocommit=True).execute(sql_query)


def compute_grid_elements(run_id: str):
    """
        Compute grid elements for a given run ID by initializing the execution queue with parallel arguments.

        :param run_id: The unique identifier for the run.
        :return: None. Initializes the execution queue for the specified run ID.
    """
    init_db()
    parallel_args, _ = get_parallel_args_and_nprocesses()
    engine = get_pg_engine(logger=logger)
    initialize_queue(engine=engine,
                     run_id=run_id,
                     run_arguments=parallel_args)
    engine.dispose()


def get_parallel_args_and_nprocesses() -> Tuple[Iterator, int]:
    """
        Get parallel computation arguments and the number of processes for computation.

        :return: A tuple containing an iterator of parallel arguments and the number of processes to use.
    """
    _tdust_model_type, _model_type, dust_temperatures, densities, line_pairs, n_processes, _ = parse_input_main()
    line_set = set(chain.from_iterable(line_pairs))
    density_keyword = 'central_density' if _model_type == 'homogeneous' else 'density_at_reference'
    dust_temperature_keyword = 'dust_temperature' if _tdust_model_type == 'isothermal' else 'dust_temperature_at_reference'
    parallel_args = product(dust_temperatures, densities, line_set, [density_keyword], [dust_temperature_keyword])
    return parallel_args, n_processes


def compute_model(run_id: str):
    """
        Compute a model associated with a given run ID from parameters retrieved from the execution queue.

        :param run_id: The unique identifier for the run.
        :return: None. Computes a model and updates the database with the associated FITS cube name if parameters are
            available in the execution queue.
    """
    engine = get_pg_engine(logger=logger)
    parameters_set = get_run_pars(engine=engine,
                                  run_id=run_id)
    engine.dispose()
    if parameters_set is not None:
        _, _, _, fits_cube_name = compute_full_grid(tdust=parameters_set[2],
                                                    nh2=parameters_set[3],
                                                    line=parameters_set[4],
                                                    density_keyword=parameters_set[5],
                                                    dust_temperature_keyword=parameters_set[6])
        engine = get_pg_engine(logger=logger)
        insert_fits_name(engine=engine,
                         row_id=parameters_set[0],
                         fits_cube_name=fits_cube_name)
        engine.dispose()
    else:
        logger.info('All models were completed.')


def initialize_run():
    """
        Initialize a new run by generating a run ID if not provided, computing grid elements for the run,
        and saving the run ID to a file for future reference.

        :return: The generated or provided run ID.
    """
    if args.run_id is not None:
        run_id = args.run_id
    else:
        logger.info('Generating new run_id')
        run_id = str(uuid.uuid4())
    compute_grid_elements(run_id=run_id)
    sys.stdout.write(run_id)
    with open('run_id.txt', 'w') as run_id_file:
        run_id_file.write(f'{run_id}\n')
    return run_id


def compute_remaining_models(run_id: Union[None, str] = None) -> int:
    """
        Compute the number of pending models for a given run ID.

        :param run_id: Optional. The unique identifier for the run. If None, it defaults to the value of the
            'run_id' environment variable.
        :return: The number of remaining models.
    """
    _run_id = validate_parameter(run_id, default=os.getenv('run_id'))
    logger.info(f'Counting remaining models for run_id: {_run_id}')
    sql_query = sqlalchemy.text(f"""SELECT count(*)
                                    FROM tmp_execution_queue
                                    WHERE (run_id = '{_run_id}')
                                        AND ((done is false)
                                        OR ((done is true) AND (fits_cube_name is null)))""")
    engine = get_pg_engine(logger=logger)
    n_models = engine.execution_options(autocommit=True).execute(sql_query).first()[0]
    engine.dispose()
    sys.stdout.write(str(n_models))
    return n_models


def get_results(engine: sqlalchemy.engine,
                run_id: str):
    """
        Retrieve the results of a given run ID from the execution queue.

        :param engine: The SQLAlchemy engine used to interact with the database.
        :param run_id: The unique identifier for the run.
        :return: A list of tuples containing the dust temperature, density, line, and FITS cube name for each model
            associated with the run ID.
    """
    sql_query = sqlalchemy.text(f"""SELECT dust_temperature
                                           , density
                                           , line
                                           , fits_cube_name
                                    FROM tmp_execution_queue
                                    WHERE run_id = '{run_id}'""")
    return engine.execution_options(autocommit=True).execute(sql_query).all()


def cleanup_tmp_table(run_id: str,
                      engine: sqlalchemy.engine):
    """
        Cleanup the temporary execution queue for a given run ID.

        :param run_id: The unique identifier for the run.
        :param engine: The SQLAlchemy engine used to interact with the database.
        :return: None. Deletes all rows from the execution queue associated with the specified run ID.
    """
    sql_query = sqlalchemy.text(f"""DELETE
                                    FROM tmp_execution_queue
                                    WHERE run_id = '{run_id}'""")
    return engine.execution_options(autocommit=True).execute(sql_query)


def main_presentation_step(run_id: str,
                           cleanup_scratches: bool = True,
                           results_dict: Union[dict, None] = None) -> bool:
    _tdust_model_type, _model_type, dust_temperatures, densities, line_pairs, n_processes, run_type = parse_input_main()

    for folder in ('data', 'figures', 'trained_model'):
        os.makedirs(os.path.join(
            'prs',
            'output',
            'run_type',
            run_type,
            folder
        ), exist_ok=True)

    engine = get_pg_engine(logger=logger)
    _results_dict = validate_parameter(results_dict,
                                       default=get_results(engine=engine,
                                                           run_id=run_id))

    results_map = {}
    for (tdust, nh2, line, cube_fits_name) in _results_dict:
        results_map[f'{str(nh2)}_{str(tdust)}_{line}'] = cube_fits_name

    for line_pair in line_pairs:
        for tdust, nh2 in product(dust_temperatures, densities):
            prs_main(cube_fits_list=[results_map[f'{str(nh2)}_{str(tdust)}_{line_pair[0]}'],
                                     results_map[f'{str(nh2)}_{str(tdust)}_{line_pair[1]}']],
                     run_id=run_id,
                     engine=engine)

    if cleanup_scratches is True:
        scratches_dirs = glob.glob(os.path.join('mdl', 'scratches', '*'))
        for scratches in scratches_dirs:
            cleanup_directory(directory=scratches, logger=logger)

    prs_inspection_main(run_id=run_id,
                        is_isothermal=_tdust_model_type == 'isothermal',
                        is_homogeneous=_model_type == 'homogeneous',
                        engine=engine,
                        run_type=run_type)
    _run_success = verify_run(run_id=run_id,
                              engine=engine)
    if _run_success is True:
        cleanup_tmp_table(run_id=run_id,
                          engine=engine)
    engine.dispose()
    return _run_success


def process_models(distributed: bool = False) -> Tuple[Union[None, dict], int]:
    """
        Process models either in a distributed environment or on a single machine (with multiprocessing).

        :param distributed: A boolean flag indicating whether to process models in parallel. Defaults to False.
        :return: A tuple containing results (if processed in parallel) and the number of remaining models.
    """
    if distributed is True:
        compute_model(run_id=run_id)
        results = None
        remaining_models = compute_remaining_models(run_id)
    else:
        parallel_args, n_processes = get_parallel_args_and_nprocesses()
        with Pool(n_processes) as pool:
            results = pool.starmap(compute_full_grid, parallel_args)
        remaining_models = 0
    return results, remaining_models


logger = setup_logger(name='MAIN')
parser = argparse.ArgumentParser()
parser.add_argument('--run_id')
parser.add_argument('--cleanup_scratches')
parser.add_argument('--distributed')
args = parser.parse_args()

if __name__ == '__main__':
    run_id = initialize_run()
    assert run_id is not None
    _distributed = validate_parameter(args.distributed, default='false').lower() == 'true'

    results, remaining_models = process_models(distributed=_distributed)
    if remaining_models == 0:
        logger.info('All grid points processed. Summarizing results.')
        _cleanup = validate_parameter(args.cleanup_scratches,
                                      default='true').lower() == 'true'
        _run_success = main_presentation_step(run_id=run_id,
                                              cleanup_scratches=_cleanup,
                                              results_dict=results)
        if (_run_success is True) or (_distributed is False):
            logger.info('The run completed successfully!')
        else:
            logger.error('The run was incomplete. I have reset the done flag in the database for incomplete models')
+16 −0
radmc_postprocessing:
    nphot: 1000000
    scattering_mode_max: 0
    iranfreqmode: 1
    tgas_eq_tdust: 1
    lines_slowlvg_as_alternative: 1

radmc_observation:
    inclination: 0
    position_angle: 0
    iline: 2
    width_kms: 10
    nchannels: 100
    npix: 101
    threads: 12
    image_size_pc: 2
+7 −0
2               Format number of this file
1               Nr of dust species
============================================================================
1               Way in which this dust species is read
0               0=Thermal grain
silicate        Extension of name of dustkappa_***.inp file
----------------------------------------------------------------------------
+249 −0
import os
import sys
from itertools import product
from typing import Union
import sqlalchemy
import shutil
from astropy import units as u
from datetime import datetime
from radmc3dPy import image
from stg.stg_build_db_structure import (LinePars,
                                        SpeciesAndPartners,
                                        ModelPars,
                                        StarsPars)
from assets.commons import (load_config_file,
                            validate_parameter,
                            setup_logger,
                            compute_unique_hash_filename,
                            get_value_if_specified,
                            cleanup_directory)
from assets.commons.parsing import read_abundance_variation_schema
from assets.commons.db_utils import upsert, get_pg_engine
from assets.constants import (radmc_options_mapping,
                              radmc_lines_mode_mapping)

logger = setup_logger(name='MDL')


def save_cube_as_fits(cube_out_name: Union[str, None] = None,
                      cube_out_path: Union[str, None] = None,
                      path_radmc_files: Union[str, None] = None):
    """
    Convert the RADMC output to fits
    :param cube_out_name: outfile name
    :param cube_out_path: outfile path
    :param path_radmc_files: path where the radmc input/output files are stored
    """
    _cube_out_name = validate_parameter(cube_out_name, default='test_cube.fits')
    _cube_out_path = validate_parameter(cube_out_path, default=os.path.join('prs', 'fits', 'cubes'))
    _path_radmc_files = validate_parameter(path_radmc_files, default=os.path.join('mdl', 'radmc_files'))
    imdata = image.readImage(fname=os.path.join(_path_radmc_files, 'image.out'))
    output_name = os.path.join(_cube_out_path, _cube_out_name)
    if os.path.isfile(output_name):
        os.remove(output_name)
    imdata.writeFits(fname=output_name)


def check_config(config: dict) -> bool:
    """
    Check if all mandatory keys are present in the configuration dictionary.
    :param config: the configuration dictionary
    :return: True if all keys are present, False otherwise
    """
    mandatory_keys = {
        'inclination',
        'position_angle',
    }
    return set(mandatory_keys).difference(set(config.keys())) == set()


def get_command_options(config: dict) -> set:
    """
    Parse the radmc3d command options from the configuration dictionary
    :param config: the configuration dictionary
    :return: a set of option strings to append to the radmc3d image command
    """
    options_list = []
    for key in config["radmc_observation"]:
        try:
            options_list.append(f'{radmc_options_mapping[key]} {config["radmc_observation"][key]}')
        except KeyError:
            pass
    return set(options_list)


def populate_model_table(config_mdl: dict,
                         grid_zipfile: str,
                         cube_filename: str,
                         engine: sqlalchemy.engine,
                         executed_on: datetime.timestamp,
                         run_id: str):
    """
    Populate the model_parameters table in the DB
    :param config_mdl: the model configuration
    :param grid_zipfile: the name of the grid tarfile
    :param cube_filename: the name of the fits cube
    :param engine: the SQLAlchemy engine
    :param executed_on: the timestamp of execution
    :param run_id: the run unique identifier
    """
    model_pars_dict = {
        'zipped_grid_name': grid_zipfile,
        'fits_cube_name': cube_filename,
        'nphotons': config_mdl['radmc_postprocessing']['nphot'],
        'scattering_mode_max': int(config_mdl['radmc_postprocessing']['scattering_mode_max']),
        'iranfreqmode': config_mdl['radmc_postprocessing']['iranfreqmode'],
        'tgas_eq_tdust': config_mdl['radmc_postprocessing']['tgas_eq_tdust'],
        'inclination': config_mdl['radmc_observation']['inclination'],
        'position_angle': config_mdl['radmc_observation']['position_angle'],
        'imolspec': get_value_if_specified(config_mdl['radmc_observation'], 'imolspec'),
        'iline': get_value_if_specified(config_mdl['radmc_observation'], 'iline'),
        'width_kms': get_value_if_specified(config_mdl['radmc_observation'], 'width_kms'),
        'nchannels': get_value_if_specified(config_mdl['radmc_observation'], 'nchannels'),
        'npix': get_value_if_specified(config_mdl['radmc_observation'], 'npix'),
        'created_on': executed_on,
        'run_id': run_id
    }
    upsert(
        table_object=ModelPars,
        row_dict=model_pars_dict,
        conflict_keys=[ModelPars.fits_cube_name, ModelPars.run_id],
        engine=engine
    )


def populate_species_and_partner_table(config_lines: dict,
                                       engine: sqlalchemy.engine,
                                       executed_on: datetime.timestamp,
                                       grid_zipfile: str,
                                       run_id: str):
    """
    Populate the species_and_partners table in the DB
    :param config_lines: the line configuration
    :param engine: the SQLAlchemy engine
    :param executed_on: the timestamp of execution
    :param grid_zipfile: the name of the grid tarfile
    :param run_id: the run unique identifier
    """
    hot_core_specs = read_abundance_variation_schema(line_config=config_lines)
    for (species, collision_partner) in product(config_lines['species_to_include'], config_lines['collision_partners']):
        species_partner_dict = {
            'zipped_grid_name': f'{grid_zipfile}',
            'species_to_include': species,
            'molecular_abundance': config_lines['molecular_abundances'][species],
            'threshold': hot_core_specs[species]['threshold'],
            'abundance_jump': hot_core_specs[species]['abundance_jump'],
            'collision_partner': collision_partner,
            'molecular_abundance_collision_partner': config_lines['molecular_abundances'][collision_partner],
            'created_on': executed_on,
            'run_id': run_id
        }
        upsert(
            table_object=SpeciesAndPartners,
            row_dict=species_partner_dict,
            conflict_keys=[SpeciesAndPartners.zipped_grid_name,
                           SpeciesAndPartners.species_to_include,
                           SpeciesAndPartners.collision_partner,
                           SpeciesAndPartners.run_id],
            engine=engine
        )


def populate_line_table(config_lines: dict,
                        engine: sqlalchemy.engine,
                        executed_on: datetime.timestamp,
                        grid_zipfile: str,
                        run_id: str):
    """
    Populate the lines table in the DB
    :param config_lines: the dictionary containing the line configuration
    :param engine: the SQLAlchemy engine to use
    :param executed_on: the timestamp of execution, to add to the record
    :param grid_zipfile: the grid tarfile name, to be used as key
    :param run_id: the run unique identifier
    """
    line_pars_dict = {
        'zipped_grid_name': f'{grid_zipfile}',
        'lines_mode': config_lines['lines_mode'],
        'created_on': executed_on,
        'run_id': run_id
    }
    upsert(
        table_object=LinePars,
        row_dict=line_pars_dict,
        conflict_keys=[LinePars.zipped_grid_name, LinePars.run_id],
        engine=engine
    )


def main(grid_zipfile: str,
         run_id: str,
         override_config: Union[dict, None] = None,
         radmc_input_path: Union[str, None] = None,
         engine: sqlalchemy.engine = None) -> str:
    # This is necessary because lines_mode is needed in both the lines.inp and radmc3d.inp files.
    # The main input file is split from the rest because some parameters can be changed
    # independently of the grid used for the modeling. The mdl hash should depend on all the mdl parameters, not a subset.
    _override_config = validate_parameter(override_config, default={'grid_lines': {}, 'model': {}})
    executed_on = datetime.now()
    _radmc_input_path = validate_parameter(radmc_input_path, default=os.path.join('mdl', 'radmc_files'))
    config_stg = load_config_file(os.path.join('stg', 'config', 'config.yml'),
                                  override_config=_override_config['grid_lines'])
    config_lines = config_stg['lines']
    config_mdl = load_config_file(os.path.join('mdl', 'config', 'config.yml'),
                                  override_config=_override_config['model'])
    assert check_config(config=config_mdl['radmc_observation'])
    with open(os.path.join('mdl', 'radmc3d_postprocessing.sh'), 'w') as outfile:
        outfile.write(f'cd {_radmc_input_path}\n')
        options_set = get_command_options(config_mdl)
        radmc_command = f'radmc3d image {" ".join(options_set)}'
        outfile.write(radmc_command)

    config_full = config_mdl.copy()
    config_full.update(config_stg)
    cube_filename = f'{compute_unique_hash_filename(config=config_full)}.fits'
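    # Hashing the full (grid + model) configuration yields a deterministic
    # file name, so identical configurations reuse the cached cube below.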

    # Execute radmc if not done already
    if not os.path.isfile(os.path.join('prs', 'fits', 'cubes', cube_filename)):
        logger.debug(f'Executing command: {radmc_command}')
        execution_dir = os.getcwd()
        os.chdir(_radmc_input_path)
        os.system(radmc_command)
        os.chdir(execution_dir)
        logger.debug(f'Checking presence of file: {os.path.join(_radmc_input_path, "image.out")}')
        assert os.path.isfile(os.path.join(_radmc_input_path, 'image.out'))

        save_cube_as_fits(cube_out_name=cube_filename,
                          cube_out_path=os.path.join('prs', 'fits', 'cubes'),
                          path_radmc_files=radmc_input_path)
    else:
        logger.info('Computation performed already! Skipping...')

    if engine is None:
        engine = get_pg_engine(logger=logger, engine_kwargs={'pool_size': 2})

    populate_line_table(config_lines=config_lines,
                        engine=engine,
                        executed_on=executed_on,
                        grid_zipfile=grid_zipfile,
                        run_id=run_id)

    populate_species_and_partner_table(config_lines=config_lines,
                                       engine=engine,
                                       executed_on=executed_on,
                                       grid_zipfile=grid_zipfile,
                                       run_id=run_id)

    populate_model_table(config_mdl=config_mdl,
                         grid_zipfile=grid_zipfile,
                         cube_filename=cube_filename,
                         engine=engine,
                         executed_on=executed_on,
                         run_id=run_id)
    engine.dispose()
    return cube_filename


if __name__ == '__main__':
    main(grid_zipfile='459295aa894dffa8c521e606d14dbb6927638a2c.zip',
         run_id='test_run')
+2 −0
Here SAK will store the radmc files for each run.
 No newline at end of file
+2 −0
Folder that contains the scratch files used by SAK to avoid concurrency issues when running in parallel.
 No newline at end of file
+7 −0
flux_computation:
    central_frequency: 230.000
    central_frequency_units: "GHz"
    integration_limits: "all"
    moment_zero_aggregation_function: 'sum'
    aggregation_function: 'weighted_mean'
 No newline at end of file
+2 −0
In this directory you will find the postprocessed fits cubes.
 No newline at end of file
+2 −0
In this directory you will find the fits files representing the model grid.
 No newline at end of file
+2 −0
In this directory you will find the moment zero images of the lines.
 No newline at end of file
+2 −0
In this directory you will find the images of the line ratios.
 No newline at end of file
+2 −0
Here you will find the figure referring to the LTE column density approximation comparison.
 No newline at end of file

etl/prs/output/readme

0 → 100644
+2 −0
In this directory you will find the final (summarized) output of the run(s).
 No newline at end of file
+2 −0
Here you will find the results of the run, in the folder corresponding to the run type.
 No newline at end of file
+49 −0
import pandas as pd
import yaml
import os

df = pd.read_csv(os.path.join('assets', 'data', 'ch3oh_data_top35.csv'))

config_dict = {
    'simulated_ratio_realizations': 1000,
    'recompute_kde': True,
    'probability_threshold': 0.37,
    'best_bandwidths': {
        '87-86': 0.1,
        '88-86': 0.1,
        '88-87': 0.1,
        '257-256': 0.4,
        '381-380': 0.2,
    },
    'ratio_limits': {
        '87-86': [0, 1.7],
        '88-86': [0, 1.7],
        '88-87': [0, 1.7],
        '257-256': [0, 15],
        '381-380': [0, 4],
    },
    'use_model_for_inference': 'PLACEHOLDER',
    'ratios_to_include': [
        "87-86",
        "88-86",
        "88-87",
        "257-256",
        "381-380"
    ]
}

intensities_dict = {}
uncertainties_dict = {}

data_dict = df.to_dict(orient='records')
for line_id in ['86', '87', '88', '257', '256', '381', '380']:
    intensities_dict[line_id] = {}
    uncertainties_dict[line_id] = {}
    for row in data_dict:
        intensities_dict[line_id][str(row['source_name'])] = row[f'area_{line_id}']
        uncertainties_dict[line_id][str(row['source_name'])] = row[f'area_unc_{line_id}']
config_dict['measured_integrated_intensities'] = intensities_dict
config_dict['integrated_intensities_uncertainties'] = uncertainties_dict

with open('config/density_inference_input.yml', 'w') as outfile:
    yaml.dump(config_dict, outfile)
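
For intuition on how the intensities and uncertainties above might be consumed, here is a minimal sketch that turns a measured line pair into simulated ratio realizations; the independent-Gaussian sampling convention is an assumption, not necessarily what the inference code does:

import numpy as np


def simulate_ratios(intensity_num, sigma_num, intensity_den, sigma_den,
                    n_realizations=1000):
    # Draw independent Gaussian realizations of the two integrated intensities
    # and form their ratio, propagating the uncertainties into a ratio sample.
    rng = np.random.default_rng()
    numerator = rng.normal(intensity_num, sigma_num, n_realizations)
    denominator = rng.normal(intensity_den, sigma_den, n_realizations)
    return numerator / denominator


# '87-86' ratio realizations for G08.71-0.41, using the measured intensities
# and uncertainties listed in the configuration blocks above.
ratio_samples = simulate_ratios(1.307, 0.112, 5.770, 0.114)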
Original line number Diff line number Diff line
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle
from scipy.interpolate import splrep, BSpline, RegularGridInterpolator
from scipy.optimize import curve_fit
from assets.commons import (get_data,
                            load_config_file,
                            setup_logger,
                            validate_parameter)
from typing import Union, List


def approx(x, x_shift, a, b, exponent, scale=1):
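    # Empirical curve approximating the density-ratio relation: arctanh maps
    # the bounded ratio interval onto the real line before the power-law shaping.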
    return (np.abs(np.arctanh(2 * (x - x_shift) / scale) + a)) ** exponent + b


def main(ratios_to_fit: Union[List[str], None] = None):
    config = load_config_file(config_file_path=os.path.join('config', 'config.yml'))
    _ratio_to_fit = validate_parameter(
        param_to_validate=ratios_to_fit,
        default=['-'.join(lines_ratio) for lines_ratio in config['overrides']['lines_to_process']])
    data = get_data(limit_rows=None,
                    use_model_for_inference=config['run_type'])
    logger.info(f'Using {config["run_type"]} to fit data...')
    logger.info(f'Data contains {data.shape[0]} rows')

    bw = {
        '87-86': 0.1,
        '88-87': 0.1,
        '88-86': 0.1,
        '257-256': 0.4,
        '381-380': 0.2}

    x0 = {
        '87-86': ((0.55, 2.3, 3.75, 0.9),),
        '88-87': ((0.59, 2.3, 3.9, 1.05, 0.96),),
        '88-86': ((0.55, 2.5, 4.3, 0.83, 1.08),),
        '257-256': ((5, 2.5, 2.45, 0.75, 8), (5, 2.5, 3.33, 0.75, -8)),
        '381-380': ((2.04, 2.5, 3.1, 0.6, 2.1), (2.18, 2.5, 3.7, 0.79, -2.4)),
    }
    min_ratio = {
        '87-86': 0.08,
        '88-87': 0.08,
        '88-86': 0.03,
        '257-256': 1,
        '381-380': 1}
    max_ratio = {
        '87-86': 1.04,
        '88-87': 1.02,
        '88-86': 1.07,
        '257-256': 7.5,
        '381-380': 2.9}

    for ratio_string in _ratio_to_fit:
        _column_to_fit = f'ratio_{ratio_string}'
        data.sort_values(by=_column_to_fit, inplace=True)
        plt.clf()
        plt.scatter(data[_column_to_fit], data['avg_nh2'], marker='+', alpha=0.1)
        x_reg = np.linspace(min_ratio[ratio_string], max_ratio[ratio_string], 100)

        data_clean = data[[_column_to_fit, 'avg_nh2']].loc[data[_column_to_fit] <= max_ratio[ratio_string]].copy()
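        # Evaluate the trained KDE at each (ratio, density) sample and keep only
        # points with probability density above mean - std, rejecting sparse
        # outliers before overplotting the fitted curves.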
        kde = pickle.load(
            open(f'prs/output/run_type/{config["run_type"]}/trained_model/ratio_density_kde_{ratio_string}.pickle',
                 'rb'))
        rgi = RegularGridInterpolator((kde['y'] * bw[ratio_string], kde['x'] * 0.2), kde['values'].reshape(200, 200).T)
        density = rgi(data_clean[[_column_to_fit, 'avg_nh2']])
        data_clean = data_clean.loc[(density > density.mean() - density.std())]
        plt.scatter(data_clean[_column_to_fit], data_clean['avg_nh2'], marker='+', alpha=0.1, color='red')
        for components in x0[ratio_string]:
            plt.plot(x_reg, approx(x_reg, *components), color='cyan')
        plt.show()


if __name__ == '__main__':
    logger = setup_logger(name='POLYFIT')
    main()
+25 −0
from assets.utils import get_poc_results
from scipy.stats import anderson_ksamp
from assets.commons import setup_logger
import numpy as np


logger = setup_logger(name='PRS - AD test')

df_full = get_poc_results(line_fit_filename='ch3oh_data_top35.csv')

df_full['best_fit'] /= 1e5
df_full['best_fit'] = df_full['best_fit'].round(1)
df_full['mass'] = (df_full['mass'] / 100).round(1)
df_full['distance'] = (df_full['distance']).round(1)
df_full['diameter'] = (df_full['diameter']).round(1)
df_full = df_full[(df_full['mass'] < 100) & (df_full['mass'] > 3)]
# df_full = df_full[df_full['class'] != 'HII']
# df_full = df_full[df_full['class'].isin(['IRb', 'IRw'])]

ad_results = {}
for column in ['mass', 'distance', 'best_fit', 'diameter']:
    array_list = [d[column].values for _, d in df_full.groupby(['class'])]
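    # anderson_ksamp returns (statistic, critical_values, significance_level);
    # index 2 is the approximate p value, capped by scipy to the 0.001-0.25 range.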
    ad_results[column] = anderson_ksamp(array_list)[2]

logger.info(f'The p values of the AD test are: {ad_results}')
Original line number Diff line number Diff line
import os
import sqlalchemy
import numpy as np
import astropy.units as u
from datetime import datetime
from astropy.io import fits
from astropy import constants
from typing import Union, List
from assets.commons import (validate_parameter,
                            get_value_if_specified,
                            load_config_file,
                            setup_logger,
                            compute_unique_hash_filename)
from assets.commons.db_utils import upsert, get_pg_engine
from assets.constants import aggregation_function_mapping
from stg.stg_build_db_structure import (MomentZeroMaps,
                                        RatioMaps)

logger = setup_logger(name='PRS')


def populate_mom_zero_table(config_prs: dict,
                            fits_cube_name: str,
                            moment_zero_map_fits: str,
                            aggregated_moment_zero: float,
                            executed_on: datetime.timestamp,
                            engine: sqlalchemy.engine,
                            run_id: str):
    """
    Insert record in DB for the moment zero table
    :param config_prs: configuration of the presentation layer
    :param fits_cube_name: name of the fits cube
    :param moment_zero_map_fits: filename of the moment zero map
    :param aggregated_moment_zero: the aggregated value of the moment zero
    :param executed_on: timestamp of execution
    :param engine: SQLAlchemy engine to use
    :param run_id: the run unique identifier
    """
    _integration_limit_low = get_value_if_specified(config_prs, 'integration_limits')[0] if get_value_if_specified(
        config_prs, 'integration_limits') != 'all' else None
    _integration_limit_high = get_value_if_specified(config_prs, 'integration_limits')[1] if get_value_if_specified(
        config_prs, 'integration_limits') != 'all' else None
    moment_zero_dict = {
        'mom_zero_name': f'{moment_zero_map_fits}',
        'fits_cube_name': f'{fits_cube_name}',
        'integration_limit_low': _integration_limit_low,
        'integration_limit_high': _integration_limit_high,
        'aggregated_moment_zero': aggregated_moment_zero,
        'aggregation_function': config_prs['moment_zero_aggregation_function'],
        'created_on': executed_on,
        'run_id': run_id,
    }
    upsert(
        table_object=MomentZeroMaps,
        row_dict=moment_zero_dict,
        conflict_keys=[MomentZeroMaps.mom_zero_name,
                       MomentZeroMaps.run_id],
        engine=engine
    )


def populate_line_ratios_table(config_prs: dict,
                               moment_zero_map_fits_list: List[str],
                               ratio_map_name: str,
                               aggregated_ratio: float,
                               engine: sqlalchemy.engine,
                               executed_on: datetime.timestamp,
                               run_id: str):
    """
    Insert record in DB for the line ratios table
    :param config_prs: configuration of the presentation layer
    :param moment_zero_map_fits_list: list of the moment 0 maps to use to compute the ratio
    :param ratio_map_name: filename of the ratio map
    :param aggregated_ratio: aggregated value of the ratio
    :param engine: SQLAlchemy engine to use
    :param executed_on: timestamp of execution
    :param run_id: the run unique identifier
    """
    _integration_limit_low = get_value_if_specified(config_prs, 'integration_limits')[0] if get_value_if_specified(
        config_prs, 'integration_limits') != 'all' else None
    _integration_limit_high = get_value_if_specified(config_prs, 'integration_limits')[1] if get_value_if_specified(
        config_prs, 'integration_limits') != 'all' else None
    assert len(moment_zero_map_fits_list) == 2
    line_ratio_dict = {
        'ratio_map_name': f'{ratio_map_name}',
        'mom_zero_name_1': f'{moment_zero_map_fits_list[0]}',
        'mom_zero_name_2': f'{moment_zero_map_fits_list[1]}',
        'aggregated_ratio': aggregated_ratio,
        'aggregation_function': config_prs['aggregation_function'],
        'created_on': executed_on,
        'run_id': run_id,
    }
    upsert(
        table_object=RatioMaps,
        row_dict=line_ratio_dict,
        conflict_keys=[RatioMaps.ratio_map_name,
                       RatioMaps.run_id],
        engine=engine
    )


def compute_moment_zero(cube: str,
                        config: dict,
                        cube_path: Union[str, None] = None,
                        moment_zero_path: Union[str, None] = None,
                        moment_zero_fits_name: Union[str, None] = None,
                        hdu_idx: int = 0) -> float:
    """
    Compute moment zero map, given a cube
    :param cube: filename of the fits cube
    :param config: configuration of the presentation layer
    :param cube_path: path where the fits cube is to be found
    :param moment_zero_path: path where the moment 0 map is to be saved
    :param moment_zero_fits_name: filename for the moment zero map
    :param hdu_idx: index of the HDU to extract the data and the header
    :return: the aggregated value of the moment zero, according to the function specified in the configuration
    """
    _cube_path = validate_parameter(cube_path, default=os.path.join('prs', 'fits', 'cubes'))
    _moment_zero_path = validate_parameter(moment_zero_path, default=os.path.join('prs', 'fits', 'moments'))
    _moment_zero_fits_name = validate_parameter(moment_zero_fits_name, default='test_mom0.fits')

    fitsfile = open_fits_file_duck_typing(fitsfile=cube, fits_path=_cube_path)
    header = fitsfile[hdu_idx].header.copy()
    data = fitsfile[hdu_idx].data
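    # If the spectral axis is in Hz, convert the channel width to km/s via the
    # Doppler relation dv = -c * dnu / nu0, with CRVAL3 as reference frequency.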
    conversion = header['CUNIT3'].strip() == 'HZ'
    spectral_unit = 'km/s' if conversion is True else 'Hz'
    conversion_factor = 1 if conversion is False else (-constants.c * header['CDELT3'] / header['CRVAL3']).to('km/s').value
    mom0 = (data.sum(axis=0) * abs(conversion_factor))
    keywords_to_delete = ['NAXIS3', 'CRPIX3', 'CDELT3', 'CRVAL3', 'CUNIT3', 'CTYPE3']
    header['BUNIT'] += f' {spectral_unit}'
    header['BTYPE'] = 'MOMENT0'
    header['NAXIS'] = 2
    for key in keywords_to_delete:
        del header[key]
    fits.writeto(os.path.join(_moment_zero_path, _moment_zero_fits_name), data=mom0, header=header, overwrite=True)
    try:
        return aggregation_function_mapping[config['moment_zero_aggregation_function']](mom0)
    except KeyError:
        logger.warning(
            'Moment zero aggregation function not set or misconfigured. Trying to use the one for the ratio...')
        return aggregation_function_mapping[config['aggregation_function']](mom0)


def open_fits_file_duck_typing(fitsfile: Union[str, fits.PrimaryHDU],
                               fits_path: str = None) -> fits.PrimaryHDU:
    _fits_path = validate_parameter(fits_path, default='.')
    try:
        hdu = fits.open(os.path.join(_fits_path, fitsfile))
    except TypeError:
        hdu = fitsfile
    return hdu


def compute_image_ratios(fits1: str,
                         fits2: str,
                         config: dict,
                         fits_path: Union[str, None] = None,
                         ratio_fits_path: Union[str, None] = None,
                         ratio_fits_name: Union[str, None] = None,
                         hdu1_idx: int = 0,
                         hdu2_idx: int = 0):
    """
    Compute the ratio of the images and save it to fits. The ratio is computed as fits1/fits2
    :param fits1: filename of the first moment 0 map
    :param fits2: filename of the second moment 0 map
    :param config: configuration of the presentation layer
    :param fits_path: path where the fits files are to be found
    :param ratio_fits_path: path where the ratio map is to be saved
    :param ratio_fits_name: filename for the ratio map
    :param hdu1_idx: index of the HDU to extract the data and the header, for the first fits file
    :param hdu2_idx: index of the HDU to extract the data and the header, for the second fits file
    :return: the aggregated value of the ratio, according to the function specified in the configuration
    """
    _fits_path = validate_parameter(fits_path, default=os.path.join('prs', 'fits', 'moments'))
    _ratio_fits_path = validate_parameter(ratio_fits_path, default=os.path.join('prs', 'fits', 'ratios'))
    _ratio_fits_name = validate_parameter(ratio_fits_name, default='ratio.fits')
    hdu1 = open_fits_file_duck_typing(fitsfile=fits1,
                                      fits_path=_fits_path)
    hdu2 = open_fits_file_duck_typing(fitsfile=fits2,
                                      fits_path=_fits_path)
    ratio_image_data = hdu1[hdu1_idx].data / hdu2[hdu2_idx].data
    if config['aggregation_function'] in ('mean', 'sum'):
        aggregated_ratio = aggregation_function_mapping[config['aggregation_function']](ratio_image_data)
    elif config['aggregation_function'] == 'weighted_mean':
        aggregated_ratio = np.nansum(hdu1[hdu1_idx].data) / np.nansum(hdu2[hdu2_idx].data)
    else:
        raise NotImplementedError(f'Aggregation function {config["aggregation_function"]!r} is not supported')
    output_header = hdu1[hdu1_idx].header.copy()
    output_header['BUNIT'] = ''
    output_header['BTYPE'] = 'INT.RATIO'
    fits.writeto(os.path.join(_ratio_fits_path, _ratio_fits_name),
                 data=ratio_image_data,
                 header=output_header,
                 overwrite=True)
    return aggregated_ratio
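
# Note on the 'weighted_mean' branch above: dividing the summed maps is, up to
# NaN handling, the mom0_2-weighted mean of the pixel-wise ratio R = mom0_1 / mom0_2,
#     sum(R * mom0_2) / sum(mom0_2) = sum(mom0_1) / sum(mom0_2),
# which damps the contribution of low-signal pixels to the aggregated ratio.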


def main(cube_fits_list: List[str],
         run_id: str,
         mom0_out_cube1: Union[str, None] = None,
         mom0_out_cube2: Union[str, None] = None,
         engine: Union[sqlalchemy.engine.Engine, None] = None) -> str:
    """
    Compute the moment zero maps of the two cubes and their ratio, and persist the results
    :return: the filename of the ratio map, named after the hash of the configuration
    """
    assert len(cube_fits_list) == 2, 'Exactly two cubes are required to compute a line ratio'
    _mom0_out_cube1 = validate_parameter(mom0_out_cube1, default=cube_fits_list[0].replace('.fits', '_mom0.fits'))
    _mom0_out_cube2 = validate_parameter(mom0_out_cube2, default=cube_fits_list[1].replace('.fits', '_mom0.fits'))
    config_prs = load_config_file(os.path.join('prs', 'config', 'config.yml'))['flux_computation']
    executed_on = datetime.now()
    if engine is None:
        engine = get_pg_engine(logger=logger)
    config_prs.update({
        'cube_fits_list': cube_fits_list,
        'mom0_image_list': [_mom0_out_cube1, _mom0_out_cube2]
    })
    ratio_fits_name = f'{compute_unique_hash_filename(config=config_prs)}.fits'
    aggregated_mom0_1 = compute_moment_zero(cube=cube_fits_list[0],
                                            config=config_prs,
                                            moment_zero_fits_name=_mom0_out_cube1)
    aggregated_mom0_2 = compute_moment_zero(cube=cube_fits_list[1],
                                            config=config_prs,
                                            moment_zero_fits_name=_mom0_out_cube2)
    aggregated_image_ratio = compute_image_ratios(fits1=_mom0_out_cube1,
                                                  fits2=_mom0_out_cube2,
                                                  config=config_prs,
                                                  ratio_fits_name=ratio_fits_name)
    populate_mom_zero_table(config_prs=config_prs,
                            fits_cube_name=cube_fits_list[0],
                            moment_zero_map_fits=_mom0_out_cube1,
                            aggregated_moment_zero=aggregated_mom0_1,
                            executed_on=executed_on,
                            engine=engine,
                            run_id=run_id)
    populate_mom_zero_table(config_prs=config_prs,
                            fits_cube_name=cube_fits_list[1],
                            aggregated_moment_zero=aggregated_mom0_2,
                            moment_zero_map_fits=_mom0_out_cube2,
                            executed_on=executed_on,
                            engine=engine,
                            run_id=run_id)
    populate_line_ratios_table(config_prs=config_prs,
                               moment_zero_map_fits_list=[_mom0_out_cube1, _mom0_out_cube2],
                               ratio_map_name=ratio_fits_name,
                               aggregated_ratio=aggregated_image_ratio,
                               engine=engine,
                               executed_on=executed_on,
                               run_id=run_id)
    return ratio_fits_name
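
# Orchestration sketch (placeholder filenames and credentials; assumes a
# reachable Postgres instance, such as the one exposed on port 31000 by the
# docker-compose setup). Passing an explicit engine skips get_pg_engine():
#
#     >>> from sqlalchemy import create_engine
#     >>> test_engine = create_engine('postgresql+psycopg2://user:pass@localhost:31000/postgres')
#     >>> ratio_fits = main(cube_fits_list=['cube_a.fits', 'cube_b.fits'],
#     ...                   run_id='demo_run',
#     ...                   engine=test_engine)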


if __name__ == '__main__':
    main(cube_fits_list=['test_cube.fits', 'test_cube.fits'],
         mom0_out_cube1='test_cube_mom0.fits',
         mom0_out_cube2='test_cube_mom0_1.fits',
         run_id='test_run')

etl/requirements.txt

0 → 100644
+15 −0
Original line number Diff line number Diff line
numpy==1.23.4
PyYAML==6.0
astropy==5.1
matplotlib==3.6.2
scipy==1.9.3
tqdm==4.64.1
xarray==2022.10.0
sqlalchemy==1.4.42
psycopg2-binary==2.9.5
seaborn==0.12.2
scikit-learn==1.3.0
xgboost==2.0.0
keras==2.12.0
 No newline at end of file

etl/reset_output.sh

0 → 100644
+17 −0
Original line number Diff line number Diff line
#!/bin/bash
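# Clears all generated products: FITS files under prs/fits (cubes, moments,
# ratios, grids), PNG plots under prs/output, zipped archives under
# stg/archive, and full_dataset*.csv. Paths are relative, so run this from
# the etl/ directory; the script asks for confirmation before deleting.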

read -p "This will delete all of the results, are you sure? " -n 1 -r
echo

if [[ $REPLY =~ ^[Yy]$ ]]
then
    folder_list=("prs/fits/cubes" "prs/fits/moments" "prs/fits/ratios" "prs/fits/grids")
    for folder_name in "${folder_list[@]}"
    do
        rm -f "$folder_name"/*.fits
    done
    rm -f prs/output/*.png
    rm -f stg/archive/*.zip
    rm -f full_dataset*.csv
fi
+6 −0
Original line number Diff line number Diff line
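# NOTE (assumption): DB_USER and DB_PASS below appear to be base64-encoded
# placeholders (e.g. `echo cGlwcG8= | base64 -d`); whether they are decoded
# at load time depends on the credential-handling code.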
DB_USER: cGlwcG8=
DB_PASS: cGx1dG8=
DB_HOST: localhost
DB_NAME: postgres
DB_PORT: 5432
 No newline at end of file