from assets.utils import get_poc_results
from scipy.stats import anderson_ksamp
from assets.commons import setup_logger
import numpy as np


# Logger used for the final report of this script.
logger = setup_logger(name='PRS - AD test')

# Table of results built from the given line-fit file.
df_full = get_poc_results(line_fit_filename='ch3oh_data_top35.csv')

# Rescale 'best_fit' (by 1e5) and 'mass' (by 100), and keep one decimal for
# every column that is tested further down.
df_full['best_fit'] = (df_full['best_fit'] / 1e5).round(1)
df_full['mass'] = df_full['mass'].div(100).round(1)
df_full['distance'] = df_full['distance'].round(1)
df_full['diameter'] = df_full['diameter'].round(1)

# Keep only rows whose rescaled mass lies strictly between 3 and 100
# (rows with a missing mass fail both comparisons and are dropped as well).
mass_in_range = df_full['mass'].gt(3) & df_full['mass'].lt(100)
df_full = df_full.loc[mass_in_range]
# Optional alternative selections on 'class' (disabled):
# df_full = df_full[df_full['class'] != 'HII']
# df_full = df_full[df_full['class'].isin(['IRb', 'IRw'])]

# Run the k-sample Anderson-Darling test once per quantity, comparing the
# sub-samples obtained by splitting df_full on its 'class' column.
ad_results = {}

# The split does not depend on the tested column, so do it once. A scalar key
# is used on purpose: a one-element list key makes pandas 1.5 emit a
# FutureWarning (and pandas 2 yield 1-tuple keys) when the groups are iterated.
samples_by_class = [group for _, group in df_full.groupby('class')]

# NOTE(review): values are handed to anderson_ksamp as they are; confirm that
# the tested columns contain no NaN, since nothing is filtered out here.
for column in ['mass', 'distance', 'best_fit', 'diameter']:
    array_list = [group[column].values for group in samples_by_class]
    # Element 2 of the result is the significance level (named `pvalue` in
    # recent SciPy). Per the SciPy documentation, with the default method it
    # is floored at 0.001 and capped at 0.25.
    ad_results[column] = anderson_ksamp(array_list)[2]

logger.info(f'The p values of the AD test are: {ad_results}')
