#!/usr/bin/env python

import json
import logging
import os
import re
import shutil
import sys
import time

from datetime import datetime as dt

from checksum import Checksum
from config import Config
from file_grouper import FileGrouper
from db_connector import DbConnector
from mailer import Mailer
from node import Node
from redis_log_handler import RedisLogHandler
from system_utils import SystemUtils
from task_executor import TaskExecutor


class StorePreprocessor(TaskExecutor):

    def __init__(self):
        """Load the service configuration and build the helpers used per job.

        Reads the "file_grouper", "file_catalog", "transfer_node", "mail" and
        "logging" sections of /etc/vos_ts/vos_ts.conf, creates the file
        grouper, the DB connector and a Redis-backed logger, and resets all
        per-job attributes. The base-class initialiser is invoked last.
        """
        self.type = "store_preprocessor"
        self.systemUtils = SystemUtils()
        self.md5calc = Checksum()
        config = Config("/etc/vos_ts/vos_ts.conf")

        # File grouper thresholds.
        params = config.loadSection("file_grouper")
        self.fileGrouper = FileGrouper(params.getint("min_num_files"),
                                       self.systemUtils.convertSizeToBytes(params["max_dir_size"]))

        # File catalog (database) connection.
        # NOTE(review): the two trailing 1s are presumably connection-pool
        # bounds -- confirm against DbConnector.
        params = config.loadSection("file_catalog")
        self.dbConn = DbConnector(params["user"],
                                  params["password"],
                                  params["host"],
                                  params.getint("port"),
                                  params["db"],
                                  1,
                                  1)

        # Template of the per-user store path; "{username}" is substituted
        # in prepare().
        params = config.loadSection("transfer_node")
        self.storageStorePath = params["store_path"]

        params = config.loadSection("mail")
        self.adminEmail = params["admin_email"]

        # Logging: the level name from the config (e.g. "DEBUG") is resolved
        # with an attribute lookup on the logging module instead of eval(),
        # so the config value is never executed as code.
        params = config.loadSection("logging")
        self.logger = logging.getLogger(__name__)
        logFormatter = logging.Formatter(params["log_format"])
        self.logger.setLevel(getattr(logging, params["log_level"].strip()))
        redisLogHandler = RedisLogHandler()
        redisLogHandler.setFormatter(logFormatter)
        self.logger.addHandler(redisLogHandler)

        # Per-job state, filled in by run() / prepare() for each job.
        self.storageId = None
        self.storageType = None
        self.jobObj = None
        self.jobId = None
        self.username = None
        self.userId = None
        self.path = None
        self.nodeList = []
        super(StorePreprocessor, self).__init__()

    def prepare(self, username):
        """Give the user's store directory to root and make it read-only.

        Sets self.username and self.path (the configured store path with
        "{username}" substituted), then walks self.path and, for every
        directory and file found, sets owner/group to 0:0 and mode to 0o555.

        Symbolic links are never dereferenced: only the ownership of the link
        itself is changed and no chmod is applied, so a link placed inside
        the store cannot be used to alter a target outside of it.

        Args:
            username: name substituted into the configured store path.

        Raises:
            OSError: if any chown/chmod call fails.
        """
        self.logger.info("File permissions setup")
        self.username = username
        self.path = self.storageStorePath.replace("{username}", self.username)
        for folder, subfolders, files in os.walk(self.path):
            # 'folder' is either the walk root or a real directory: os.walk
            # does not descend into symlinked directories by default.
            os.chown(folder, 0, 0)
            os.chmod(folder, 0o555)
            # Symlinks to directories are listed in 'subfolders', symlinks to
            # files in 'files'; both must be handled without following them.
            for name in subfolders + files:
                entry = os.path.join(folder, name)
                os.chown(entry, 0, 0, follow_symlinks=False)
                if not os.path.islink(entry):
                    os.chmod(entry, 0o555)
    def execute(self):
        """Run the preprocessing phase on the directory selected by prepare().

        Steps, in order:
          1. scan self.path and run the file grouper on each first-level dir;
          2. rescan and, if loose files are present at the first level, move
             every first-level entry into a new "<timestamp>-vos_wrapper"
             directory; run the checksum helper on the resulting dirs;
          3. scan recursively and register every directory and every valid
             file in the file catalog, appending one record per node to
             self.nodeList;
          4. store the overall data size in self.jobObj.jobInfo["dataSize"].

        Returns:
            True if every step completed, False otherwise. All exceptions are
            logged and converted into a False return value.
        """
        try:
            self.logger.info("++++++++++ Start of preprocessing phase ++++++++++")

            # First scan to find crowded dirs
            self.logger.info("Searching for 'crowded' dirs")
            dirs, files = self.systemUtils.scan(self.path)

            # Create a .tar for all dirs matching the constraints, if any
            self.logger.info("Starting .tar file generation for 'crowded' dirs (if any)")
            for dirName in dirs:
                self.fileGrouper.recursive(self.path + '/' + dirName)

            # Second scan after file grouper execution
            self.logger.info(f"First-level scan of '{self.path}'")
            dirs, files = self.systemUtils.scan(self.path)

            if not self._wrapTopLevelEntries(dirs, files):
                # Empty store dir (this should be handled by data_rpc_server.py)
                self.logger.critical("FATAL: the 'store' directory is empty.")
                return False

            # Third scan after directory structure 'check & repair'
            self.logger.info(f"Recursive scan of '{self.path}'")
            dirs, files = self.systemUtils.scanRecursive(self.path)

            try:
                locationId = self.dbConn.getLocationId(self.storageId)
            except Exception:
                self.logger.exception("FATAL: unable to obtain the location ID for the storage point")
                return False

            self.logger.info("Checksum calculation and file catalog update")
            pathPrefix = self.storageStorePath.replace("{username}", self.username)
            wrapperPattern = re.compile(r"/[0-9]{4}_[0-9]{2}_[0-9]{2}-[0-9]{2}_[0-9]{2}_[0-9]{2}-vos_wrapper")

            for dirPath in dirs:
                # The timestamp wrapper dirs themselves are not catalogued.
                if wrapperPattern.match("/" + os.path.basename(dirPath)):
                    continue
                if not self._importNode(dirPath, "container", locationId, pathPrefix, wrapperPattern):
                    return False

            # 'files' is a list of lists of file paths.
            for fileGroup in files:
                for filePath in fileGroup:
                    if not self.md5calc.fileIsValid(filePath):
                        continue
                    if not self._importNode(filePath, "data", locationId, pathPrefix, wrapperPattern):
                        return False

            self.logger.info("Overall data size calculation")
            self.jobObj.jobInfo["dataSize"] = self.systemUtils.getSize(self.path)
        except Exception:
            self.logger.exception("FATAL: something went wrong during the preprocessing phase.")
            return False
        else:
            self.logger.info("++++++++++ End of preprocessing phase ++++++++++")
            return True

    def _wrapTopLevelEntries(self, dirs, files):
        """Normalise the first level of self.path and checksum its dirs.

        If loose files exist at the first level, every first-level entry
        (files first, then dirs) is moved into a freshly created
        "<timestamp>-vos_wrapper" directory, which is then checksummed.
        If only dirs exist, each of them is checksummed in place.

        Returns:
            False if both 'dirs' and 'files' are empty, True otherwise.
        """
        if not files and not dirs:
            return False

        if not files:
            self.logger.debug("The 'store' directory contains only dirs")
            for dirName in dirs:
                self.md5calc.recursive(self.path + '/' + dirName)
            return True

        if dirs:
            self.logger.debug("The 'store' directory contains both files and dirs")
        else:
            self.logger.debug("The 'store' directory contains only files")
        timestamp = dt.now().strftime("%Y_%m_%d-%H_%M_%S")
        wrapperPath = self.path + '/' + timestamp + "-vos_wrapper"
        os.mkdir(wrapperPath)
        for group in (files, dirs):
            for entry in group:
                shutil.move(self.path + '/' + entry, wrapperPath)
        self.md5calc.recursive(wrapperPath)
        return True

    def _buildCatalogNode(self, fsPath, nodeType, locationId, pathPrefix, wrapperPattern):
        """Translate a filesystem path into a catalog node.

        Returns:
            A (node, vospacePath) tuple. 'node' is None when fsPath is a
            symbolic link: links are not imported, so no size or checksum is
            read for them (a dangling link would otherwise raise and abort
            the whole job).
        """
        # Only the leading store prefix is rewritten (count=1), so a nested
        # directory that happens to repeat the prefix is left untouched.
        parentPath = os.path.dirname(fsPath).replace(pathPrefix, "/" + self.username, 1)
        nodeName = os.path.basename(fsPath)

        # Strip the timestamp wrapper dir from the parent path, remembering
        # its name so it can be stored on the node.
        wrapperDir = None
        found = wrapperPattern.search(parentPath)
        if found:
            wrapperDir = found.group(0).lstrip('/')
            parentPath = wrapperPattern.sub("", parentPath)
        vospacePath = parentPath + '/' + nodeName

        if os.path.islink(fsPath):
            return None, vospacePath

        node = Node(nodeName, nodeType)
        if wrapperDir is not None:
            node.setWrapperDir(wrapperDir)
        node.setParentPath(parentPath)
        node.setLocationId(locationId)
        node.setJobId(self.jobId)
        node.setCreatorId(self.userId)
        if nodeType == "data":
            node.setContentLength(os.path.getsize(fsPath))
            node.setContentMD5(self.md5calc.getMD5(fsPath))
        else:
            node.setContentLength(0)
        node.setSticky(True)
        return node, vospacePath

    def _importNode(self, fsPath, nodeType, locationId, pathPrefix, wrapperPattern):
        """Insert one node into the file catalog and record the outcome.

        Appends [timestamp, fsPath, vospacePath, nodeType, outcome] to
        self.nodeList, where outcome is "SYMLINK" (not imported), "DONE"
        (inserted) or "SKIPPED" (insertNode returned a false value, i.e. the
        node already exists).

        Returns:
            True on success, False if the catalog update raised (the error is
            logged here).
        """
        node, vospacePath = self._buildCatalogNode(fsPath, nodeType, locationId,
                                                   pathPrefix, wrapperPattern)
        try:
            if node is None:
                outcome = "SYMLINK"
            elif self.dbConn.insertNode(node):
                outcome = "DONE"
            else:
                outcome = "SKIPPED"
            self.nodeList.append([ dt.now().isoformat(), fsPath, vospacePath, nodeType, outcome ])
        except Exception:
            self.logger.exception("FATAL: unable to update the file catalog.")
            return False
        return True
        
    def update(self, status):
        """Record the outcome of the preprocessing phase and notify by e-mail.

        For status "OK" the job phase is set to QUEUED (job object and DB).
        For any other status the destination queue is switched to
        "write_terminated", the job is marked as ERROR and stored in the DB
        together with its end time.

        The destination queue is switched before anything that may fail, so
        a failed job is never forwarded to the ready queue because of a DB or
        mail problem. The notification e-mail is sent only after the state
        update succeeded. Exceptions are logged and never propagated.

        In every case self.nodeList is copied into
        self.jobObj.jobInfo["nodeList"] and then cleared.

        Args:
            status: "OK" on success; any other value is treated as failure.
        """
        succeeded = status == "OK"
        try:
            if succeeded:
                self.jobObj.setPhase("QUEUED")
                self.dbConn.setPhase(self.jobId, "QUEUED")
                self.logger.info("Job phase updated to QUEUED.")
            else:
                # Re-route first: this must happen even if the DB calls fail.
                self.setDestinationQueueName("write_terminated")
                self.jobObj.setPhase("ERROR")
                self.jobObj.setErrorType("fatal")
                self.jobObj.setErrorMessage("FATAL: something went wrong during the preprocessing phase.")
                self.dbConn.insertJob(self.jobObj)
                self.dbConn.setEndTime(self.jobId)
                self.logger.info("Job phase updated to ERROR.")
        except Exception:
            self.logger.exception(f"FATAL: unable to update the job status for job {self.jobId}")
        else:
            try:
                self._sendStatusMail(succeeded)
            except Exception:
                self.logger.exception(f"Unable to send the e-mail notification for job {self.jobId}")
        finally:
            self.jobObj.jobInfo["nodeList"] = self.nodeList.copy()
            self.nodeList.clear()

    def _sendStatusMail(self, succeeded):
        """Send the QUEUED / ERROR notification to the admin and the job owner."""
        m = Mailer(self.logger)
        m.addRecipient(self.adminEmail)
        userEmail = self.dbConn.getUserEmail(self.jobObj.ownerId)
        # Skip a missing address and avoid mailing the admin twice.
        if userEmail and userEmail != self.adminEmail:
            m.addRecipient(userEmail)

        if succeeded:
            subject = "VOSpace data storage notification: Job QUEUED"
            headline = "your job has been QUEUED."
            closing = "You will be notified by email once the job is completed."
        else:
            subject = "VOSpace data storage notification: Job ERROR"
            headline = "your job FAILED during the preprocessing phase."
            closing = "This issue will be automatically reported to the administrator."

        msg = f"""
        Dear user,
        {headline}

        Job ID: {self.jobId}
        Job type: {self.jobObj.type}
        Storage type: {self.storageType}
        Storage ID: {self.storageId}
        Owner ID: {self.jobObj.ownerId}

        {closing}

        """
        m.setMessage(subject, msg)
        m.send()

    def run(self):
        """Main loop: move jobs from "write_pending" to the next queue.

        On each iteration, after self.wait(), one job is taken from the
        source queue if it is non-empty and the destination queue holds fewer
        than self.maxReadyJobs jobs. The destination queue name is reset to
        "write_ready" at the end of every iteration, because update() may
        have redirected it for a failed job.

        This method never returns.
        """
        self.logger.info("Starting store preprocessor...")
        self.setSourceQueueName("write_pending")
        self.setDestinationQueueName("write_ready")
        while True:
            self.wait()
            try:
                srcQueueLen = self.srcQueue.len()
                destQueueLen = self.destQueue.len()
            except Exception:
                self.logger.exception("Cache error: failed to retrieve queue length.")
            else:
                if destQueueLen < self.maxReadyJobs and srcQueueLen > 0:
                    self._processNextJob()
            finally:
                self.setDestinationQueueName("write_ready")

    def _processNextJob(self):
        """Preprocess the next pending job and forward it to the destination queue."""
        self.jobObj = self.srcQueue.getJob()
        self.jobId = self.jobObj.jobId
        self.storageId = self.jobObj.jobInfo["storageId"]
        self.storageType = self.jobObj.jobInfo["storageType"]
        self.userId = self.jobObj.ownerId
        self.username = self.jobObj.jobInfo["userName"]

        # prepare() performs unguarded filesystem calls; a failure there must
        # fail this job only, not terminate the whole service loop.
        try:
            self.prepare(self.username)
        except Exception:
            self.logger.exception(f"FATAL: unable to set up file permissions for job {self.jobId}")
            succeeded = False
        else:
            succeeded = self.execute()
        self.update("OK" if succeeded else "ERROR")

        try:
            self.destQueue.insertJob(self.jobObj)
            self.srcQueue.extractJob()
        except Exception:
            self.logger.exception(f"Failed to move job {self.jobObj.jobId} from '{self.srcQueue.name()}' to '{self.destQueue.name()}'")
        else:
            self.logger.info(f"Job {self.jobObj.jobId} MOVED from '{self.srcQueue.name()}' to '{self.destQueue.name()}'")

# Test
#sp = StorePreprocessor()
#sp.prepare("curban")
#sp.execute()
