#!/usr/bin/env python
#
# This file is part of vospace-transfer-service
# Copyright (C) 2021 Istituto Nazionale di Astrofisica
# SPDX-License-Identifier: GPL-3.0-or-later
#

import datetime
import json
import logging
import os
import re
import shutil
import sys
import time

from tabulate import tabulate

from checksum import Checksum
from config import Config
from exceptions import IllegalCharacterException
from file_grouper import FileGrouper
from db_connector import DbConnector
from mailer import Mailer
from node import Node
from redis_log_handler import RedisLogHandler
from system_utils import SystemUtils
from task_executor import TaskExecutor


class StorePreprocessor(TaskExecutor):

    def __init__(self):
        """Load the service configuration and initialise the preprocessor.

        Reads the 'file_grouper', 'transfer_node', 'mail', 'logging' and
        'file_catalog' sections of /etc/vos_ts/vos_ts.conf, builds the helper
        objects used by the other methods (file grouper, logger with a Redis
        handler, database connector) and resets the per-job attributes.
        """
        self.type = "store_preprocessor"
        self.systemUtils = SystemUtils()
        self.md5calc = Checksum()
        config = Config("/etc/vos_ts/vos_ts.conf")

        # File grouper: minimum number of files and maximum directory size (in bytes)
        params = config.loadSection("file_grouper")
        self.fileGrouper = FileGrouper(params.getint("min_num_files"),
                                       self.systemUtils.convertSizeToBytes(params["max_dir_size"]))

        # Store path template, containing the '{username}' placeholder
        params = config.loadSection("transfer_node")
        self.storageStorePath = params["store_path"]

        params = config.loadSection("mail")
        self.adminEmail = params["admin_email"]

        # Logging: the level name is resolved with getattr() instead of eval(),
        # so the configured string is never executed as code.
        params = config.loadSection("logging")
        self.logger = logging.getLogger(__name__)
        logFormatter = logging.Formatter(params["log_format"])
        self.logger.setLevel(getattr(logging, params["log_level"]))
        redisLogHandler = RedisLogHandler()
        redisLogHandler.setFormatter(logFormatter)
        self.resDir = params["res_dir"]
        self.logger.addHandler(redisLogHandler)

        # File catalog connection
        # NOTE(review): the two literal 1s are presumably connection pool bounds;
        # confirm against DbConnector.
        params = config.loadSection("file_catalog")
        self.dbConn = DbConnector(params["user"],
                                  params["password"],
                                  params["host"],
                                  params.getint("port"),
                                  params["db"],
                                  1,
                                  1,
                                  self.logger)

        # Per-job state, filled in by run() for each job taken from the queue
        self.storageId = None
        self.storageType = None
        self.jobObj = None
        self.jobId = None
        self.username = None
        self.userId = None
        self.nodeList = []
        self.invalidFileAndDirNames = []
        super(StorePreprocessor, self).__init__()

    def prepare(self, username):
        self.logger.info("File permissions setup")
        self.username = username
        #self.path = "/home/" + username + "/store"
        self.path = self.storageStorePath.replace("{username}", self.username)
        for folder, subfolders, files in os.walk(self.path):
            try:
                os.chown(folder, 0, 0)
                os.chmod(folder, 0o555)
                for s in subfolders:
                    absPath = os.path.join(folder, s)
                    os.chown(absPath, 0, 0)
                    os.chmod(absPath, 0o555)
                for f in files:
                    absPath = os.path.join(folder, f)
                    os.chown(absPath, 0, 0)
                    os.chmod(absPath, 0o555)
            except OSError:
                self.logger.error(f"Unable to set permissions for '{absPath}', skip.")

    def execute(self):
        try:
            self.logger.info("++++++++++ Start of preprocessing phase ++++++++++")
        
            # First scan to find crowded dirs
            self.logger.info("Searching for 'crowded' dirs")
            [ dirs, files ] = self.systemUtils.scan(self.path)

            # Create a .tar for all dirs matching the constraints, if any
            self.logger.info("Starting .tar file generation for 'crowded' dirs (if any)")
            for dir in dirs:
                self.fileGrouper.recursive(self.path + '/' + dir)

            # Second scan after file grouper execution
            self.logger.info(f"First-level scan of '{self.path}'")
            [ dirs, files ] = self.systemUtils.scan(self.path)
            timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
            
            # Third scan to check illegal characters in file/dir names (if any)
            self.logger.info(f"Checking for invalid file/dir names in '{self.path}'")
            self.invalidFileAndDirNames = self.systemUtils.findInvalidFileAndDirNames(self.path)
            if self.invalidFileAndDirNames:
                self.logger.warning("Found invalid file/dir names")
                reportFile = os.path.join(self.resDir, "vos_data_report-" + self.jobId)
                try:
                    rfp = open(reportFile, "w")
                except IOError:
                    self.logger.exception("Unable to generate the 'vos_data_report'.")
                else:
                    rfp.write(tabulate(self.invalidFileAndDirNames,
                                       headers = [ "Path list of invalid file/dir names" ],
                                       tablefmt = "simple"))
                    rfp.close()
                    self.cleanup()
                raise IllegalCharacterException

            # Check if /home/user/store contains files or dirs
            if files or dirs:
                destPath = self.path + '/' + timestamp + "-vos_wrapper"
                os.mkdir(destPath)
                for file in files:
                    srcPath = self.path + '/' + file
                    shutil.move(srcPath, destPath)
                for dir in dirs:
                    srcPath = self.path + '/' + dir
                    shutil.move(srcPath, destPath)
                self.md5calc.recursive(destPath)
            # /home/user/store is empty (this should be handled by data_rpc_server.py)
            else:
                self.logger.error("FATAL: the 'store' directory is empty.")
                self.cleanup()
                return False

            # Fourth (and last) recursive scan
            self.logger.info(f"Recursive scan of '{self.path}'")
            [ dirs, files ] = self.systemUtils.scanRecursive(self.path)
            
            try:
                locationId = self.dbConn.getLocationId(self.storageId)
            except Exception:
                self.logger.exception("FATAL: unable to obtain the location ID for the storage point")
                return False
            
            self.logger.info("Checksum calculation and file catalog update")
            pathPrefix = self.storageStorePath.replace("{username}", self.username)
            tstampWrapperDirPattern = re.compile("/[0-9]{4}_[0-9]{2}_[0-9]{2}-[0-9]{2}_[0-9]{2}_[0-9]{2}-vos_wrapper")
            for dir in dirs:
                basePath = os.path.dirname(dir).replace(pathPrefix, "/" + self.username)
                nodeName = os.path.basename(dir)
                cnode = Node(nodeName, "container")
                if not tstampWrapperDirPattern.match("/" + nodeName):
                    if tstampWrapperDirPattern.search(basePath):
                        tstampWrapperDir = tstampWrapperDirPattern.search(basePath).group(0).lstrip('/')
                        basePath = tstampWrapperDirPattern.sub("", basePath)
                        cnode.setWrapperDir(tstampWrapperDir)
                    vospacePath = basePath + '/' + nodeName
                    fsPath = vospacePath.split("/" + self.username + "/")[1]
                    cnode.setParentPath(basePath)
                    cnode.setFsPath(fsPath)
                    cnode.setLocationId(locationId)
                    cnode.setJobId(self.jobId)
                    cnode.setCreatorId(self.userId)
                    cnode.setContentLength(0)
                    cnode.setSticky(True)
                    try:
                        now = datetime.datetime.now().isoformat()
                        if os.path.islink(dir):
                            # node is a symlink, do not import it...
                            self.nodeList.append([ now, dir, vospacePath, "container", "SYMLINK" ])
                        elif self.dbConn.insertNode(cnode):
                            self.nodeList.append([ now, dir, vospacePath, "container", "DONE" ])
                        else:
                            # node already exists, skip it...
                            self.nodeList.append([ now, dir, vospacePath, "container", "SKIPPED" ])
                    except Exception:
                        self.logger.exception("FATAL: unable to update the file catalog.")
                        return False

            for flist in files:
                for file in flist:
                    if self.md5calc.fileIsValid(file):
                        basePath = os.path.dirname(file).replace(pathPrefix, "/" + self.username)
                        nodeName = os.path.basename(file)
                        dnode = Node(nodeName, "data")
                        if tstampWrapperDirPattern.search(basePath):
                            tstampWrapperDir = tstampWrapperDirPattern.search(basePath).group(0).lstrip('/')
                            basePath = tstampWrapperDirPattern.sub("", basePath)
                            dnode.setWrapperDir(tstampWrapperDir)
                        vospacePath = basePath + '/' + nodeName
                        fsPath = vospacePath.split("/" + self.username + "/")[1]
                        dnode.setParentPath(basePath)
                        dnode.setFsPath(fsPath)
                        dnode.setLocationId(locationId)
                        dnode.setJobId(self.jobId)
                        dnode.setCreatorId(self.userId)
                        if not os.path.islink(file):
                            dnode.setContentLength(os.path.getsize(file))
                            dnode.setContentMD5(self.md5calc.getMD5(file))
                        else:
                            dnode.setContentLength(0)
                            dnode.setContentMD5(None)
                        dnode.setSticky(True)
                        try:
                            now = datetime.datetime.now().isoformat()
                            if os.path.islink(file):
                                # node is a symlink, do not import it...
                                self.nodeList.append([ now, file, vospacePath, "data", "SYMLINK" ])
                            elif self.dbConn.insertNode(dnode):
                                self.nodeList.append([ now, file, vospacePath, "data", "DONE" ])
                            else:
                                # node already exists, skip it...
                                self.nodeList.append([ now, file, vospacePath, "data", "SKIPPED" ])
                        except Exception:
                            self.logger.exception("FATAL: unable to update the file catalog.")
                            return False
            self.logger.info("Overall data size calculation")
            self.jobObj.jobInfo["dataSize"] = self.systemUtils.getSize(self.path)
        except Exception:
            self.logger.exception("FATAL: something went wrong during the preprocessing phase.")
            return False
        else:
            self.logger.info("++++++++++ End of preprocessing phase ++++++++++")
            return True
        
    def update(self, status):
        """Persist the outcome of the preprocessing phase and notify by e-mail.

        The job state is written first and the e-mail is sent afterwards as a
        separate best-effort step, so a mail or address-lookup failure can no
        longer prevent the phase update or the routing of a failed job.

        Args:
            status: "OK" marks the job as QUEUED; any other value marks it as
                ERROR, routes it to the 'write_terminated' queue and removes
                the nodes created for this job from the database.

        In every case the collected node list is copied into the job info and
        the per-job lists are cleared.
        """
        succeeded = status == "OK"
        try:
            if succeeded:
                self.jobObj.setPhase("QUEUED")
                self.dbConn.insertJob(self.jobObj)
                self.logger.info("Job phase updated to QUEUED.")
            else:
                # Route the job before touching the database: a failed job
                # must not reach the ready queue even if the catalog update
                # below raises.
                self.setDestinationQueueName("write_terminated")
                self.jobObj.setPhase("ERROR")
                self.jobObj.setErrorType("fatal")
                self.jobObj.setErrorMessage("FATAL: something went wrong during the preprocessing phase.")
                self.jobObj.setEndTime(datetime.datetime.now().isoformat())
                self.dbConn.insertJob(self.jobObj)
                self.logger.info("Job phase updated to ERROR.")
                self.logger.info("Removing VOSpace nodes from the database...")
                self.dbConn.deleteNodesByJobId(self.jobId)
                self.logger.info("Database cleanup completed")
        except Exception:
            self.logger.exception(f"FATAL: unable to update the job status for job {self.jobId}")
        else:
            self._sendNotification(succeeded)
        finally:
            self.jobObj.jobInfo["nodeList"] = self.nodeList.copy()
            self.nodeList.clear()
            self.invalidFileAndDirNames.clear()

    def _sendNotification(self, succeeded):
        """E-mail the job outcome to the administrator and, if different, to the job owner."""
        if succeeded:
            subject = "VOSpace data storage notification: Job QUEUED"
            headline = "your job has been QUEUED."
            closing = "You will be notified by email once the job is completed."
        else:
            subject = "VOSpace data storage notification: Job ERROR"
            headline = "your job FAILED during the preprocessing phase."
            closing = "This issue will be automatically reported to the administrator."
        try:
            msg = f"""
        Dear user,
        {headline}

        Job ID: {self.jobId}
        Job type: {self.jobObj.type}
        Storage type: {self.storageType}
        Storage ID: {self.storageId}
        Owner ID: {self.jobObj.ownerId}

        {closing}

        """
            m = Mailer(self.logger)
            m.addRecipient(self.adminEmail)
            userEmail = self.dbConn.getUserEmail(self.jobObj.ownerId)
            if userEmail != self.adminEmail:
                m.addRecipient(userEmail)
            m.setMessage(subject, msg)
            m.send()
        except Exception:
            # Notification is best-effort: the job state has already been saved
            self.logger.exception(f"Unable to send the e-mail notification for job {self.jobId}")
            
    def cleanup(self):
        try:
            self.logger.info(f"Restoring user permissions for '{self.path}'...")
            userInfo = self.systemUtils.userInfo(self.username)
            uid = userInfo[1]
            gid = userInfo[2]
            os.chown(self.path, uid, gid)
            os.chmod(self.path, 0o755)
            for root, dirs, files in os.walk(self.path):
                for d in dirs:
                    os.chown(os.path.join(root, d), uid, gid)
                    os.chmod(os.path.join(root, d), 0o755)
                for f in files:
                    os.chown(os.path.join(root, f), uid, gid)
                    os.chmod(os.path.join(root, f), 0o755)
        except Exception:
            self.logger.exception(f"Unable to restore user permissions for {self.path}.")

    def run(self):
        """Main loop of the store preprocessor (never returns).

        Reads jobs from the 'write_pending' queue. After each self.wait(), if
        the destination queue holds fewer than self.maxReadyJobs jobs and the
        source queue is not empty, the next job is prepared, preprocessed and
        its status updated; the job is then inserted in the destination queue
        and extracted from the source queue. The destination queue name is
        reset to 'write_ready' at the end of every iteration, because
        update() may redirect a failed job to another queue.
        """
        self.logger.info("Starting store preprocessor...")
        self.setSourceQueueName("write_pending")
        self.setDestinationQueueName("write_ready")
        while True:
            self.wait()
            try:
                try:
                    pendingJobs = self.srcQueue.len()
                    readyJobs = self.destQueue.len()
                except Exception:
                    self.logger.exception("Cache error: failed to retrieve queue length.")
                    continue

                # Nothing to do: destination queue is full or no job is waiting
                if not (readyJobs < self.maxReadyJobs and pendingJobs > 0):
                    continue

                # Load the per-job state used by prepare(), execute() and update()
                self.jobObj = self.srcQueue.getJob()
                self.jobId = self.jobObj.jobId
                jobInfo = self.jobObj.jobInfo
                self.storageId = jobInfo["storageId"]
                self.storageType = jobInfo["storageType"]
                self.userId = self.jobObj.ownerId
                self.username = jobInfo["userName"]

                self.prepare(self.username)
                self.update("OK" if self.execute() else "ERROR")

                # The job leaves the source queue only after the insert succeeded
                try:
                    self.destQueue.insertJob(self.jobObj)
                    self.srcQueue.extractJob()
                except Exception:
                    self.logger.exception(f"Failed to move job {self.jobObj.jobId} from '{self.srcQueue.name()}' to '{self.destQueue.name()}'")
                else:
                    self.logger.info(f"Job {self.jobObj.jobId} MOVED from '{self.srcQueue.name()}' to '{self.destQueue.name()}'")
            finally:
                self.setDestinationQueueName("write_ready")

# Test
#sp = StorePreprocessor()
#sp.prepare("curban")
#sp.execute()
