#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#--------------------------------------------------------------------------------#
# This file is part of the PALM model system.
#
# PALM is free software: you can redistribute it and/or modify it under the terms
# of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# PALM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 1997-2021 Leibniz Universitaet Hannover
#--------------------------------------------------------------------------------#
#
# Former revisions:
# -----------------
# $Id: postprocess_vm_measurements.py 4853 2021-01-15 15:22:11Z suehring
#
# Initial revision
#
#--------------------------------------------------------------------------------#
#
# Description:
# ------------
# Postprocessing tool to merge output of virtual measurements into site-specific
# files.
#
# Usage:
#-------
"""Merge virtual measurement output: removes empty timestamps from the netcdf
files and concatenates files from several restart files into one file.

Example:
module load nco anaconda3
python3 postprocess_vm_measurements.py my_palm_simulation/OUTPUT
"""
#
# @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
#
#--------------------------------------------------------------------------------#

import argparse
import os
import subprocess
import sys

try:
    import numpy as np
except ImportError:
    sys.exit(
        'package "numpy" is required but not installed! Run\n' +
        'python -m pip install --user numpy\nto install it.')

try:
    from netCDF4 import Dataset
except ImportError:
    sys.exit(
        'package "netCDF4" is required but not installed! Run\n' +
        'python -m pip install --user netCDF4\nto install it.')


# - - - - - - - - - - - - - - -

def concatenate(file_list, inds, sites, out_dir, override=True):
    """Concatenate netCDF files via the NCO tools 'ncap2' and 'ncrcat'.

    For each site (inner list of *file_list*), shift the record coordinate
    'ntime' (and 'ntime_soil' where present) of every cycle after the first
    by the offsets in *inds* so that records continue where the previous
    cycle ended, then concatenate the group into a single file in *out_dir*.

    Parameters
    ----------
    file_list : list[list[str]]
        One inner list of netCDF file paths per site, ordered by run cycle.
    inds : list[list[int]]
        Per-file record offsets (start indices computed in main()).
    sites : list[str]
        Output file name for each inner list of *file_list*.
    out_dir : str
        Directory receiving the merged files; created if missing.
    override : bool
        Unused; kept for interface compatibility.

    Returns
    -------
    The last composed 'ncap2' command (legacy return value of the original
    implementation, preserved for compatibility).
    """
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    # Legacy initial value; returned unchanged if no offset shift was needed.
    nco_command = ["ncrcat"]

    for counter_file, files in enumerate(file_list):
        ncrcat_command = "ncrcat -O "
        for counter_dir, single_file in enumerate(files):
            if counter_dir != 0:
                # Cycles after the first: shift the record coordinate(s) in
                # place so concatenated records are monotonically increasing.
                ncfile = Dataset(single_file, "r")
                try:
                    soil = "time_soil" in ncfile.variables
                finally:
                    # Close the handle before ncap2 rewrites the file.
                    ncfile.close()

                offset = inds[counter_file][counter_dir]
                nco_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(offset)
                if soil:
                    nco_command += " -s 'ntime_soil=ntime_soil+{}' ".format(offset)
                nco_command += " " + single_file + " " + single_file
                print(nco_command)
                subprocess.run(nco_command, shell=True, check=True)

            ncrcat_command += single_file + " "

        ncrcat_command += out_dir + "/"
        # If the target file already exists, disambiguate the output name
        # with the 'siteXXX' substring of the file name.
        if os.path.isfile(out_dir + "/" + sites[counter_file]):
            string_dum = sites[counter_file]
            start = string_dum.find('site')
            end = start + 7
            outfile = sites[counter_file] + "_" + string_dum[start:end]
            print(string_dum[start:end])
        else:
            outfile = sites[counter_file]
        ncrcat_command += outfile

        print(ncrcat_command)
        subprocess.run(ncrcat_command, shell=True, check=True)

    return nco_command


# - - - - - - - - - - - - - - -

def truncate(file_process, override=True):
    """Cut empty (masked or zero-valued) trailing time levels from a file.

    Inspects the 'time' variable of *file_process* and, when trailing
    records are masked or zero, truncates the file in place with 'ncks'.
    'ntime' (and 'ntime_soil' where present) are turned into record
    dimensions so 'ncrcat' can later concatenate along them.

    Parameters
    ----------
    file_process : str
        Path of the netCDF file to truncate (modified in place).
    override : bool
        Unused; kept for interface compatibility.

    Returns
    -------
    (end_index, site)
        Index of the last valid time level, and the value of the file's
        'site' attribute (None when the attribute is absent).
    """
    ncfile = Dataset(file_process, "r")
    try:
        time_dim = ncfile.dimensions["ntime"]
        time_var = ncfile.variables["time"][:, :]
        # True where a time value is present, False where it is masked.
        time_mask = ~np.ma.getmaskarray(time_var)

        soil = "time_soil" in ncfile.variables
        # BUGFIX: original initialized 'site' only inside the attribute loop,
        # raising NameError for files without a 'site' attribute.
        site = ncfile.getncattr("site") if "site" in ncfile.ncattrs() else None

        start_index = 0
        # BUGFIX: the original tested 'np.any(time_mask is False)' — an
        # identity comparison of the array object, which is always False —
        # so the masked-value branch could never trigger. Test elementwise.
        if np.any(~time_mask):
            end_index = np.where(~time_mask)[0][0] - 1
            cut = True
        elif np.any(time_var == 0):
            end_index = np.where(time_var == 0)[0][0] - 1
            cut = True
        else:
            end_index = len(time_var[:][0])
            cut = False

        if cut:
            # Compose the ncks command while the dataset is still open (the
            # unlimited-dimension check needs the live handle).
            ncks_command = "ncks -O -d ntime,{0},{1}".format(start_index, end_index)
            if not time_dim.isunlimited():
                ncks_command += " --mk_rec_dmn"
                ncks_command += " ntime"
            if soil:
                ncks_command += " -d ntime_soil,{0},{1}".format(start_index, end_index)
                ncks_command += " --mk_rec_dmn"
                ncks_command += " ntime_soil"
            ncks_command += " " + file_process + " " + file_process
        else:
            ncks_command = None
    finally:
        # BUGFIX: close the handle before ncks rewrites the file in place
        # (the original left every Dataset open).
        ncfile.close()

    if ncks_command is not None:
        # Cut time levels using ncks
        print(ncks_command)
        subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE)

    return end_index, site


# - - - - - - - - - - - - - - -

def main(path_data, output_directory, override=True):
    """Truncate and merge all virtual-measurement files below *path_data*.

    Each subdirectory of *path_data* is expected to hold the output of one
    run cycle with identical file names. Every file is first truncated to
    its valid time levels, then all cycles of a site are concatenated into
    one file in *output_directory*.
    """
    if not path_data.endswith('/'):
        path_data += '/'

    # One output directory per run cycle, in sorted (chronological) order.
    list_output_dirs = [path_data + directory + '/'
                        for directory in sorted(os.listdir(path_data))]

    # The files of the first cycle define the set of sites to process
    # (same result as the original counter-based loop).
    if list_output_dirs:
        output_file_list = sorted(os.listdir(list_output_dirs[0]))
    else:
        output_file_list = []

    num_dirs = len(list_output_dirs)
    start_inds = [[0] * num_dirs for _ in output_file_list]
    end_inds = [[0] * num_dirs for _ in output_file_list]
    input_files = [[None] * num_dirs for _ in output_file_list]
    sites = [None] * len(output_file_list)

    for counter_file, filename in enumerate(output_file_list):
        for counter_dir, directory in enumerate(list_output_dirs):
            file_process = directory + filename
            end_ind, sites[counter_file] = truncate(file_process, override)
            # NOTE(review): the site attribute returned by truncate() is
            # immediately overridden by the file name — original behavior,
            # preserved here.
            sites[counter_file] = filename
            if counter_dir != 0:
                start_inds[counter_file][counter_dir] = \
                    end_inds[counter_file][counter_dir - 1]
            end_inds[counter_file][counter_dir] = \
                start_inds[counter_file][counter_dir] + end_ind
            input_files[counter_file][counter_dir] = file_process

    # Concatenate all files
    concatenate(input_files, start_inds, sites, output_directory, override)


# - - - - - - - - - - - - - - -
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Merge virtual measurement output from multiple PALM run cycles',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', metavar='IN',
                        help='PALM output directory containing virtual measurements')
    # BUGFIX: the original declared nargs=1, so an explicit '-o DIR' produced
    # a one-element list (while the default stayed a plain string), crashing
    # later on string concatenation. Without nargs, args.out is always a str.
    parser.add_argument('--out', '-o', metavar='OUT', default='./merge',
                        help='Output directory to store merged data')
    args = parser.parse_args()

    main(args.input, output_directory=args.out, override=True)