#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#--------------------------------------------------------------------------------#
# This file is part of the PALM model system.
#
# PALM is free software: you can redistribute it and/or modify it under the terms
# of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# PALM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 1997-2020  Leibniz Universitaet Hannover
#--------------------------------------------------------------------------------#
#
# Current revisions:
# -----------------
# 
# 
# Former revisions:
# -----------------
# $Id$
# Initial revision
# 
# 
# $
#
# Description:
# ------------
# Processing tool for creating a PIDS-conformant virtual measurement setup file
# from observational data that conforms to the UC2 data standard, or from
# prescribed input coordinates.
#
# @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
#
# @todo Add further feature types for customized observations. At the moment only
#       timeSeries is possible.
#--------------------------------------------------------------------------------#


import netCDF4
from netCDF4 import Dataset, stringtochar
import os
import numpy as np


# Function to read the config file
def read_config_file():
   """Read the configuration file and store its settings in module-level globals.

   The file name defaults to ".cvd.config.default". If command-line arguments
   are given, the last one is taken as the name of the configuration file.

   Recognized sections:
      global           - attributes written into the output file header. All
                         options read below are obtained via config.get and
                         must therefore be present in the section.
      input            - "data_path". The presence of this section switches on
                         input_from_observations.
      output           - "output_path" and "output_filename".
      custom_positions - "number_positions" plus, for i = 1..number_positions,
                         "coordinates<i>" and "vars_to_be_measured<i>", each
                         parsed with json.loads.

   Returns:
      0 once the file has been processed.

   Raises:
      SystemExit: with exit status 1 if the configuration file does not exist.
   """

   import configparser
   import os
   import sys
   import json

   # Definition of global configuration parameters
   global global_acronym
   global global_author
   global global_campaign
   global global_comment
   global global_contact
   global global_data_content
   global global_dependencies
   global global_institution
   global global_keywords
   global global_location
   global global_references
   global global_site
   global global_source
   global global_palm_version
   global data_path
   global output_path
   global output_filename
   global number_positions
   global input_from_observations
   global coordinates
   global vars_to_be_measured
   global custom_coordinates

   # Default values, valid for every section that is missing in the file
   global_acronym          = " "
   global_author           = " "
   global_campaign         = " "
   global_comment          = " "
   global_contact          = " "
   global_data_content     = " "
   global_dependencies     = " "
   global_institution      = " "
   global_keywords         = " "
   global_location         = " "
   global_references       = " "
   global_site             = " "
   global_source           = " "
   global_palm_version     = 6.0
   data_path               = " "
   output_path             = " "
   output_filename         = "none"
   number_positions        = -999
   input_from_observations = False
   coordinates             = []
   vars_to_be_measured     = []
   custom_coordinates      = False

   # Determine the name of the configuration file. If several command-line
   # arguments are given, the last one is used.
   input_config = ".cvd.config.default"
   if ( len( sys.argv ) > 1 ):
      input_config = str( sys.argv[-1] )

   # Check if the configuration file exists and quit otherwise. A non-zero exit
   # status is returned so that calling scripts can detect the failure.
   if ( not os.path.isfile( input_config ) ):
      print ("Error. No configuration file " + input_config + " found.")
      sys.exit( 1 )

   # Allow empty settings
   config = configparser.RawConfigParser(allow_no_value=True)
   config.read(input_config)

   for current_section in config.sections():

      # read global attributes which are written into the output file header
      if ( current_section == 'global' ):

         global_acronym      = config.get( current_section, 'acronym'        )
         global_author       = config.get( current_section, 'author'         )
         global_campaign     = config.get( current_section, 'campaign'       )
         global_comment      = config.get( current_section, 'comment'        )
         global_contact      = config.get( current_section, 'contact_person' )
         global_data_content = config.get( current_section, 'data_content'   )
         global_dependencies = config.get( current_section, 'dependencies'   )
         global_institution  = config.get( current_section, 'institution'    )
         global_keywords     = config.get( current_section, 'keywords'       )
         global_location     = config.get( current_section, 'location'       )
         global_references   = config.get( current_section, 'references'     )
         global_site         = config.get( current_section, 'site'           )
         global_source       = config.get( current_section, 'source'         )
         global_palm_version = float( config.get( current_section, 'palm_version' ) )

      # Read data input path for observational data
      elif ( current_section == 'input' ):

         data_path = config.get( current_section, 'data_path' )
         input_from_observations = True

      # Read output path and filename for the VM driver
      elif ( current_section == 'output' ):

         output_path     = config.get( current_section, 'output_path' )
         output_filename = config.get( current_section, 'output_filename' )

      # Read customized coordinates where virtual measurements shall be taken,
      # as well as the variables that should be sampled.
      elif ( current_section == 'custom_positions' ):

         # Store the number as integer, consistent with its default value.
         number_positions = int( config.get( current_section, 'number_positions' ) )

         # Options are numbered starting from 1.
         for count in range( 1, number_positions + 1 ):
            coordinates.append( json.loads( config.get( current_section,
                                                        "coordinates" + str( count ) ) ) )

         # If coordinates are given, set a global flag.
         if ( number_positions > 0 ):
            custom_coordinates = True

         for count in range( 1, number_positions + 1 ):
            vars_to_be_measured.append( json.loads( config.get( current_section,
                                                    "vars_to_be_measured" + str( count ) ) ) )

   return 0

#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Main program:
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


# Define strings: names of attributes, dimensions and variables as they appear
# in the input files and in the output file.
name_featuretype   = "featureType"
name_ts            = "timeSeries"
name_traj          = "trajectory"
name_ntime         = "ntime"
name_time          = "time"
name_station       = "station"
name_traj_dim      = "traj"
name_nz            = "nz"
name_datacontent   = "data_content"
name_eutm          = "E_UTM"
name_nutm          = "N_UTM"
name_hao           = "height_above_origin"
name_station_h     = "station_h"
name_z             = "z"
name_soil_sampling = "soil_sample"
name_num_stat      = "number_of_stations"
name_fill          = "_FillValue"
name_site          = "site"
name_orig_x        = "origin_x"
name_orig_y        = "origin_y"
name_orig_z        = "origin_z"

# Length of the character dimension used to store variable names
max_string_len     = 50

name_measvars      = "measured_variables"

# Variables in the input files that are never treated as measured quantities.
# Every entry must be followed by a comma: adjacent string literals without a
# comma are silently concatenated into one single (wrong) name.
non_measurable_vars = [ 'station_name', 'time', 'time_bounds', 'crs',
                        'vrs', 'x', 'y', 'z', 'lon', 'lat', 'ntime', 'station', 'traj',
                        'E_UTM', 'N_UTM', 'height_above_origin', 'station_h',
                        'traj_name', 'height', 'band_pm_size', 'bands_pm',
                        'bands_pm_size_bounds', 'bands_pm_size',
                        'ancillary_detected_layer' ]

# Variables that mark a measurement as a soil measurement
soil_vars            = [ 't_soil', 'm_soil', 'lwc', 'lwcs', 'smp' ]

# Coordinate variables that are copied from the input files into the output file
dims_out             = [ name_eutm, name_nutm, name_hao, name_z, name_station_h ]

# Define list of attributes which need to be of type float. In the data set this is not
# necessarily guaranteed.
atts_float           = [ 'origin_x', 'origin_y', 'origin_z', 'origin_lon', 'origin_lat', 'rotation_angle' ]

# Define list of default variables that shall be measured at each site
vars_default         = [ 'u', 'v', 'w', 'theta', 'hus' ]


# Read the configuration file. This defines the global configuration parameters
# used below.
read_config_file()

# Counter for the total number of sites written to the output file
num_sites = 0

# Compose the full name of the output file from path and filename
output_filename = output_path + output_filename

# Open output file
ncfile_out = Dataset( output_filename, "w", format="NETCDF4" )

# First, add global attributes. The attributes are written in the listed order.
for att_name, att_value in ( ( 'acronym',        global_acronym      ),
                             ( 'author',         global_author       ),
                             ( 'campaign',       global_campaign     ),
                             ( 'comment',        global_comment      ),
                             ( 'contact_person', global_contact      ),
                             ( 'data_content',   global_data_content ),
                             ( 'dependencies',   global_dependencies ),
                             ( 'institution',    global_institution  ),
                             ( 'keywords',       global_keywords     ),
                             ( 'location',       global_location     ),
                             ( 'references',     global_references   ),
                             ( 'site',           global_site         ),
                             ( 'source',         global_source       ),
                             ( 'palm_version',   global_palm_version ) ):
   ncfile_out.setncattr( att_name, att_value )

# Create universal dimension for the string length.
ncfile_out.createDimension( "string_len", max_string_len )


def _latest_cycle_file( directory ):
   """Return the name of the file in directory that has the highest cycle number.

   The cycle number is the integer formed by the three characters that precede
   the last three characters of the file name. If several files share the
   highest number, the first one returned by os.listdir is taken.

   Returns None if the directory contains no entries, so that a file name found
   in a previously treated directory is never reused by accident.

   Raises ValueError (from int) if a file name does not carry an integer at the
   expected position.
   """
   latest_file      = None
   highest_cycle_nr = 0
   for filename in os.listdir( directory ):
      end_seq   = len( filename ) - 3
      start_seq = end_seq - 3
      cycle_nr  = int( filename[start_seq:end_seq] )
      if ( latest_file is None  or  cycle_nr > highest_cycle_nr ):
         highest_cycle_nr = cycle_nr
         latest_file      = filename

   return latest_file


# Check if observational data is available. In this case, obtain an alphabetically
# sorted list of input data. The list is sorted just for the sake of clarity in the
# resulting setup file.
if ( input_from_observations ):

   list_input_data = sorted( os.listdir( data_path ) )

   # Run loop over all subdirectories, detect the files and extract a list of sites.
   # This is done to reduce the number of virtual measurements in the model. Each
   # virtual measurement has an overhead and consumes memory.
   sites = []
   for dirname in list_input_data:
      data_file = data_path + dirname

      # Directory may contain various file versions.
      # Take the one with highest cycle number.
      latest_file = _latest_cycle_file( data_file )
      if ( latest_file is None ):
         print( "Warning. No input file found in " + data_file + ". Directory is skipped." )
         continue

      # Open the NetCDF file
      input_file = data_file + "/" + latest_file
      ncfile_in = Dataset( input_file, "r", format="NETCDF4", encoding='ascii' )

      # Read the site attribute
      if ( name_site in ncfile_in.ncattrs() ):
         site = ncfile_in.getncattr( name_site )

      # Close the input file again. Only the site name is required in this pass.
      ncfile_in.close()

      if ( site not in sites ):
         sites.append( site )

   # Define a flag array that is used to identify whether dimensions and attributes
   # of a site are already written to the output file or not.
   metadata_written = [False] * len( sites )

   # Define a nested list of default variables that shall be measured. Based on this list,
   # the final number of measured variables is determined.
   measured_variables_all_sites = [ list( vars_default ) for _ in sites ]

   # Run loop over all subdirectories that contain observational data
   for dirname in list_input_data:
      data_file = data_path + dirname

      # Directory may contain various file versions.
      # Take the one with highest cycle number. Directories without any file have
      # already been reported in the loop above.
      latest_file = _latest_cycle_file( data_file )
      if ( latest_file is None ):
         continue

      # Open the NetCDF input file
      input_file = data_file + "/" + latest_file
      ncfile_in = Dataset( input_file, "r", format="NETCDF4", encoding='ascii' )

      # Read site attribute first
      if ( name_site in ncfile_in.ncattrs() ):
         site = ncfile_in.getncattr( name_site )

      # Determine index for the treated site. In the output file, sites are
      # numbered starting from 1.
      site_index = sites.index( site )
      num_vmeas  = site_index + 1

      # Check whether metadata for this site has been already created
      if ( not metadata_written[site_index] ):

         # Read global attributes and write them immediately into the output file.
         # The attribute names are extended by the number of the site.
         for att in ncfile_in.ncattrs():
            if ( att == name_featuretype ):
               feature = ncfile_in.getncattr(att)

            if ( att in atts_float ):
               ncfile_out.setncattr( att + str(num_vmeas), np.double(ncfile_in.getncattr(att)) )
            else:
               ncfile_out.setncattr( att + str(num_vmeas), ncfile_in.getncattr(att) )

         #timeSeries
         if ( feature == name_ts ):
            ntime = len( ncfile_in.dimensions[name_ntime]   )
            nstat = len( ncfile_in.dimensions[name_station] )
            ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
            ncfile_out.createDimension( name_station + str(num_vmeas), nstat )

         #trajectory
         elif ( feature == name_traj ):
            ntime = len( ncfile_in.dimensions[name_ntime]    )
            ntraj = len( ncfile_in.dimensions[name_traj_dim] )
            ncfile_out.createDimension( name_ntime    + str(num_vmeas), ntime )
            ncfile_out.createDimension( name_traj_dim + str(num_vmeas), ntraj )

         #timeseriesProfile
         else:
            ntime = len( ncfile_in.dimensions[name_ntime]   )
            nstat = len( ncfile_in.dimensions[name_station] )
            nz    = len( ncfile_in.dimensions[name_nz]      )
            ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
            ncfile_out.createDimension( name_station + str(num_vmeas), nstat )
            ncfile_out.createDimension( name_nz      + str(num_vmeas), nz    )

         for var in ncfile_in.variables.keys():
            if ( var in dims_out ):
               # Create a variable and write it to file after it is read. In order to
               # avoid fill values in the dimensions, these are converted to zero
               # before written to file. Depending on the featureType of the measurement,
               # the array shape is different. For more information, please see
               # [UC]2 data standard.
               # Timeseries
               if ( feature == name_ts ):
                  temp_ts = ncfile_out.createVariable( var + str(num_vmeas), float,
                                                       name_station + str(num_vmeas) )
                  temp_ts[:] = np.nan_to_num( ncfile_in.variables[var][:] )

               # Trajectories
               elif ( feature == name_traj ):
                  temp_traj = ncfile_out.createVariable( var + str(num_vmeas), float,
                                                         ( name_traj_dim + str(num_vmeas),
                                                           name_ntime + str(num_vmeas) ) )
                  temp_traj[:,:] = np.nan_to_num( ncfile_in.variables[var][:,:] )

               # TimeseriesProfiles
               else:
                  if ( var == name_z ):
                     # Only the first entry along the second axis of the input
                     # variable is transferred.
                     temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float,
                                                          ( name_station + str(num_vmeas),
                                                            name_nz + str(num_vmeas) ) )
                     temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:,0,:] )
                  else:
                     temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float,
                                                          name_station + str(num_vmeas) )
                     temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:] )

      # Search for variables to be measured. In case the variable isn't already defined,
      # append the variable to the list. The list already contains the default variables.
      for var in ncfile_in.variables.keys():
         if ( var not in non_measurable_vars  and
              var not in measured_variables_all_sites[site_index] ):

            measured_variables_all_sites[site_index].append(var)

      # Close the NetCDF input file
      ncfile_in.close()

      # Set flag to indicate that for this specific site dimensions have been
      # already created and attributes are already set.
      metadata_written[site_index] = True

   # After variables are gathered and dimensions / attributes are already written to file,
   # the list of measured variables is written to file.
   for site_index, site_vars in enumerate( measured_variables_all_sites ):

      num_vmeas = site_index + 1

      ncfile_out.createDimension( "nvar" + str(num_vmeas), len( site_vars ) )

      measured = ncfile_out.createVariable( name_measvars + str(num_vmeas), 'S1',
                                            ( "nvar" + str(num_vmeas), "string_len" ) ) # must be NC_CHAR

      for counter, meas in enumerate( site_vars ):
         measured[counter] = stringtochar( np.array( meas, "S%s"%(max_string_len) ) )

      # Check if any of the measured variables is a soil variable. Set flag accordingly.
      soil = any( var in soil_vars for var in site_vars )

      # Write soil flag
      ncfile_out.setncattr( name_soil_sampling + str(num_vmeas), np.int8(soil) )

   # Store the number of observational sites
   num_sites += len( sites )


# Now process the customized input data. Please note, at the moment only timeseries
# are possible.
if ( custom_coordinates ):

   # Customized sites are numbered consecutively after the observational sites.
   count_site = num_sites + 1
   for coord in coordinates:
      site_id = str( count_site )

      # Define mandatory attributes
      ncfile_out.setncattr( name_featuretype + site_id, name_ts )
      ncfile_out.setncattr( name_site        + site_id, "custom" + str(count_site - num_sites) )
      ncfile_out.setncattr( name_orig_x      + site_id, coord[0] )
      ncfile_out.setncattr( name_orig_y      + site_id, coord[1] )
      ncfile_out.setncattr( name_orig_z      + site_id, 0.0 )

      # Define dimensions
      ntime = 1
      nstat = 1
      ncfile_out.createDimension( name_ntime   + site_id, ntime )
      ncfile_out.createDimension( name_station + site_id, nstat )

      # Define coordinate variables. Each one is defined on the station dimension
      # of the current site.
      for var_name, value in ( ( name_eutm,      coord[0] ),
                               ( name_nutm,      coord[1] ),
                               ( name_z,         coord[2] ),
                               ( name_station_h, 0.0      ) ):
         temp_ts = ncfile_out.createVariable( var_name + site_id, float,
                                              name_station + site_id )
         temp_ts[:] = np.array( value )

      count_site += 1

   # Reset counter variable
   count_site = num_sites + 1

   # Check if variables are prescribed. If so, prepare final output string
   # stored in measured_variables.
   for custom_vars in vars_to_be_measured:

      # Start from the default variables and extend them by the given variables
      # that are not already contained.
      measured_variables = list( vars_default )
      for var in custom_vars:
         if ( var not in measured_variables ):
            measured_variables.append(var)

      ncfile_out.createDimension( "nvar" + str(count_site), len( measured_variables ) )

      measured_var = ncfile_out.createVariable( name_measvars + str(count_site), 'S1',
                                                ( "nvar" + str(count_site), "string_len" ) ) # must be NC_CHAR

      # Write the variables to the file
      for counter, meas in enumerate( measured_variables ):
         measured_var[counter] = stringtochar( np.array( meas, "S%s"%(max_string_len) ) )

      # Add soil attribute for the current measurement. The flag is set if at
      # least one of the measured variables is a soil variable.
      soil = any( var in soil_vars for var in measured_variables )

      # Write soil flag
      ncfile_out.setncattr( name_soil_sampling + str(count_site), np.int8(soil) )

      # Increment counter variable
      count_site += 1

   # Add the number of customized sites.
   num_sites += int( number_positions )


# Finally, write the total number of sites to the output file
ncfile_out.setncattr( name_num_stat, num_sites )

# Close the output file explicitly instead of relying on the interpreter shutdown.
ncfile_out.close()

print( "*** palm_cvd has been finished. You can find the output file under: " )
print( "    " + output_filename )

# Terminate with exit status 0. SystemExit is raised directly because quit() is
# only provided by the site module.
raise SystemExit( 0 )