#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#--------------------------------------------------------------------------------#
# This file is part of the PALM model system.
#
# PALM is free software: you can redistribute it and/or modify it under the terms
# of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# PALM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 1997-2020 Leibniz Universitaet Hannover
#--------------------------------------------------------------------------------#
#
# Current revisions:
# -----------------
#
#
# Former revisions:
# -----------------
# $Id$
# Initial revision
#
#
# $
#
# Description:
# ------------
# Processing tool for creating a PIDS-conform virtual measurement setup file
# from UC2 data-standard conform observational data or from prescribed input
# coordinates.
#
# @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
#
# @todo Add further feature types for customized observations. At the moment only
#       timeSeries is possible.
#--------------------------------------------------------------------------------#

import netCDF4
from netCDF4 import Dataset, stringtochar
import os
import numpy as np


# Function to read the config file
def read_config_file() -> int:
    """Read the configuration file and store its settings in module-level globals.

    The file name defaults to ``.cvd.config.default``. If command-line
    arguments are given, the last one is used as the file name instead.
    Only the sections ``global``, ``input``, ``output`` and
    ``custom_positions`` are evaluated; any other section is ignored.

    Returns:
        0 after the configuration has been read.

    Raises:
        SystemExit: If the configuration file does not exist.
    """
    import configparser
    import os
    import sys
    import json

    # Definition of global configuration parameters
    global global_acronym, global_author, global_campaign, global_comment
    global global_contact, global_data_content, global_dependencies
    global global_institution, global_keywords, global_location
    global global_references, global_site, global_source, global_palm_version
    global data_path, output_path, output_filename
    global number_positions, input_from_observations
    global coordinates, vars_to_be_measured, custom_coordinates

    # Defaults that stay in effect when the respective section is missing
    global_acronym = " "
    global_author = " "
    global_campaign = " "
    global_comment = " "
    global_contact = " "
    global_data_content = " "
    global_dependencies = " "
    global_institution = " "
    global_keywords = " "
    global_location = " "
    global_references = " "
    global_site = " "
    global_source = " "
    global_palm_version = 6.0
    data_path = " "
    output_path = " "
    output_filename = "none"
    number_positions = -999
    input_from_observations = False
    coordinates = []
    vars_to_be_measured = []
    custom_coordinates = False

    # The last command-line argument (if any) overrides the default file name
    input_config = ".cvd.config.default"
    if len(sys.argv) > 1:
        input_config = str(sys.argv[-1])

    # Allow empty settings
    config = configparser.RawConfigParser(allow_no_value=True)

    # Check if a config file exists and terminate with an error status otherwise.
    if not os.path.isfile(input_config):
        print("Error. No configuration file " + input_config + " found.")
        sys.exit(1)

    config.read(input_config)

    for current_section in config.sections():

        # Read global attributes which are written into the output file header
        if current_section == 'global':
            global_acronym = config.get(current_section, 'acronym')
            global_author = config.get(current_section, 'author')
            global_campaign = config.get(current_section, 'campaign')
            global_comment = config.get(current_section, 'comment')
            global_contact = config.get(current_section, 'contact_person')
            global_data_content = config.get(current_section, 'data_content')
            global_dependencies = config.get(current_section, 'dependencies')
            global_institution = config.get(current_section, 'institution')
            global_keywords = config.get(current_section, 'keywords')
            global_location = config.get(current_section, 'location')
            global_references = config.get(current_section, 'references')
            global_site = config.get(current_section, 'site')
            global_source = config.get(current_section, 'source')
            global_palm_version = float(config.get(current_section, 'palm_version'))

        # Read data input path for observational data
        elif current_section == 'input':
            data_path = config.get(current_section, 'data_path')
            input_from_observations = True

        # Read output path and filename for the VM driver
        elif current_section == 'output':
            output_path = config.get(current_section, 'output_path')
            output_filename = config.get(current_section, 'output_filename')

        # Read customized coordinates where virtual measurements shall be taken,
        # as well as the variables that should be sampled.
        elif current_section == 'custom_positions':
            # Kept as read from the file; it is converted with int() where needed.
            number_positions = config.get(current_section, 'number_positions')

            # Options are numbered starting at 1: coordinates1, coordinates2, ...
            for count in range(int(number_positions)):
                coordinates.append(
                    json.loads(config.get(current_section,
                                          "coordinates" + str(count + 1))))
                # If coordinates are given, set a global flag.
                custom_coordinates = True

            for count in range(int(number_positions)):
                vars_to_be_measured.append(
                    json.loads(config.get(current_section,
                                          "vars_to_be_measured" + str(count + 1))))

    return 0


#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Main program:
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Define strings
name_featuretype = "featureType"
name_ts = "timeSeries"
name_traj = "trajectory"
name_ntime = "ntime"
name_time = "time"
name_station = "station"
name_traj_dim = "traj"
name_nz = "nz"
name_datacontent = "data_content"
name_eutm = "E_UTM"
name_nutm = "N_UTM"
name_hao = "height_above_origin"
name_station_h = "station_h"
name_z = "z"
name_soil_sampling = "soil_sample"
name_num_stat = "number_of_stations"
name_fill = "_FillValue"
name_site = "site"
name_orig_x = "origin_x"
name_orig_y = "origin_y"
name_orig_z = "origin_z"
max_string_len = 50
name_measvars = "measured_variables"

# Variables of the input files that are never treated as measured quantities.
# NOTE: every entry must be followed by a comma. Adjacent string literals are
# concatenated by Python, which would silently merge two names into one.
non_measurable_vars = [
    'station_name', 'time', 'time_bounds', 'crs',
    'vrs', 'x', 'y', 'z', 'lon', 'lat', 'ntime', 'station', 'traj',
    'E_UTM', 'N_UTM', 'height_above_origin', 'station_h',
    'traj_name', 'height', 'band_pm_size', 'bands_pm', 'bands_pm_size_bounds',
    'bands_pm_size', 'ancillary_detected_layer',
]

soil_vars = ['t_soil', 'm_soil', 'lwc', 'lwcs', 'smp']

dims_out = [name_eutm, name_nutm, name_hao, name_z, name_station_h]

# Define list of attributes which need to be of type float. In the data set this is not
# necessarily guaranteed.
atts_float = ['origin_x', 'origin_y', 'origin_z', 'origin_lon', 'origin_lat',
              'rotation_angle']

# Define list of default variables that shall be measured at each site
vars_default = ['u', 'v', 'w', 'theta', 'hus']


def _latest_file_in(directory):
    """Return the name of the file in *directory* with the highest cycle number.

    The cycle number is parsed from the three characters that precede the
    last three characters of the file name.

    Raises:
        ValueError: If these characters of a file name are not an integer.
        FileNotFoundError: If the directory contains no file at all.
    """
    latest_file = None
    # Start below zero so that a file with cycle number 0 is accepted as well.
    highest_cycle_nr = -1
    for filename in os.listdir(directory):
        start_seq = len(filename) - 6
        end_seq = len(filename) - 3
        cycle_nr = int(filename[start_seq:end_seq])
        if cycle_nr > highest_cycle_nr:
            highest_cycle_nr = cycle_nr
            latest_file = filename
    if latest_file is None:
        raise FileNotFoundError("No input file found in " + directory)
    return latest_file


# Read config file
read_config_file()

# Initialize counter variable for the number of sites
num_sites = 0

# Set the output path for the data
output_filename = output_path + output_filename

# Open output file
ncfile_out = Dataset(output_filename, "w", format="NETCDF4")

# First, add global attributes
for att_name, att_value in (
        ('acronym', global_acronym),
        ('author', global_author),
        ('campaign', global_campaign),
        ('comment', global_comment),
        ('contact_person', global_contact),
        ('data_content', global_data_content),
        ('dependencies', global_dependencies),
        ('institution', global_institution),
        ('keywords', global_keywords),
        ('location', global_location),
        ('references', global_references),
        ('site', global_site),
        ('source', global_source),
        ('palm_version', global_palm_version),
):
    ncfile_out.setncattr(att_name, att_value)

# Create universal dimension for the string length.
ncfile_out.createDimension("string_len", max_string_len)

if input_from_observations:

    # Obtain an alphabetically sorted list of input data. The list is sorted
    # just for the sake of clarity in the resulting setup file.
    list_input_data = sorted(os.listdir(data_path))

    # Run loop over all subdirectories, detect the files and extract a list of sites.
    # This is done to reduce the number of virtual measurements in the model. Each
    # virtual measurement has an overhead and consumes memory.
    sites = []
    for dirname in list_input_data:
        data_file = data_path + dirname

        # Directory may contain various file versions.
        # Take the one with highest cycle number.
        input_file = data_file + "/" + _latest_file_in(data_file)

        # Only the site attribute is needed here; the file is closed right after.
        with Dataset(input_file, "r", format="NETCDF4", encoding='ascii') as ncfile_in:
            if name_site in ncfile_in.ncattrs():
                site = ncfile_in.getncattr(name_site)

        if site not in sites:
            sites.append(site)

    # Define a flag array that is used to identify whether site dimensions are already
    # defined or not.
    create_metadata_for_site = [None] * len(sites)

    # Define a nested list of default variables that shall be measured. Based on this list,
    # the final number of measured variables is determined. Each site gets its own copy.
    measured_variables_all_sites = [list(vars_default) for _ in sites]

    # Run loop over all subdirectories that contain observational data
    for dirname in list_input_data:
        data_file = data_path + dirname

        # Directory may contain various file versions.
        # Take the one with highest cycle number.
        input_file = data_file + "/" + _latest_file_in(data_file)

        with Dataset(input_file, "r", format="NETCDF4", encoding='ascii') as ncfile_in:

            # Read site attribute first
            if name_site in ncfile_in.ncattrs():
                site = ncfile_in.getncattr(name_site)

            # Determine index for the treated site. All names written for this
            # site carry the 1-based site number as suffix.
            site_index = sites.index(site)
            num_vmeas = site_index + 1
            suffix = str(num_vmeas)

            # Check whether metadata for this site has been already created
            if create_metadata_for_site[site_index] != "Done":

                # Read global attributes and write them immediately into the output file
                for att in ncfile_in.ncattrs():
                    if att == name_featuretype:
                        feature = ncfile_in.getncattr(att)

                    if att in atts_float:
                        ncfile_out.setncattr(att + suffix,
                                             np.double(ncfile_in.getncattr(att)))
                    else:
                        ncfile_out.setncattr(att + suffix, ncfile_in.getncattr(att))

                # timeSeries
                if feature == name_ts:
                    ntime = len(ncfile_in.dimensions[name_ntime])
                    nstat = len(ncfile_in.dimensions[name_station])
                    ncfile_out.createDimension(name_ntime + suffix, ntime)
                    ncfile_out.createDimension(name_station + suffix, nstat)

                # trajectory
                elif feature == name_traj:
                    ntime = len(ncfile_in.dimensions[name_ntime])
                    ntraj = len(ncfile_in.dimensions[name_traj_dim])
                    ncfile_out.createDimension(name_ntime + suffix, ntime)
                    ncfile_out.createDimension(name_traj_dim + suffix, ntraj)

                # timeseriesProfile
                else:
                    ntime = len(ncfile_in.dimensions[name_ntime])
                    nstat = len(ncfile_in.dimensions[name_station])
                    nz = len(ncfile_in.dimensions[name_nz])
                    ncfile_out.createDimension(name_ntime + suffix, ntime)
                    ncfile_out.createDimension(name_station + suffix, nstat)
                    ncfile_out.createDimension(name_nz + suffix, nz)

                for var in ncfile_in.variables.keys():
                    if var not in dims_out:
                        continue

                    # Create a variable and write it to file after it is read. In order to
                    # avoid fill values in the dimensions, these are converted to zero
                    # before written to file. Depending on the featureType of the
                    # measurement, the array shape is different. For more information,
                    # please see the [UC]2 data standard.
                    # Timeseries
                    if feature == name_ts:
                        temp_ts = ncfile_out.createVariable(var + suffix, float,
                                                            name_station + suffix)
                        temp_ts[:] = np.nan_to_num(ncfile_in.variables[var][:])

                    # Trajectories
                    elif feature == name_traj:
                        temp_traj = ncfile_out.createVariable(
                            var + suffix, float,
                            (name_traj_dim + suffix, name_ntime + suffix))
                        temp_traj[:, :] = np.nan_to_num(ncfile_in.variables[var][:, :])

                    # TimeseriesProfiles
                    else:
                        if var == 'z':
                            temp_pr = ncfile_out.createVariable(
                                var + suffix, float,
                                (name_station + suffix, name_nz + suffix))
                            temp_pr[:] = np.nan_to_num(ncfile_in.variables[var][:, 0, :])
                        else:
                            temp_pr = ncfile_out.createVariable(var + suffix, float,
                                                                name_station + suffix)
                            temp_pr[:] = np.nan_to_num(ncfile_in.variables[var][:])

            # Search for variables to be measured. In case the variable isn't already
            # defined, append the variable to the list.
            site_variables = measured_variables_all_sites[site_index]
            for var in ncfile_in.variables.keys():
                if (var not in non_measurable_vars
                        and var not in vars_default
                        and var not in site_variables):
                    site_variables.append(var)

        # Set flag to indicate that for this specific site dimensions have been
        # already created and attributes are already set.
        create_metadata_for_site[site_index] = "Done"

    # After variables are gathered and dimensions / attributes are already written to file,
    # the list of measured variables is written to file.
    for num_vmeas, site_variables in enumerate(measured_variables_all_sites, start=1):

        ncfile_out.createDimension("nvar" + str(num_vmeas), len(site_variables))

        measured = ncfile_out.createVariable(
            'measured_variables' + str(num_vmeas), 'S1',
            ("nvar" + str(num_vmeas), "string_len"))  # must be NC_CHAR

        for counter, meas in enumerate(site_variables):
            measured[counter] = stringtochar(np.array(meas, "S%s" % (max_string_len)))

        # Check if any of the measured variables is a soil variable. Set flag accordingly.
        soil = any(var in soil_vars for var in site_variables)

        # Write soil flag
        ncfile_out.setncattr(name_soil_sampling + str(num_vmeas), np.int8(soil))

    # Store the number of observational sites
    num_sites += len(sites)

# Now process the customized input data. Please note, at the moment only timeseries
# are possible.
if custom_coordinates:

    # Custom sites are numbered consecutively after the observational sites.
    count_site = num_sites + 1
    for coord in coordinates:
        site_id = str(count_site)

        # Define mandatory attributes
        ncfile_out.setncattr(name_featuretype + site_id, name_ts)
        ncfile_out.setncattr(name_site + site_id,
                             "custom" + str(count_site - num_sites))
        ncfile_out.setncattr(name_orig_x + site_id, coord[0])
        ncfile_out.setncattr(name_orig_y + site_id, coord[1])
        ncfile_out.setncattr(name_orig_z + site_id, 0.0)

        # Define dimensions
        ntime = 1
        nstat = 1
        ncfile_out.createDimension(name_ntime + site_id, ntime)
        ncfile_out.createDimension(name_station + site_id, nstat)

        # Define coordinate variables. coord[0], coord[1] and coord[2] are written
        # as E_UTM, N_UTM and z; station_h is always set to zero.
        for var_name, value in ((name_eutm, coord[0]),
                                (name_nutm, coord[1]),
                                (name_z, coord[2]),
                                (name_station_h, 0.0)):
            temp_ts = ncfile_out.createVariable(var_name + site_id, float,
                                                name_station + site_id)
            temp_ts[:] = np.array(value)

        count_site += 1

    # Reset counter variable
    count_site = num_sites + 1

    # Check if variables are prescribed. If so, prepare final output string
    # stored in measured_variables.
    if vars_to_be_measured:

        for custom_vars in vars_to_be_measured:

            # Start from the default variables and append every prescribed
            # variable that is not already contained.
            measured_variables = list(vars_default)
            for var in custom_vars:
                if var not in measured_variables:
                    measured_variables.append(var)

            ncfile_out.createDimension("nvar" + str(count_site),
                                       len(measured_variables))

            measured_var = ncfile_out.createVariable(
                'measured_variables' + str(count_site), 'S1',
                ("nvar" + str(count_site), "string_len"))  # must be NC_CHAR

            # Write the variables to the file
            for counter, meas in enumerate(measured_variables):
                measured_var[counter] = stringtochar(
                    np.array(meas, "S%s" % (max_string_len)))

            # Add soil attribute for the current measurement. The flag is set if at
            # least one measured variable is listed in soil_vars (membership test;
            # comparing a name with the whole list would never be true).
            soil = any(var in soil_vars for var in measured_variables)

            # Write soil flag
            ncfile_out.setncattr(name_soil_sampling + str(count_site), np.int8(soil))

            # Increment counter variable
            count_site += 1

    # Add the number of customized sites.
    num_sites += int(number_positions)

# Finally, write the total number of sites to the output file
ncfile_out.setncattr(name_num_stat, num_sites)

# Close the output file explicitly so that all data is written to disk.
ncfile_out.close()

print( "*** palm_cvd has been finished. You can find the output file under: " )
print( " " + output_filename )

quit()