source: palm/trunk/SCRIPTS/palm_cvd @ 4760

Last change on this file since 4760 was 4758, checked in by suehring, 4 years ago

script to create input file for virtual measurements - palm_cvd: In order not to omit observations that are on the same site but have different coordinates or feature-types, process all files rather than only one file per site

  • Property svn:executable set to *
  • Property svn:keywords set to ID
File size: 20.7 KB
RevLine 
[4400]1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4#--------------------------------------------------------------------------------#
5# This file is part of the PALM model system.
6#
7# PALM is free software: you can redistribute it and/or modify it under the terms
8# of the GNU General Public License as published by the Free Software Foundation,
9# either version 3 of the License, or (at your option) any later version.
10#
11# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
12# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License along with
16# PALM. If not, see <http://www.gnu.org/licenses/>.
17#
18# Copyright 1997-2020  Leibniz Universitaet Hannover
19#--------------------------------------------------------------------------------#
20#
21# Current revisions:
22# -----------------
[4663]23#
24#
[4400]25# Former revisions:
26# -----------------
27# $Id$
[4758]28# In order not to omit observations that are on the same site but have different
29# coordinates or feature-types, process all files rather than only one file
30# per site.
31#
32# 4663 2020-09-02 14:54:09Z gronemeier
[4663]33# bugfix in non_measurable_vars; ignore station_h if featureType is trajectory
34#
35# 4400 2020-02-10 20:32:41Z suehring
[4400]36# Initial revision
37#
38# Description:
39# ------------
40# Processing tool for creating PIDS conform virtual measurement setup file
41# from UC2 data-standard conform observational data or from prescribed input
42# coordinates.
43#
44# @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
45#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
46#
47# @todo Add further feature types for customized observations. At the moment only
48#       timeSeries is possible.
49#--------------------------------------------------------------------------------#
50
51
52import netCDF4
53from netCDF4 import Dataset, stringtochar
54import os
55import numpy as np
56
57
# Function to read the config file
def read_config_file():
   """Read the palm_cvd configuration file and store its settings.

   The path to the configuration file may be given as the last command-line
   argument; otherwise the default ".cvd.config.default" is used. All
   settings are stored in module-level globals which are consumed by the
   main program below.

   Returns
   -------
   int
      0 on success. The program is terminated with a nonzero exit status
      if no configuration file is found.
   """

   import configparser
   import json
   import sys

   # Definition of global configuration parameters
   global global_acronym
   global global_author
   global global_campaign
   global global_comment
   global global_contact
   global global_data_content
   global global_dependencies
   global global_institution
   global global_keywords
   global global_location
   global global_references
   global global_site
   global global_source
   global global_palm_version
   global data_path
   global output_path
   global output_filename
   global number_positions
   global input_from_observations
   global coordinates
   global vars_to_be_measured
   global custom_coordinates

   # Default values. Single blanks are used for string attributes so that
   # they are written as valid (non-empty) NetCDF attributes later on.
   global_acronym          = " "
   global_author           = " "
   global_campaign         = " "
   global_comment          = " "
   global_contact          = " "
   global_data_content     = " "
   global_dependencies     = " "
   global_institution      = " "
   global_keywords         = " "
   global_location         = " "
   global_references       = " "
   global_site             = " "
   global_source           = " "
   global_palm_version     = 6.0
   data_path               = " "
   output_path             = " "
   output_filename         = "none"
   number_positions        = -999
   input_from_observations = False
   coordinates             = []
   vars_to_be_measured     = []
   custom_coordinates      = False

   # The configuration file may be given as the last command-line argument;
   # fall back to the default name otherwise.
   input_config = ".cvd.config.default"
   if len( sys.argv ) > 1:
      input_config = str( sys.argv[-1] )

   # Allow empty settings
   config = configparser.RawConfigParser(allow_no_value=True)

   # Abort with a nonzero exit status if no configuration file exists.
   if not os.path.isfile( input_config ):
      print ("Error. No configuration file " + input_config + " found.")
      sys.exit( 1 )

   config.read(input_config)

   for current_section in config.sections():

      # Read global attributes which are written into the output file header
      if ( current_section == 'global' ):

         global_acronym      = config.get( current_section, 'acronym'        )
         global_author       = config.get( current_section, 'author'         )
         global_campaign     = config.get( current_section, 'campaign'       )
         global_comment      = config.get( current_section, 'comment'        )
         global_contact      = config.get( current_section, 'contact_person' )
         global_data_content = config.get( current_section, 'data_content'   )
         global_dependencies = config.get( current_section, 'dependencies'   )
         global_institution  = config.get( current_section, 'institution'    )
         global_keywords     = config.get( current_section, 'keywords'       )
         global_location     = config.get( current_section, 'location'       )
         global_references   = config.get( current_section, 'references'     )
         global_site         = config.get( current_section, 'site'           )
         global_source       = config.get( current_section, 'source'         )
         global_palm_version = float( config.get( current_section, 'palm_version' ) )

      # Read data input path for observational data
      elif ( current_section == 'input' ):

         data_path = config.get( current_section, 'data_path' )
         input_from_observations = True

      # Read output path and filename for the VM driver
      elif ( current_section == 'output' ):

         output_path     = config.get( current_section, 'output_path' )
         output_filename = config.get( current_section, 'output_filename' )

      # Read customized coordinates where virtual measurements shall be taken,
      # as well as the variables that should be sampled. Coordinates and
      # variable lists are given as JSON arrays, one per position, with a
      # 1-based suffix (coordinates1, coordinates2, ...).
      elif ( current_section == 'custom_positions' ):

         number_positions = config.get( current_section, 'number_positions' )

         for count in range( 0, int( number_positions ) ):
            coordinates.append( json.loads( config.get( current_section, \
                                                        "coordinates" + str( count + 1 ) ) ) )
            # If coordinates are given, set a global flag.
            custom_coordinates = True

         for count in range( 0, int( number_positions ) ):
            vars_to_be_measured.append( json.loads( config.get( current_section, \
                                                    "vars_to_be_measured" + str( count + 1 ) ) ) )


   return 0
180
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Main program:
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


# Define strings: names of dimensions, variables and attributes as they appear
# in the [UC]2-standard input files and in the PIDS setup file written below.
name_featuretype   = "featureType"
name_ts            = "timeSeries"
name_traj          = "trajectory"
name_ntime         = "ntime"
name_time          = "time"
name_station       = "station"
name_traj_dim      = "traj"
name_nz            = "nz"
name_datacontent   = "data_content"
name_eutm          = "E_UTM"
name_nutm          = "N_UTM"
name_hao           = "height_above_origin"
name_station_h     = "station_h"
name_z             = "z"
name_soil_sampling = "soil_sample"
name_num_stat      = "number_of_stations"
name_fill          = "_FillValue"
name_site          = "site"
name_acro          = "acronym"
name_content       = "data_content"
name_orig_x        = "origin_x"
name_orig_y        = "origin_y"
name_orig_z        = "origin_z"

# Maximum string length for character arrays written to the NetCDF output
max_string_len     = 50

name_measvars      = "measured_variables"

# Coordinate / metadata variables of the input files which describe the
# measurement itself and hence cannot be sampled as measured quantities.
non_measurable_vars = ['station_name', 'time', 'time_bounds', 'crs', \
                       'vrs', 'x', 'y', 'z', 'lon', 'lat', 'ntime', 'station', 'traj', \
                       'E_UTM', 'N_UTM', 'height_above_origin', 'station_h', \
                       'traj_name', 'height', 'band_pm_size', 'bands_pm', 'bands_pm_size_bounds', \
                       'bands_pm_size', 'ancillary_detected_layer' ]

# Variables that are sampled in the soil; their presence sets the soil flag
soil_vars            = [ 't_soil', 'm_soil', 'lwc', 'lwcs', 'smp' ]

# Coordinate variables that are copied from the input to the output file
dims_out             = [ name_eutm, name_nutm, name_hao, name_z, name_station_h ]

# Define list of attributes which need to be of type float. In the data set this is not
# necessarily guaranteed.
atts_float           = [ 'origin_x', 'origin_y', 'origin_z', 'origin_lon', 'origin_lat', 'rotation_angle' ]

# Define list of default variables that shall be measured at each site
vars_default         = [ 'u', 'v', 'w', 'theta', 'hus' ]


# Read config file (populates the module-level configuration globals)
read_config_file()
235
# Initialize counter variable for the number of sites
num_sites = 0

# Set the output path for the data (output_path + output_filename from config)
output_filename = output_path + output_filename

# Open output file
ncfile_out = Dataset( output_filename, "w", format="NETCDF4" )

# First, add global attributes taken from the configuration file
ncfile_out.setncattr( 'acronym',        global_acronym      )
ncfile_out.setncattr( 'author',         global_author       )
ncfile_out.setncattr( 'campaign',       global_campaign     )
ncfile_out.setncattr( 'comment',        global_comment      )
ncfile_out.setncattr( 'contact_person', global_contact      )
ncfile_out.setncattr( 'data_content',   global_data_content )
ncfile_out.setncattr( 'dependencies',   global_dependencies )
ncfile_out.setncattr( 'institution',    global_institution  )
ncfile_out.setncattr( 'keywords',       global_keywords     )
ncfile_out.setncattr( 'location',       global_location     )
ncfile_out.setncattr( 'references',     global_references   )
ncfile_out.setncattr( 'site',           global_site         )
ncfile_out.setncattr( 'source',         global_source       )
ncfile_out.setncattr( 'palm_version',   global_palm_version )

# Create universal dimension for the string length, shared by all
# measured_variables character arrays.
ncfile_out.createDimension("string_len", max_string_len)
263
264
# Check if observational data is available. In this case, obtain an
# alphabetically sorted list of input directories. The list is sorted
# just for the sake of clarity in the resulting setup file.
if ( input_from_observations ):

   list_input_data = sorted( os.listdir( data_path ) )

   # Run loop over all subdirectories and determine the relevant input file
   # in each of them. Each file becomes one virtual measurement in the model.
   # Note, each virtual measurement has an overhead and consumes memory,
   # which is why only one file per directory is selected.
   input_files = []
   for dirname in list_input_data:
      data_file = data_path + dirname

      # A directory may contain various file versions. Take the one with the
      # highest cycle number (the three digits right before the file suffix).
      # Start at -1 so that a cycle number of 000 is also accepted; formerly
      # such directories led to an unbound 'latest_file'.
      highest_cycle_nr = -1
      latest_file      = None
      for filename in os.listdir( data_file ):
         start_seq = len( filename ) - 6
         end_seq   = len( filename ) - 3
         cycle_nr  = int( filename[start_seq:end_seq] )
         if cycle_nr > highest_cycle_nr:
            highest_cycle_nr = cycle_nr
            latest_file      = filename

      input_files.append( data_file + "/" + latest_file )

   # Define a nested list of default variables that shall be measured, one
   # sublist per input file. Based on this list, the final number of
   # measured variables is determined.
   measured_variables_all_sites = [ list( vars_default ) for _ in input_files ]

   # Run loop over all files that contain observational data
   for counter, input_file in enumerate( input_files ):

      # Open the NetCDF input file
      ncfile_in = Dataset( input_file, "r", format="NETCDF4", encoding='ascii' )

      # 1-based index of the treated measurement; used as suffix for all
      # dimensions, variables and attributes belonging to this site.
      num_vmeas = counter + 1

      # Read global attributes and write them immediately into the output
      # file. Attributes listed in atts_float need to be written as double
      # since their type is not necessarily guaranteed in the input data.
      for att in ncfile_in.ncattrs():
         if ( att == name_featuretype ):
            feature = ncfile_in.getncattr(att)

         if ( att in atts_float ):
            ncfile_out.setncattr( att + str(num_vmeas), np.double(ncfile_in.getncattr(att)) )
         else:
            ncfile_out.setncattr( att + str(num_vmeas), ncfile_in.getncattr(att) )

      # Create the output dimensions depending on the featureType.
      # timeSeries
      if ( feature == name_ts ):
         ntime = len( ncfile_in.dimensions[name_ntime]   )
         nstat = len( ncfile_in.dimensions[name_station] )
         ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
         ncfile_out.createDimension( name_station + str(num_vmeas), nstat )

      # trajectory
      elif ( feature == name_traj ):
         ntime = len( ncfile_in.dimensions[name_ntime]    )
         ntraj = len( ncfile_in.dimensions[name_traj_dim] )
         ncfile_out.createDimension( name_ntime    + str(num_vmeas), ntime )
         ncfile_out.createDimension( name_traj_dim + str(num_vmeas), ntraj )

      # timeSeriesProfile
      else:
         ntime = len( ncfile_in.dimensions[name_ntime]   )
         nstat = len( ncfile_in.dimensions[name_station] )
         nz    = len( ncfile_in.dimensions[name_nz]      )
         ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
         ncfile_out.createDimension( name_station + str(num_vmeas), nstat )
         ncfile_out.createDimension( name_nz      + str(num_vmeas), nz    )

      for var in ncfile_in.variables.keys():
         if ( var in dims_out ):
            # Create a variable and write it to file after it is read. In order to
            # avoid fill values in the dimensions, these are converted to zero
            # before written to file. Depending on the featureType of the measurement,
            # the array shape is different. For more information, please see
            # [UC]2 data standard.
            # Timeseries
            if ( feature == name_ts  ):
               temp_ts = ncfile_out.createVariable( var + str(num_vmeas), float, \
                                                    name_station + str(num_vmeas))
               temp_ts[:] = np.nan_to_num( ncfile_in.variables[var][:] )

            # Trajectories
            elif ( feature == name_traj ):
               # @note: If there are files where station_h is present although featureType is
               #        trajectory, station_h must not be read.
               if var != name_station_h:
                  temp_traj = ncfile_out.createVariable( var + str(num_vmeas), float, \
                                                         ( name_traj_dim + str(num_vmeas), \
                                                           name_ntime + str(num_vmeas) ) )
                  temp_traj[:,:] = np.nan_to_num( ncfile_in.variables[var][:,:] )

            # TimeseriesProfiles
            else:
               if ( var == 'z' ):
                  temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
                                                      ( name_station + str(num_vmeas), \
                                                        name_nz + str(num_vmeas) ) )
                  temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:,0,:] )
               else:
                  temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
                                                       name_station + str(num_vmeas))
                  temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:] )

      # Search for variables to be measured. In case the variable isn't already
      # listed for this site, append it.
      for var in ncfile_in.variables.keys():
         if ( var not in non_measurable_vars  and  \
              var not in vars_default         and  \
              var not in measured_variables_all_sites[counter] ):

            measured_variables_all_sites[counter].append(var)

      # Close the NetCDF input file
      ncfile_in.close()

   # After variables are gathered and dimensions / attributes are already written
   # to file, the list of measured variables is written to file.
   for counter, measured_variables in enumerate( measured_variables_all_sites ):

      num_vmeas = counter + 1

      ncfile_out.createDimension( "nvar"+ str(num_vmeas), \
                                  len( measured_variables ) )

      measured = ncfile_out.createVariable( 'measured_variables' + str(num_vmeas), 'S1', \
                                            ("nvar" + str(num_vmeas), "string_len")) # must be NC_CHAR

      for ind, meas in enumerate( measured_variables ):
         measured[ind] = stringtochar( np.array( meas,"S%s"%(max_string_len) ) )

      # Check if any of the measured variables is a soil variable. Set flag accordingly.
      soil = any( var in soil_vars for var in measured_variables )

      # Write soil flag
      ncfile_out.setncattr( name_soil_sampling + str( num_vmeas), np.int8(soil) )


   # Store the number of observational sites
   num_sites += len( input_files )
[4400]423
424
#  Now process the customized input data. Please note, at the moment only
#  timeseries are possible.
if ( custom_coordinates ):

   count_site = num_sites + 1
   for coord in coordinates:
      # Define mandatory attributes. Customized sites are always of
      # featureType timeSeries and are named custom1, custom2, ...
      ncfile_out.setncattr( name_featuretype + str(count_site),  \
                            name_ts )
      ncfile_out.setncattr( name_site        + str(count_site),  \
                            "custom"         + str(count_site - num_sites) )
      ncfile_out.setncattr( name_orig_x      + str(count_site),  \
                            coord[0] )
      ncfile_out.setncattr( name_orig_y      + str(count_site),  \
                            coord[1] )
      ncfile_out.setncattr( name_orig_z      + str(count_site),  \
                            0.0 )

      # Define dimensions. Each customized measurement is a single station
      # with a single sampling time level.
      ntime = 1
      nstat = 1
      ncfile_out.createDimension( name_ntime   + str(count_site), ntime )
      ncfile_out.createDimension( name_station + str(count_site), nstat )

      # Define coordinate variables
      temp_ts = ncfile_out.createVariable( name_eutm      + str(count_site), \
                                           float,                            \
                                           name_station   + str(count_site) )
      temp_ts[:] = np.array( coord[0] )

      temp_ts = ncfile_out.createVariable( name_nutm      + str(count_site), \
                                           float,                            \
                                           name_station   + str(count_site) )
      temp_ts[:] = np.array( coord[1] )

      temp_ts = ncfile_out.createVariable( name_z         + str(count_site), \
                                           float,                            \
                                           name_station   + str(count_site) )
      temp_ts[:] = np.array( coord[2] )

      temp_ts = ncfile_out.createVariable( name_station_h + str(count_site), \
                                           float,                            \
                                           name_station   + str(count_site) )
      temp_ts[:] = np.array( 0.0 )


      count_site += 1

   # Reset counter variable
   count_site = num_sites + 1

   # Check if variables are prescribed. If so, prepare final output string
   # stored in measured_variables.
   if ( vars_to_be_measured ):

      for custom_vars in vars_to_be_measured:

         # Start from the default variables and extend by the prescribed
         # variables that are not already contained.
         measured_variables = list( vars_default )
         for var in custom_vars:
            if ( var  not in  measured_variables ):
               measured_variables.append(var)

         ncfile_out.createDimension( "nvar"+ str(count_site), \
                                     len( measured_variables ) )

         measured_var = ncfile_out.createVariable( 'measured_variables' + str(count_site), 'S1', \
                                                  ("nvar" + str(count_site), "string_len") ) # must be NC_CHAR

         # Write the variables to the file
         for counter, meas in enumerate( measured_variables ):
            measured_var[counter] = stringtochar( np.array( meas,"S%s"%(max_string_len) ) )

         # Add soil attribute for the current measurement.
         # Bugfix: the former check compared each variable against the whole
         # soil_vars list (var == soil_vars), which can never be True; the
         # soil flag was therefore never set for customized sites. Test
         # membership instead.
         soil = any( var in soil_vars for var in measured_variables )

         # Write soil flag
         ncfile_out.setncattr( name_soil_sampling + str( count_site), np.int8(soil) )

         # Increment counter variable
         count_site += 1

   # Add the number of customized sites.
   num_sites += int( number_positions )
518
519
# Finally, write the total number of sites to the output file and close it.
# Closing the Dataset is required to make sure all buffered data is actually
# flushed to disk (formerly the file was never closed explicitly).
ncfile_out.setncattr( name_num_stat, num_sites )
ncfile_out.close()


print( "*** palm_cvd has been finished. You can find the output file under: " )
print( "    " + output_filename )
Note: See TracBrowser for help on using the repository browser.