source: palm/trunk/SCRIPTS/palm_cvd @ 4679

Last change on this file since 4679 was 4663, checked in by gronemeier, 4 years ago

bugfix in non_measurable_vars; ignore station_h if featureType is trajectory (palm_cvd)

  • Property svn:executable set to *
  • Property svn:keywords set to ID
File size: 21.9 KB
RevLine 
[4400]1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4#--------------------------------------------------------------------------------#
5# This file is part of the PALM model system.
6#
7# PALM is free software: you can redistribute it and/or modify it under the terms
8# of the GNU General Public License as published by the Free Software Foundation,
9# either version 3 of the License, or (at your option) any later version.
10#
11# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
12# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License along with
16# PALM. If not, see <http://www.gnu.org/licenses/>.
17#
18# Copyright 1997-2020  Leibniz Universitaet Hannover
19#--------------------------------------------------------------------------------#
20#
21# Current revisions:
22# -----------------
[4663]23#
24#
[4400]25# Former revisions:
26# -----------------
27# $Id$
[4663]28# bugfix in non_measurable_vars; ignore station_h if featureType is trajectory
29#
30# 4400 2020-02-10 20:32:41Z suehring
[4400]31# Initial revision
32#
33# Description:
34# ------------
35# Processing tool for creating PIDS conform virtual measurement setup file
36# from UC2 data-standard conform observational data or from prescribed input
37# coordinates.
38#
39# @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
40#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
41#
# @todo Add further feature types for customized observations. At the moment only
43#       timeSeries is possible.
44#--------------------------------------------------------------------------------#
45
46
47import netCDF4
48from netCDF4 import Dataset, stringtochar
49import os
50import numpy as np
51
52
# Function to read the config file
def read_config_file():
   """Read the palm_cvd configuration (INI) file and populate the
   module-level configuration variables.

   The configuration file name is taken from the last command-line
   argument; if no argument is given, ".cvd.config.default" is used.
   Recognized sections are:
      'global'           - attributes written into the output file header
      'input'            - path to UC2-standard observational data
      'output'           - path and name of the virtual-measurement driver
      'custom_positions' - user-prescribed coordinates and variables

   Returns:
      int: 0 on success. Exits with a non-zero status if the
      configuration file does not exist.
   """
   import configparser
   import json
   import os
   import sys

   # Definition of global configuration parameters
   global global_acronym
   global global_author
   global global_campaign
   global global_comment
   global global_contact
   global global_data_content
   global global_dependencies
   global global_institution
   global global_keywords
   global global_location
   global global_references
   global global_site
   global global_source
   global global_palm_version
   global data_path
   global output_path
   global output_filename
   global number_positions
   global input_from_observations
   global coordinates
   global vars_to_be_measured
   global custom_coordinates

   # Default values. A single blank marks "not set" for string attributes.
   global_acronym          = " "
   global_author           = " "
   global_campaign         = " "
   global_comment          = " "
   global_contact          = " "
   global_data_content     = " "
   global_dependencies     = " "
   global_institution      = " "
   global_keywords         = " "
   global_location         = " "
   global_references       = " "
   global_site             = " "
   global_source           = " "
   global_palm_version     = 6.0
   data_path               = " "
   output_path             = " "
   output_filename         = "none"
   number_positions        = -999
   input_from_observations = False
   coordinates             = []
   vars_to_be_measured     = []
   custom_coordinates      = False

   # The configuration file may be given as a command-line argument;
   # if several are given, the last one wins (as in the original loop
   # over sys.argv).
   input_config = ".cvd.config.default"
   if len( sys.argv ) > 1:
      input_config = str( sys.argv[-1] )

   # Allow empty settings
   config = configparser.RawConfigParser(allow_no_value=True)

   # Abort if no configuration file exists. Exit with a non-zero status
   # so shell scripts can detect the failure (the previous quit() call
   # exited with status 0 even on error).
   if not os.path.isfile( input_config ):
      print ("Error. No configuration file " + input_config + " found.")
      sys.exit( 1 )

   config.read(input_config)

   for current_section in config.sections():

      # Read global attributes which are written into the output file header
      if ( current_section == 'global' ):

         global_acronym      = config.get( current_section, 'acronym'        )
         global_author       = config.get( current_section, 'author'         )
         global_campaign     = config.get( current_section, 'campaign'       )
         global_comment      = config.get( current_section, 'comment'        )
         global_contact      = config.get( current_section, 'contact_person' )
         global_data_content = config.get( current_section, 'data_content'   )
         global_dependencies = config.get( current_section, 'dependencies'   )
         global_institution  = config.get( current_section, 'institution'    )
         global_keywords     = config.get( current_section, 'keywords'       )
         global_location     = config.get( current_section, 'location'       )
         global_references   = config.get( current_section, 'references'     )
         global_site         = config.get( current_section, 'site'           )
         global_source       = config.get( current_section, 'source'         )
         global_palm_version = float( config.get( current_section, 'palm_version' ) )

      # Read data input path for observational data
      elif ( current_section == 'input' ):

         data_path = config.get( current_section, 'data_path' )
         input_from_observations = True

      # Read output path and filename for the VM driver
      elif ( current_section == 'output' ):

         output_path     = config.get( current_section, 'output_path' )
         output_filename = config.get( current_section, 'output_filename' )

      # Read customized coordinates where virtual measurements shall be taken,
      # as well as the variables that should be sampled. Coordinates and
      # variable lists are JSON-encoded and numbered starting from 1.
      elif ( current_section == 'custom_positions' ):

         number_positions = config.get( current_section, 'number_positions' )

         for count in range( 0, int( number_positions ) ):
            coordinates.append( json.loads( config.get( current_section, \
                                                        "coordinates" + str( count + 1 ) ) ) )
            # If at least one coordinate set is given, set a global flag.
            custom_coordinates = True

         for count in range( 0, int( number_positions ) ):
            vars_to_be_measured.append( json.loads( config.get( current_section, \
                                                    "vars_to_be_measured" + str( count + 1 ) ) ) )

   return 0
175
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Main program:
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


# Define strings: names of attributes, dimensions and variables as they
# appear in the UC2-standard input files and in the PIDS output file.
name_featuretype   = "featureType"
name_ts            = "timeSeries"
name_traj          = "trajectory"
name_ntime         = "ntime"
name_time          = "time"
name_station       = "station"
name_traj_dim      = "traj"
name_nz            = "nz"
name_datacontent   = "data_content"
name_eutm          = "E_UTM"
name_nutm          = "N_UTM"
name_hao           = "height_above_origin"
name_station_h     = "station_h"
name_z             = "z"
name_soil_sampling = "soil_sample"
name_num_stat      = "number_of_stations"
name_fill          = "_FillValue"
name_site          = "site"
name_orig_x        = "origin_x"
name_orig_y        = "origin_y"
name_orig_z        = "origin_z"

# Fixed character length of the "string_len" dimension used for
# NC_CHAR-encoded variable names in the output file.
max_string_len     = 50

name_measvars      = "measured_variables"

# Coordinate/auxiliary variables in the input files that must never be
# treated as measurable quantities.
non_measurable_vars = ['station_name', 'time', 'time_bounds', 'crs', \
                       'vrs', 'x', 'y', 'z', 'lon', 'lat', 'ntime', 'station', 'traj', \
                       'E_UTM', 'N_UTM', 'height_above_origin', 'station_h', \
                       'traj_name', 'height', 'band_pm_size', 'bands_pm', 'bands_pm_size_bounds', \
                       'bands_pm_size', 'ancillary_detected_layer' ]

# Variables that indicate soil sampling at a site.
soil_vars            = [ 't_soil', 'm_soil', 'lwc', 'lwcs', 'smp' ]

# Coordinate variables that are copied from input to output per site.
dims_out             = [ name_eutm, name_nutm, name_hao, name_z, name_station_h ]

# Define list of attributes which need to be of type float. In the data set this is not
# necessarily guaranteed.
atts_float           = [ 'origin_x', 'origin_y', 'origin_z', 'origin_lon', 'origin_lat', 'rotation_angle' ]

# Define list of default variables that shall be measured at each site
vars_default         = [ 'u', 'v', 'w', 'theta', 'hus' ]
224
225
#Read config file and populate the module-level configuration globals.
read_config_file()

# Initialize counter variable for the number of sites
num_sites = 0

# Set the output path for the data.
# NOTE(review): output_path is concatenated as-is, so it must already end
# with a path separator — no "/" is inserted here.
output_filename = output_path + output_filename

# Open output file
ncfile_out = Dataset( output_filename, "w", format="NETCDF4" )

# First, add global attributes taken from the configuration file
ncfile_out.setncattr( 'acronym',        global_acronym      )
ncfile_out.setncattr( 'author',         global_author       )
ncfile_out.setncattr( 'campaign',       global_campaign     )
ncfile_out.setncattr( 'comment',        global_comment      )
ncfile_out.setncattr( 'contact_person', global_contact      )
ncfile_out.setncattr( 'data_content',   global_data_content )
ncfile_out.setncattr( 'dependencies',   global_dependencies )
ncfile_out.setncattr( 'institution',    global_institution  )
ncfile_out.setncattr( 'keywords',       global_keywords     )
ncfile_out.setncattr( 'location',       global_location     )
ncfile_out.setncattr( 'references',     global_references   )
ncfile_out.setncattr( 'site',           global_site         )
ncfile_out.setncattr( 'source',         global_source       )
ncfile_out.setncattr( 'palm_version',   global_palm_version )

# Create universal dimension for the string length.
ncfile_out.createDimension("string_len", max_string_len)
256
257
# Check if observational data is available. In this case, obtain an
# alphabetically sorted list of input data. The list is sorted just for
# the sake of clarity in the resulting setup file.
if ( input_from_observations == True ):
   list_input_data = sorted( os.listdir( data_path ) )

if ( input_from_observations ):

   # First pass: run loop over all subdirectories, detect the files and extract
   # a list of unique sites. This is done to reduce the number of virtual
   # measurements in the model. Each virtual measurement has an overhead and
   # consumes memory.
   sites = []
   for dirname in list_input_data:
      data_file = data_path + dirname

      # Directory may contain various file versions.
      # Take the one with highest cycle number.
      # NOTE(review): this assumes file names carry a three-digit cycle
      # number in the last six characters (e.g. "...NNN.nc") — confirm
      # against the UC2 file-naming convention.
      highest_cycle_nr = 0
      for filename in os.listdir(data_file):
         start_seq = len( filename ) - 6
         end_seq   = len( filename ) - 3
         if int( filename[start_seq:end_seq] ) > highest_cycle_nr:
            highest_cycle_nr = int(filename[start_seq:end_seq])
            latest_file      = filename

      # Open the NetCDF file
      input_file = data_file + "/" + latest_file
      ncfile_in = Dataset( input_file, "r", format="NETCDF4", encoding='ascii')

      # Read only the 'site' global attribute in this pass.
      # NOTE(review): ncfile_in is not closed in this first pass; the handle
      # is released only when the object is garbage collected.
      for att in ncfile_in.ncattrs():
         if ( att == name_site ):
            site = ncfile_in.getncattr(att)

      if ( site not in sites ):
         sites.append(site)

   # Define a flag array that is used to identify whether site dimensions are already
   # defined or not.
   create_metadata_for_site = [None] * len(sites)

   # Define a nested list of default variables that shall be measured. Based on this list,
   # the final number of measured variables is determined.
   measured_variables_all_sites = [ ['u', 'v', 'w', 'theta', 'hus'] for var in range(0, len(sites))]

   # Second pass: run loop over all subdirectories that contain observational data
   for dirname in list_input_data:
      data_file = data_path + dirname

      # Directory may contain various file versions.
      # Take the one with highest cycle number.
      highest_cycle_nr = 0
      for filename in os.listdir(data_file):
         start_seq = len( filename ) - 6
         end_seq   = len( filename ) - 3
         if int( filename[start_seq:end_seq] ) > highest_cycle_nr:
            highest_cycle_nr = int(filename[start_seq:end_seq])
            latest_file      = filename

      # Open the NetCDF input file
      input_file = data_file + "/" + latest_file
      ncfile_in = Dataset( input_file, "r", format="NETCDF4", encoding='ascii' )

      # Read site attribute first
      for att in ncfile_in.ncattrs():
         if ( att == name_site ):
            site = ncfile_in.getncattr(att)

      # Determine index for the treated site. Output dimensions, variables and
      # attributes are suffixed with this 1-based index.
      num_vmeas = sites.index( site ) + 1

      # Check whether metadata for this site has been already created
      if ( create_metadata_for_site[sites.index( site )] != "Done" ):

         # Read global attributes and write them immediately into the output file
         for att in ncfile_in.ncattrs():
            if ( att == name_featuretype ):
               feature = ncfile_in.getncattr(att)
            if ( att == name_datacontent ):
               content = ncfile_in.getncattr(att)
            if ( att == name_site ):
               site = ncfile_in.getncattr(att)

            # Attributes listed in atts_float must be stored as float even if
            # the input data set stores them as strings.
            if ( att in atts_float ):
               ncfile_out.setncattr( att + str(num_vmeas), np.double(ncfile_in.getncattr(att)) )
            else:
               ncfile_out.setncattr( att + str(num_vmeas), ncfile_in.getncattr(att) )

         #timeSeries: dimensions are (station, ntime)
         if ( feature == name_ts ):
            ntime = len( ncfile_in.dimensions[name_ntime]   )
            nstat = len( ncfile_in.dimensions[name_station] )
            ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
            ncfile_out.createDimension( name_station + str(num_vmeas), nstat )

         #trajectory: dimensions are (traj, ntime)
         elif ( feature == name_traj ):
            ntime = len( ncfile_in.dimensions[name_ntime]   )
            ntraj = len( ncfile_in.dimensions[name_traj_dim] )
            ncfile_out.createDimension( name_ntime    + str(num_vmeas), ntime )
            ncfile_out.createDimension( name_traj_dim + str(num_vmeas), ntraj )

         #timeseriesProfile (fallback for any other featureType):
         #dimensions are (station, ntime, nz)
         else:
            ntime = len( ncfile_in.dimensions[name_ntime]   )
            nstat = len( ncfile_in.dimensions[name_station] )
            nz    = len( ncfile_in.dimensions[name_nz]      )
            ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
            ncfile_out.createDimension( name_station + str(num_vmeas), nstat )
            ncfile_out.createDimension( name_nz      + str(num_vmeas), nz    )

         for var in ncfile_in.variables.keys():
            if ( var in dims_out ):
               # Create a variable and write it to file after it is read. In order to
               # avoid fill values in the dimensions, these are converted to zero
               # (via np.nan_to_num) before written to file. Depending on the
               # featureType of the measurement, the array shape is different.
               # For more information, please see the [UC]2 data standard.
               # Timeseries
               if ( feature == name_ts  ):
                  temp_ts = ncfile_out.createVariable( var + str(num_vmeas), float, \
                                                       name_station + str(num_vmeas))
                  temp_ts[:] = np.nan_to_num( ncfile_in.variables[var][:] )

               # Trajectories
               elif ( feature == name_traj ):
                  # @note: If there are files where station_h is present although featureType is
                  #        trajectory, station_h must not be read.
                  if var != name_station_h:
                     temp_traj = ncfile_out.createVariable( var + str(num_vmeas), float, \
                                                            ( name_traj_dim + str(num_vmeas), \
                                                              name_ntime + str(num_vmeas) ) )
                     temp_traj[:,:] = np.nan_to_num( ncfile_in.variables[var][:,:] )

               # TimeseriesProfiles
               else:
                  # 'z' is height-resolved; other coordinate variables are
                  # stored per station only.
                  if ( var == 'z' ):
                     temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
                                                         ( name_station + str(num_vmeas), \
                                                           name_nz + str(num_vmeas) ) )
                     temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:,0,:] )
                  else:
                     temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
                                                          name_station + str(num_vmeas))
                     temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:] )

      # Search for variables to be measured. In case the variable isn't already defined,
      # append the variable to the list.
      for var in ncfile_in.variables.keys():
         if ( var not in non_measurable_vars  and  \
              var not in vars_default         and  \
              var not in measured_variables_all_sites[sites.index( site )] ):

            measured_variables_all_sites[sites.index( site )].append(var)

      # Close the NetCDF input file
      ncfile_in.close()

      # Set flag to indicate that for this specific site dimensions have been
      # already created and attributes are already set.
      if ( create_metadata_for_site[sites.index( site )] != "Done" ):
         create_metadata_for_site[sites.index( site )] = "Done"

   # After variables are gathered and dimensions / attributes are already written to file,
   # the list of measured variables is written to file.
   for site in sites:

      num_vmeas = sites.index( site ) + 1

      ncfile_out.createDimension( "nvar"+ str(num_vmeas), \
                                  len( measured_variables_all_sites[sites.index( site )] ) )

      measured = ncfile_out.createVariable( 'measured_variables' + str(num_vmeas), 'S1', \
                                            ("nvar" + str(num_vmeas), "string_len")) # must be NC_CHAR

      # Names are stored as fixed-length character arrays of max_string_len.
      for counter, meas in enumerate( measured_variables_all_sites[sites.index( site )] ):
         measured[counter] = stringtochar( np.array( meas,"S%s"%(max_string_len) ) )

      # Check if any of the measured variables is a soil variable. Set flag accordingly.
      soil = False
      for var in measured_variables_all_sites[sites.index( site )]:
         if ( var in soil_vars ):
            soil = True

      # Write soil flag
      ncfile_out.setncattr( name_soil_sampling + str( num_vmeas), np.int8(soil) )



   # Store the number of observational sites
   num_sites += len( sites )
449
450
# Now process the customized input data. Please note, at the moment only
# timeseries are possible.
if ( custom_coordinates ):

   # Custom sites are numbered after the observational sites.
   count_site = num_sites + 1
   for coord in coordinates:
      # Define mandatory attributes. Each custom site is a timeSeries
      # measurement named "customN".
      ncfile_out.setncattr( name_featuretype + str(count_site),  \
                            name_ts )
      ncfile_out.setncattr( name_site        + str(count_site),  \
                            "custom"         + str(count_site - num_sites) )
      ncfile_out.setncattr( name_orig_x      + str(count_site),  \
                            coord[0] )
      ncfile_out.setncattr( name_orig_y      + str(count_site),  \
                            coord[1] )
      ncfile_out.setncattr( name_orig_z      + str(count_site),  \
                            0.0 )

      # Define dimensions: one station, one sampling point in time.
      ntime = 1
      nstat = 1
      ncfile_out.createDimension( name_ntime   + str(count_site), ntime )
      ncfile_out.createDimension( name_station + str(count_site), nstat )

      # Define coordinate variables (E_UTM, N_UTM, z, station_h)
      temp_ts = ncfile_out.createVariable( name_eutm      + str(count_site), \
                                           float,                            \
                                           name_station   + str(count_site) )
      temp_ts[:] = np.array( coord[0] )

      temp_ts = ncfile_out.createVariable( name_nutm      + str(count_site), \
                                           float,                            \
                                           name_station   + str(count_site) )
      temp_ts[:] = np.array( coord[1] )

      temp_ts = ncfile_out.createVariable( name_z         + str(count_site), \
                                           float,                            \
                                           name_station   + str(count_site) )
      temp_ts[:] = np.array( coord[2] )

      temp_ts = ncfile_out.createVariable( name_station_h + str(count_site), \
                                           float,                            \
                                           name_station   + str(count_site) )
      temp_ts[:] = np.array( 0.0 )


      count_site += 1

   # Reset counter variable
   count_site = num_sites + 1

   # Check if variables are prescribed. If so, prepare final output string
   # stored in measured_variables.
   if ( vars_to_be_measured ):

      for custom_vars in vars_to_be_measured:

         # Start from the default variables and extend by the prescribed
         # ones that are not already contained.
         measured_variables = list( vars_default )
         for var in custom_vars:
            if ( var  not in  measured_variables ):
               measured_variables.append(var)

         ncfile_out.createDimension( "nvar"+ str(count_site), \
                                     len( measured_variables ) )

         measured_var = ncfile_out.createVariable( 'measured_variables' + str(count_site), 'S1', \
                                                  ("nvar" + str(count_site), "string_len") ) # must be NC_CHAR

         # Write the variables to the file
         for counter, meas in enumerate( measured_variables ):
            measured_var[counter] = stringtochar( np.array( meas,"S%s"%(max_string_len) ) )

         # Add soil attribute for the current measurement.
         # Bugfix: the original condition "any( var == soil_vars ... )"
         # compared each variable name against the whole soil_vars list and
         # was therefore never True; use a membership test instead.
         soil = any( var in soil_vars for var in measured_variables )

         # Write soil flag
         ncfile_out.setncattr( name_soil_sampling + str( count_site), np.int8(soil) )

         # Increment counter variable
         count_site += 1

   # Add the number of customized sites.
   num_sites += int( number_positions )
544
545
# Finally, write the total number of sites to the output file
ncfile_out.setncattr( name_num_stat, num_sites )

# Close the output file explicitly so all buffered data is flushed to disk
# (previously the script relied on the interpreter exit to flush the file).
ncfile_out.close()


print( "*** palm_cvd has been finished. You can find the output file under: " )
print( "    " + output_filename )

quit()
Note: See TracBrowser for help on using the repository browser.