source: palm/trunk/SCRIPTS/palm_cvd @ 4436

Last change on this file since 4436 was 4400, checked in by suehring, 5 years ago

Revision of the virtual-measurement module: data input from NetCDF file; removed binary output - instead parallel NetCDF output using the new data-output module; variable attributes added; further variables added that can be sampled, file connections added; Functions for coordinate transformation moved to basic_constants_and_equations; netcdf_data_input_mod: unused routines netcdf_data_input_att and netcdf_data_input_var removed; new routines to inquire fill values added; Preprocessing script (palm_cvd) to setup virtual-measurement input files provided; postprocessor combine_virtual_measurements deleted

  • Property svn:executable set to *
  • Property svn:keywords set to ID
File size: 21.7 KB
Line 
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4#--------------------------------------------------------------------------------#
5# This file is part of the PALM model system.
6#
7# PALM is free software: you can redistribute it and/or modify it under the terms
8# of the GNU General Public License as published by the Free Software Foundation,
9# either version 3 of the License, or (at your option) any later version.
10#
11# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
12# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License along with
16# PALM. If not, see <http://www.gnu.org/licenses/>.
17#
18# Copyright 1997-2020  Leibniz Universitaet Hannover
19#--------------------------------------------------------------------------------#
20#
21# Current revisions:
22# -----------------
23#
24#
25# Former revisions:
26# -----------------
27# $Id$
28# Initial revision
29#
30#
31# $
32#
33# Description:
34# ------------
35# Processing tool for creating PIDS conform virtual measurement setup file
36# from UC2 data-standard conform observational data or from prescribed input
37# coordinates.
38#
39# @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
40#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
41#
42# @todo Add further feature tpyes for customized observations. At the moment only
43#       timeSeries is possible.
44#--------------------------------------------------------------------------------#
45
46
47import netCDF4
48from netCDF4 import Dataset, stringtochar
49import os
50import numpy as np
51
52
53# Function to read the config file
54def read_config_file():
55
56   import configparser
57   import os
58   import sys
59   import json
60
61   # Definition of global configuration parameters
62   global global_acronym     
63   global global_author     
64   global global_campaign   
65   global global_comment     
66   global global_contact     
67   global global_data_content
68   global global_dependencies
69   global global_institution
70   global global_keywords   
71   global global_location   
72   global global_references 
73   global global_site       
74   global global_source     
75   global global_palm_version
76   global data_path         
77   global output_path       
78   global output_filename   
79   global number_positions
80   global input_from_observations
81   global coordinates
82   global vars_to_be_measured
83   global custom_coordinates
84
85   global_acronym          = " "
86   global_author           = " "
87   global_campaign         = " "
88   global_comment          = " "
89   global_contact          = " "
90   global_data_content     = " "
91   global_dependencies     = " "
92   global_institution      = " "
93   global_keywords         = " "
94   global_location         = " "
95   global_references       = " "
96   global_site             = " "
97   global_source           = " "
98   global_palm_version     = 6.0
99   data_path               = " "
100   output_path             = " "
101   output_filename         = "none"
102   number_positions        = -999
103   input_from_observations = False
104   coordinates             = []
105   vars_to_be_measured     = []
106   custom_coordinates      = False
107
108   # Check if configuration files exists and quit otherwise
109   input_config = ".cvd.config.default"
110   for i in range(1,len(sys.argv)): 
111      input_config = str(sys.argv[i])
112
113   # Allow empty settings
114   config = configparser.RawConfigParser(allow_no_value=True)
115
116   # Check if a config file exists.
117   if ( os.path.isfile(input_config) == False ):
118      print ("Error. No configuration file " + input_config + " found.")
119      quit()
120
121   config.read(input_config)
122   
123   for section in range( 0, len( config.sections() ) ):
124
125      current_section = config.sections()[section]
126
127      # read global attributes which are written into the output file header
128      if ( current_section == 'global' ):
129
130         global_acronym      = config.get( current_section, 'acronym'        )
131         global_author       = config.get( current_section, 'author'         )
132         global_campaign     = config.get( current_section, 'campaign'       )
133         global_comment      = config.get( current_section, 'comment'        )
134         global_contact      = config.get( current_section, 'contact_person' )
135         global_data_content = config.get( current_section, 'data_content'   )
136         global_dependencies = config.get( current_section, 'dependencies'   )
137         global_institution  = config.get( current_section, 'institution'    )
138         global_keywords     = config.get( current_section, 'keywords'       )
139         global_location     = config.get( current_section, 'location'       )
140         global_references   = config.get( current_section, 'references'     )
141         global_site         = config.get( current_section, 'site'           )
142         global_source       = config.get( current_section, 'source'         )
143         global_palm_version = float( config.get( current_section, 'palm_version' ) )
144
145      # Read data input path for observational data
146      elif ( current_section == 'input' ):
147
148         data_path = config.get( current_section, 'data_path' )
149         input_from_observations = True
150
151      # Read output path and filename for the VM driver
152      elif ( current_section == 'output' ):
153
154         output_path     = config.get( current_section, 'output_path' )
155         output_filename = config.get( current_section, 'output_filename' )
156
157      # Read customized coordinates where virtual measurements shall be taken,
158      # as well as the variables that should be sampled.
159      elif ( current_section == 'custom_positions' ):
160
161         number_positions = config.get( current_section, 'number_positions' )
162
163         for count in range( 0, int( number_positions ) ):
164            coordinates.append( json.loads( config.get( current_section, \
165                                                        "coordinates" + str( count + 1 ) ) ) )
166            # If coordinates are given, set a global flag.
167            custom_coordinates = True
168
169         for count in range( 0, int( number_positions ) ):
170            vars_to_be_measured.append( json.loads( config.get( current_section, \
171                                                    "vars_to_be_measured" + str( count + 1 ) ) ) )
172
173
174   return 0
175
176#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
177# Main program:
178#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
179
180
181# Define strings
182name_featuretype   = "featureType"
183name_ts            = "timeSeries"
184name_traj          = "trajectory"
185name_ntime         = "ntime"
186name_time          = "time"
187name_station       = "station"
188name_traj_dim      = "traj"
189name_nz            = "nz"
190name_datacontent   = "data_content"
191name_eutm          = "E_UTM"
192name_nutm          = "N_UTM"
193name_hao           = "height_above_origin"
194name_station_h     = "station_h"
195name_z             = "z"
196name_soil_sampling = "soil_sample"
197name_num_stat      = "number_of_stations"
198name_fill          = "_FillValue"
199name_site          = "site"
200name_orig_x        = "origin_x"
201name_orig_y        = "origin_y"
202name_orig_z        = "origin_z"
203
204max_string_len     = 50
205
206name_measvars      = "measured_variables"
207
208non_measurable_vars = ['station_name', 'time', 'time_bounds', 'crs', \
209                       'vrs', 'x', 'y', 'z', 'lon', 'lat', 'ntime', 'station', 'traj', \
210                       'E_UTM', 'N_UTM', 'height_above_origin', 'station_h', \
211                       'traj_name', 'height', 'band_pm_size', 'bands_pm', 'bands_pm_size_bounds' \
212                       'bands_pm_size', 'ancillary_detected_layer' ]
213
214soil_vars            = [ 't_soil', 'm_soil', 'lwc', 'lwcs', 'smp' ]
215
216dims_out             = [ name_eutm, name_nutm, name_hao, name_z, name_station_h ]
217
218# Define list of attributes which need to be of type float. In the data set this is not
219# necessarily guranteed.
220atts_float           = [ 'origin_x', 'origin_y', 'origin_z', 'origin_lon', 'origin_lat', 'rotation_angle' ]
221
222# Define list of default variables that shall be measured at each site
223vars_default         = [ 'u', 'v', 'w', 'theta', 'hus' ]
224
225
226#Read config file
227read_config_file()
228
229# Initialize counter variable for the number of sites
230num_sites = 0
231
232# Set the output path for the data
233output_filename = output_path + output_filename
234
235# Open output file
236ncfile_out = Dataset( output_filename, "w", format="NETCDF4" )
237
238# First, add global attributes
239ncfile_out.setncattr( 'acronym',        global_acronym      )
240ncfile_out.setncattr( 'author',         global_author       )
241ncfile_out.setncattr( 'campaign',       global_campaign     )
242ncfile_out.setncattr( 'comment',        global_comment      )
243ncfile_out.setncattr( 'contact_person', global_contact      )
244ncfile_out.setncattr( 'data_content',   global_data_content )
245ncfile_out.setncattr( 'dependencies',   global_dependencies )
246ncfile_out.setncattr( 'institution',    global_institution  )
247ncfile_out.setncattr( 'keywords',       global_keywords     )
248ncfile_out.setncattr( 'location',       global_location     )
249ncfile_out.setncattr( 'references',     global_references   )
250ncfile_out.setncattr( 'site',           global_site         )
251ncfile_out.setncattr( 'source',         global_source       )
252ncfile_out.setncattr( 'palm_version',   global_palm_version )
253
254# Create universal dimension for the string length.
255ncfile_out.createDimension("string_len", max_string_len)
256
257
258# Check if observational data is available. This case,
259# obtain an alphabetically sorted list of input data. List is sorted
260# just for the sake of clarity in the resulting setup file.
261if ( input_from_observations == True ):
262   list_input_data = sorted( os.listdir( data_path ) )
263
264if ( input_from_observations ):
265
266   # Run loop over all subdirectories, detect the files and extract a list of sites.
267   # This is done to reduce the number of virtual measurements in the model. Each
268   # virtual measurement has an overhead and consumes memory.
269   sites = []
270   for dirname in list_input_data:
271       data_file = data_path + dirname
272
273       # Directory may contain various file versions.
274       # Take the one with highest cycle number.
275       highest_cycle_nr = 0
276       for filename in os.listdir(data_file):
277          start_seq = len( filename ) - 6
278          end_seq   = len( filename ) - 3
279          if int( filename[start_seq:end_seq] ) > highest_cycle_nr:
280             highest_cycle_nr = int(filename[start_seq:end_seq])
281             latest_file      = filename
282
283       # Open the NetCDF file
284       input_file = data_file + "/" + latest_file
285       ncfile_in = Dataset( input_file, "r", format="NETCDF4", encoding='ascii')
286
287       # Read global attributes and write them immediately into the output file
288       for att in ncfile_in.ncattrs():
289          if ( att == name_site ):
290             site = ncfile_in.getncattr(att)
291
292       if ( site not in sites ):
293          sites.append(site)
294
295   # Define a flag array that is used to identify whether site dimensions are already
296   # defined or not.
297   create_metadata_for_site = [None] * len(sites)
298
299   # Define a nested list of default variables that shall be measured. Based on this list,
300   # the final number of measured variables is determined.
301   measured_variables_all_sites = [ ['u', 'v', 'w', 'theta', 'hus'] for var in range(0, len(sites))]
302
303   # Run loop over all subdirectories that contain observational data
304   for dirname in list_input_data:
305      data_file = data_path + dirname
306
307      # Directory may contain various file versions.
308      # Take the one with highest cycle number.
309      highest_cycle_nr = 0
310      for filename in os.listdir(data_file):
311         start_seq = len( filename ) - 6
312         end_seq   = len( filename ) - 3
313         if int( filename[start_seq:end_seq] ) > highest_cycle_nr:
314            highest_cycle_nr = int(filename[start_seq:end_seq])
315            latest_file      = filename
316
317      # Open the NetCDF input file
318      input_file = data_file + "/" + latest_file
319      ncfile_in = Dataset( input_file, "r", format="NETCDF4", encoding='ascii' )
320
321      # Read site attribue first
322      for att in ncfile_in.ncattrs():
323         if ( att == name_site ):
324            site = ncfile_in.getncattr(att)
325
326      # Determine index for the treated site
327      num_vmeas = sites.index( site ) + 1
328
329      # Check whether metadata for this site has been already created
330      if ( create_metadata_for_site[sites.index( site )] != "Done" ):
331
332         # Read global attributes and write them immediately into the output file
333         for att in ncfile_in.ncattrs():
334            if ( att == name_featuretype ):
335               feature = ncfile_in.getncattr(att)
336            if ( att == name_datacontent ):
337               content = ncfile_in.getncattr(att)
338            if ( att == name_site ):
339               site = ncfile_in.getncattr(att)
340
341            if ( att in atts_float ):
342               ncfile_out.setncattr( att + str(num_vmeas), np.double(ncfile_in.getncattr(att)) )
343            else:
344               ncfile_out.setncattr( att + str(num_vmeas), ncfile_in.getncattr(att) )
345
346         #timeSeries
347         if ( feature == name_ts ):
348            ntime = len( ncfile_in.dimensions[name_ntime]   )
349            nstat = len( ncfile_in.dimensions[name_station] )
350            ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
351            ncfile_out.createDimension( name_station + str(num_vmeas), nstat )
352
353         #trajectory
354         elif ( feature == name_traj ):
355            ntime = len( ncfile_in.dimensions[name_ntime]   )
356            ntraj = len( ncfile_in.dimensions[name_traj_dim] )
357            ncfile_out.createDimension( name_ntime    + str(num_vmeas), ntime )
358            ncfile_out.createDimension( name_traj_dim + str(num_vmeas), ntraj )
359
360         #timeseriesProfile
361         else:
362            ntime = len( ncfile_in.dimensions[name_ntime]   )
363            nstat = len( ncfile_in.dimensions[name_station] )
364            nz    = len( ncfile_in.dimensions[name_nz]      )
365            ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
366            ncfile_out.createDimension( name_station + str(num_vmeas), nstat )
367            ncfile_out.createDimension( name_nz      + str(num_vmeas), nz    )
368
369         for var in ncfile_in.variables.keys():
370            if ( var in dims_out ):
371               # Create a variable and write it to file after it is read. In order to
372               # avoid fill values in the dimensions, these are converted to zero
373               # before written to file. Depending on the featureType of the measurement,
374               # the array shape is different. For more informations, please see
375               # [UC]2 data standard.
376               # Timeseries
377               if ( feature == name_ts  ): 
378                  temp_ts = ncfile_out.createVariable( var + str(num_vmeas), float, \
379                                                       name_station + str(num_vmeas))
380                  temp_ts[:] = np.nan_to_num( ncfile_in.variables[var][:] )
381
382               # Trajectories
383               elif ( feature == name_traj ):
384                  temp_traj = ncfile_out.createVariable( var + str(num_vmeas), float, \
385                                                         ( name_traj_dim + str(num_vmeas), \
386                                                           name_ntime + str(num_vmeas) ) )
387                  temp_traj[:,:] = np.nan_to_num( ncfile_in.variables[var][:,:] )
388
389               # TimeseriesProfiles
390               else:
391                  if ( var == 'z' ):
392                     temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
393                                                         ( name_station + str(num_vmeas), \
394                                                           name_nz + str(num_vmeas) ) )
395                     temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:,0,:] )
396                  else:
397                     temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
398                                                          name_station + str(num_vmeas))
399                     temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:] )
400
401      # Search for variables to be measured. In case the variable isn't already defined,
402      # append the variable to the list.
403      for var in ncfile_in.variables.keys():
404         if ( var not in non_measurable_vars  and  \
405              var not in vars_default         and  \
406              var not in measured_variables_all_sites[sites.index( site )] ):
407
408            measured_variables_all_sites[sites.index( site )].append(var)
409
410      # Close the NetCDF input file
411      ncfile_in.close()
412
413      # Set flag to indicate that for this specific site dimensions have been
414      # already created and attributes are already set.
415      if ( create_metadata_for_site[sites.index( site )] != "Done" ):
416         create_metadata_for_site[sites.index( site )] = "Done"
417
418   # After variables are gathered and dimensions / attributes are already written to file,
419   # the list of measured variables is written to file.
420   for site in sites:
421
422      num_vmeas = sites.index( site ) + 1
423
424      ncfile_out.createDimension( "nvar"+ str(num_vmeas), \
425                                  len( measured_variables_all_sites[sites.index( site )] ) )
426
427      measured = ncfile_out.createVariable( 'measured_variables' + str(num_vmeas), 'S1', \
428                                            ("nvar" + str(num_vmeas), "string_len")) # must be NC_CHAR
429
430      for counter, meas in enumerate( measured_variables_all_sites[sites.index( site )] ):
431         measured[counter] = stringtochar( np.array( meas,"S%s"%(max_string_len) ) )
432
433      # Check if any of the measured variables is a soil variable. Set flag accordingly.
434      soil = False
435      for var in measured_variables_all_sites[sites.index( site )]:
436         if ( var in soil_vars ):
437            soil = True
438
439      # Write soil flag
440      ncfile_out.setncattr( name_soil_sampling + str( num_vmeas), np.int8(soil) )
441
442
443
444    # Store the number of observational sites
445   num_sites += len( sites )
446
447
448#  Now process the customized input data. Please note, at the moment only timeseries are
449# are possible.
450if ( custom_coordinates ):
451
452   count_site = num_sites + 1
453   for coord in coordinates:
454      # Define mandatory attributes
455      ncfile_out.setncattr( name_featuretype + str(count_site),  \
456                            name_ts )
457      ncfile_out.setncattr( name_site        + str(count_site),  \
458                            "custom"         + str(count_site - num_sites) )
459      ncfile_out.setncattr( name_orig_x      + str(count_site),  \
460                            coord[0] )
461      ncfile_out.setncattr( name_orig_y      + str(count_site),  \
462                            coord[1] )
463      ncfile_out.setncattr( name_orig_z      + str(count_site),  \
464                            0.0 )
465
466      # Define dimensions
467      ntime = 1
468      nstat = 1
469      ncfile_out.createDimension( name_ntime   + str(count_site), ntime )
470      ncfile_out.createDimension( name_station + str(count_site), nstat )
471
472      # Define coordinate variables
473      temp_ts = ncfile_out.createVariable( name_eutm      + str(count_site), \
474                                           float,                            \
475                                           name_station   + str(count_site) )
476      temp_ts[:] = np.array( coord[0] )
477
478      temp_ts = ncfile_out.createVariable( name_nutm      + str(count_site), \
479                                           float,                            \
480                                           name_station   + str(count_site) )
481      temp_ts[:] = np.array( coord[1] )
482
483      temp_ts = ncfile_out.createVariable( name_z         + str(count_site), \
484                                           float,                            \
485                                           name_station   + str(count_site) )
486      temp_ts[:] = np.array( coord[2] )
487
488      temp_ts = ncfile_out.createVariable( name_station_h + str(count_site), \
489                                           float,                            \
490                                           name_station   + str(count_site) )
491      temp_ts[:] = np.array( 0.0 )
492
493
494      count_site += 1
495
496   # Reset counter variable
497   count_site = num_sites + 1
498
499   # check if variables are prescribed. If so, prepare final output string
500   # stored in measured_variables.
501   if ( vars_to_be_measured ):
502
503      for custom_vars in vars_to_be_measured:
504
505         measured_variables = []
506         for var in vars_default:
507            measured_variables.append(var)
508
509         # Check if given variables are already in the default variables.
510         # If not, extend.
511         for var in custom_vars:
512             if ( var  not in  measured_variables ):
513
514                measured_variables.append(var)
515
516         ncfile_out.createDimension( "nvar"+ str(count_site), \
517                                     len( measured_variables ) )
518
519         measured_var = ncfile_out.createVariable( 'measured_variables' + str(count_site), 'S1', \
520                                                  ("nvar" + str(count_site), "string_len") ) # must be NC_CHAR
521
522         # Write the variables to the file
523         for counter, meas in enumerate( measured_variables ):
524            measured_var[counter] = stringtochar( np.array( meas,"S%s"%(max_string_len) ) )
525
526         # Add soil attribute for the current measurement.
527         soil = False
528         if ( any( var == soil_vars for var in measured_variables) ):
529            soil = True
530
531         # Write soil flag
532         ncfile_out.setncattr( name_soil_sampling + str( count_site), np.int8(soil) )
533
534         # Increment counter variable
535         count_site += 1
536
537         del ( measured_variables[:] )
538
539   # Add the number of customized sites.
540   num_sites += int( number_positions )
541
542
543# Finally, write the total number of sites to the output file
544ncfile_out.setncattr( name_num_stat, num_sites )
545
546
547print( "*** palm_cvd has been finished. You can find the output file under: " )
548print( "    " + output_filename )
549
550quit()
551
Note: See TracBrowser for help on using the repository browser.