Ignore:
Timestamp:
Oct 26, 2020 1:03:52 PM (4 years ago)
Author:
suehring
Message:

script to create input file for virtual measurements - palm_cvd: In order to do not omit observations that are on the same site but have different oordinates or feature-types, process all files rather than only one and omit the rest

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SCRIPTS/palm_cvd

    r4663 r4758  
    2626# -----------------
    2727# $Id$
     28# In order to do not omit observations that are on the same site but have different
     29# coordinates or feature-types, process all files rather than only one and omit
     30# the rest.
     31#
     32# 4663 2020-09-02 14:54:09Z gronemeier
    2833# bugfix in non_measurable_vars; ignore station_h if featureType is trajectory
    2934#
     
    198203name_fill          = "_FillValue"
    199204name_site          = "site"
     205name_acro          = "acronym"
     206name_content       = "data_content"
    200207name_orig_x        = "origin_x"
    201208name_orig_y        = "origin_y"
     
    268275   # virtual measurement has an overhead and consumes memory.
    269276   sites = []
     277   input_files = []
    270278   for dirname in list_input_data:
    271279       data_file = data_path + dirname
     
    284292       input_file = data_file + "/" + latest_file
    285293       ncfile_in = Dataset( input_file, "r", format="NETCDF4", encoding='ascii')
    286 
    287        # Read global attributes and write them immediately into the output file
    288        for att in ncfile_in.ncattrs():
    289           if ( att == name_site ):
    290              site = ncfile_in.getncattr(att)
    291 
    292        if ( site not in sites ):
    293           sites.append(site)
    294 
    295    # Define a flag array that is used to identify whether site dimensions are already
    296    # defined or not.
    297    create_metadata_for_site = [None] * len(sites)
     294       input_files.append(input_file)
    298295
    299296   # Define a nested list of default variables that shall be measured. Based on this list,
    300297   # the final number of measured variables is determined.
    301    measured_variables_all_sites = [ ['u', 'v', 'w', 'theta', 'hus'] for var in range(0, len(sites))]
     298   measured_variables_all_sites = [ ['u', 'v', 'w', 'theta', 'hus'] for var in range(0, len(input_files))]
    302299
    303300   # Run loop over all subdirectories that contain observational data
    304    for dirname in list_input_data:
    305       data_file = data_path + dirname
    306 
    307       # Directory may contain various file versions.
    308       # Take the one with highest cycle number.
    309       highest_cycle_nr = 0
    310       for filename in os.listdir(data_file):
    311          start_seq = len( filename ) - 6
    312          end_seq   = len( filename ) - 3
    313          if int( filename[start_seq:end_seq] ) > highest_cycle_nr:
    314             highest_cycle_nr = int(filename[start_seq:end_seq])
    315             latest_file      = filename
     301   for counter, file in enumerate(input_files):
    316302
    317303      # Open the NetCDF input file
    318       input_file = data_file + "/" + latest_file
     304      input_file = input_files[counter]
    319305      ncfile_in = Dataset( input_file, "r", format="NETCDF4", encoding='ascii' )
    320306
    321       # Read site attribue first
     307      # Determine index for the treated site
     308      num_vmeas = input_files.index( input_file ) + 1
     309      print( counter, num_vmeas )
     310
     311      # Read global attributes and write them immediately into the output file
    322312      for att in ncfile_in.ncattrs():
     313         if ( att == name_featuretype ):
     314            feature = ncfile_in.getncattr(att)
     315         if ( att == name_datacontent ):
     316            content = ncfile_in.getncattr(att)
    323317         if ( att == name_site ):
    324318            site = ncfile_in.getncattr(att)
    325319
    326       # Determine index for the treated site
    327       num_vmeas = sites.index( site ) + 1
    328 
    329       # Check whether metadata for this site has been already created
    330       if ( create_metadata_for_site[sites.index( site )] != "Done" ):
    331 
    332          # Read global attributes and write them immediately into the output file
    333          for att in ncfile_in.ncattrs():
    334             if ( att == name_featuretype ):
    335                feature = ncfile_in.getncattr(att)
    336             if ( att == name_datacontent ):
    337                content = ncfile_in.getncattr(att)
    338             if ( att == name_site ):
    339                site = ncfile_in.getncattr(att)
    340 
    341             if ( att in atts_float ):
    342                ncfile_out.setncattr( att + str(num_vmeas), np.double(ncfile_in.getncattr(att)) )
     320         if ( att in atts_float ):
     321            ncfile_out.setncattr( att + str(num_vmeas), np.double(ncfile_in.getncattr(att)) )
     322         else:
     323            ncfile_out.setncattr( att + str(num_vmeas), ncfile_in.getncattr(att) )
     324
     325      #timeSeries
     326      if ( feature == name_ts ):
     327         ntime = len( ncfile_in.dimensions[name_ntime]   )
     328         nstat = len( ncfile_in.dimensions[name_station] )
     329         ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
     330         ncfile_out.createDimension( name_station + str(num_vmeas), nstat )
     331
     332      #trajectory
     333      elif ( feature == name_traj ):
     334         ntime = len( ncfile_in.dimensions[name_ntime]   )
     335         ntraj = len( ncfile_in.dimensions[name_traj_dim] )
     336         ncfile_out.createDimension( name_ntime    + str(num_vmeas), ntime )
     337         ncfile_out.createDimension( name_traj_dim + str(num_vmeas), ntraj )
     338
     339      #timeseriesProfile
     340      else:
     341         ntime = len( ncfile_in.dimensions[name_ntime]   )
     342         nstat = len( ncfile_in.dimensions[name_station] )
     343         nz    = len( ncfile_in.dimensions[name_nz]      )
     344         ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
     345         ncfile_out.createDimension( name_station + str(num_vmeas), nstat )
     346         ncfile_out.createDimension( name_nz      + str(num_vmeas), nz    )
     347
     348      for var in ncfile_in.variables.keys():
     349         if ( var in dims_out ):
     350            # Create a variable and write it to file after it is read. In order to
     351            # avoid fill values in the dimensions, these are converted to zero
     352            # before written to file. Depending on the featureType of the measurement,
     353            # the array shape is different. For more informations, please see
     354            # [UC]2 data standard.
     355            # Timeseries
     356            if ( feature == name_ts  ):
     357               temp_ts = ncfile_out.createVariable( var + str(num_vmeas), float, \
     358                                                    name_station + str(num_vmeas))
     359               temp_ts[:] = np.nan_to_num( ncfile_in.variables[var][:] )
     360
     361            # Trajectories
     362            elif ( feature == name_traj ):
     363               # @note: If there are files where station_h is present although featureType is
     364               #        trajectory, station_h must not be read.
     365               if var != name_station_h:
     366                  temp_traj = ncfile_out.createVariable( var + str(num_vmeas), float, \
     367                                                         ( name_traj_dim + str(num_vmeas), \
     368                                                           name_ntime + str(num_vmeas) ) )
     369                  temp_traj[:,:] = np.nan_to_num( ncfile_in.variables[var][:,:] )
     370
     371            # TimeseriesProfiles
    343372            else:
    344                ncfile_out.setncattr( att + str(num_vmeas), ncfile_in.getncattr(att) )
    345 
    346          #timeSeries
    347          if ( feature == name_ts ):
    348             ntime = len( ncfile_in.dimensions[name_ntime]   )
    349             nstat = len( ncfile_in.dimensions[name_station] )
    350             ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
    351             ncfile_out.createDimension( name_station + str(num_vmeas), nstat )
    352 
    353          #trajectory
    354          elif ( feature == name_traj ):
    355             ntime = len( ncfile_in.dimensions[name_ntime]   )
    356             ntraj = len( ncfile_in.dimensions[name_traj_dim] )
    357             ncfile_out.createDimension( name_ntime    + str(num_vmeas), ntime )
    358             ncfile_out.createDimension( name_traj_dim + str(num_vmeas), ntraj )
    359 
    360          #timeseriesProfile
    361          else:
    362             ntime = len( ncfile_in.dimensions[name_ntime]   )
    363             nstat = len( ncfile_in.dimensions[name_station] )
    364             nz    = len( ncfile_in.dimensions[name_nz]      )
    365             ncfile_out.createDimension( name_ntime   + str(num_vmeas), ntime )
    366             ncfile_out.createDimension( name_station + str(num_vmeas), nstat )
    367             ncfile_out.createDimension( name_nz      + str(num_vmeas), nz    )
    368 
    369          for var in ncfile_in.variables.keys():
    370             if ( var in dims_out ):
    371                # Create a variable and write it to file after it is read. In order to
    372                # avoid fill values in the dimensions, these are converted to zero
    373                # before written to file. Depending on the featureType of the measurement,
    374                # the array shape is different. For more informations, please see
    375                # [UC]2 data standard.
    376                # Timeseries
    377                if ( feature == name_ts  ):
    378                   temp_ts = ncfile_out.createVariable( var + str(num_vmeas), float, \
     373               if ( var == 'z' ):
     374                  temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
     375                                                      ( name_station + str(num_vmeas), \
     376                                                        name_nz + str(num_vmeas) ) )
     377                  temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:,0,:] )
     378               else:
     379                  temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
    379380                                                       name_station + str(num_vmeas))
    380                   temp_ts[:] = np.nan_to_num( ncfile_in.variables[var][:] )
    381 
    382                # Trajectories
    383                elif ( feature == name_traj ):
    384                   # @note: If there are files where station_h is present although featureType is
    385                   #        trajectory, station_h must not be read.
    386                   if var != name_station_h:
    387                      temp_traj = ncfile_out.createVariable( var + str(num_vmeas), float, \
    388                                                             ( name_traj_dim + str(num_vmeas), \
    389                                                               name_ntime + str(num_vmeas) ) )
    390                      temp_traj[:,:] = np.nan_to_num( ncfile_in.variables[var][:,:] )
    391 
    392                # TimeseriesProfiles
    393                else:
    394                   if ( var == 'z' ):
    395                      temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
    396                                                          ( name_station + str(num_vmeas), \
    397                                                            name_nz + str(num_vmeas) ) )
    398                      temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:,0,:] )
    399                   else:
    400                      temp_pr = ncfile_out.createVariable( var + str(num_vmeas), float, \
    401                                                           name_station + str(num_vmeas))
    402                      temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:] )
     381                  temp_pr[:] = np.nan_to_num( ncfile_in.variables[var][:] )
    403382
    404383      # Search for variables to be measured. In case the variable isn't already defined,
     
    407386         if ( var not in non_measurable_vars  and  \
    408387              var not in vars_default         and  \
    409               var not in measured_variables_all_sites[sites.index( site )] ):
    410 
    411             measured_variables_all_sites[sites.index( site )].append(var)
     388              var not in measured_variables_all_sites[input_files.index( input_file )] ):
     389
     390            measured_variables_all_sites[input_files.index( input_file )].append(var)
    412391
    413392      # Close the NetCDF input file
    414393      ncfile_in.close()
    415394
    416       # Set flag to indicate that for this specific site dimensions have been
    417       # already created and attributes are already set.
    418       if ( create_metadata_for_site[sites.index( site )] != "Done" ):
    419          create_metadata_for_site[sites.index( site )] = "Done"
    420 
    421395   # After variables are gathered and dimensions / attributes are already written to file,
    422396   # the list of measured variables is written to file.
    423    for site in sites:
    424 
    425       num_vmeas = sites.index( site ) + 1
     397   for site in input_files:
     398
     399      num_vmeas = input_files.index( site ) + 1
    426400
    427401      ncfile_out.createDimension( "nvar"+ str(num_vmeas), \
    428                                   len( measured_variables_all_sites[sites.index( site )] ) )
     402                                  len( measured_variables_all_sites[input_files.index( site )] ) )
    429403
    430404      measured = ncfile_out.createVariable( 'measured_variables' + str(num_vmeas), 'S1', \
    431405                                            ("nvar" + str(num_vmeas), "string_len")) # must be NC_CHAR
    432406
    433       for counter, meas in enumerate( measured_variables_all_sites[sites.index( site )] ):
     407      for counter, meas in enumerate( measured_variables_all_sites[input_files.index( site )] ):
    434408         measured[counter] = stringtochar( np.array( meas,"S%s"%(max_string_len) ) )
    435409
    436410      # Check if any of the measured variables is a soil variable. Set flag accordingly.
    437411      soil = False
    438       for var in measured_variables_all_sites[sites.index( site )]:
     412      for var in measured_variables_all_sites[input_files.index( site )]:
    439413         if ( var in soil_vars ):
    440414            soil = True
     
    446420
    447421    # Store the number of observational sites
    448    num_sites += len( sites )
     422   num_sites += len( input_files )
    449423
    450424
Note: See TracChangeset for help on using the changeset viewer.