Changeset 4879 for palm/trunk/SCRIPTS


Ignore:
Timestamp:
Feb 18, 2021 11:15:29 AM (4 years ago)
Author:
gronemeier
Message:

extensive re-work of postprocess_vm_measurements.py:

  • bugfix: convert atmosphere and soil time to record dimension
  • bugfix: make overwrite optional
  • reduce complexity
  • remove unnecessary parts
  • variable renaming
  • code restructuring to follow coding standard
File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SCRIPTS/postprocess_vm_measurements.py

    r4854 r4879  
    11#!/usr/bin/env python3
    2 # -*- coding: utf-8 -*-
    3 #
    4 #--------------------------------------------------------------------------------#
     2# --------------------------------------------------------------------------------#
    53# This file is part of the PALM model system.
    64#
     
    1715#
    1816# Copyright 1997-2021  Leibniz Universitaet Hannover
    19 #--------------------------------------------------------------------------------#
     17# --------------------------------------------------------------------------------#
    2018#
    2119# Current revisions:
     
    2624# -----------------
    2725# $Id: postprocess_vm_measurements.py 4853 2021-01-15 15:22:11Z suehring #
     26# extensive re-work of postprocess_vm_measurements.py:
     27#    - bugfix: convert atmosphere and soil time to record dimension
     28#    - bugfix: make overwrite optional
     29#    - reduce complexity
     30#    - remove unnecessary parts
     31#    - variable renaming
     32#    - code restructuring to follow coding standard
     33#
     34# 4853 2021-01-15 15:22:11Z suehring
    2835# Initial revision
    2936#
    30 #
    31 #
    32 #
    33 #--------------------------------------------------------------------------------#
    34 #
    35 
    36 #
     37# --------------------------------------------------------------------------------#
    3738# Description:
    3839# ------------
    39 # Postprocessing tool to merge output of virtual measurements into site-specific
    40 # files.
    41 #
    42 # Usage:
    43 #-------
    44 """Merge virtual measurement output: removes empty timestamps from the netcdf
    45    files and concatenates files from several restart files into one file-
     40"""Merge virtual measurement output.
     41
     42Removes empty time stamps from the netCDF files and concatenates files
     43from several restart files into one file.
    4644
    4745Example:
     
    5048"""
    5149#
    52 # @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
     50# @Authors Matthias SÃŒhring (suehring@muk.uni-hannover.de)
    5351#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
    54 #
    55 #--------------------------------------------------------------------------------#
    56 #
     52# --------------------------------------------------------------------------------#
    5753
    5854
     
    7470        + 'python -m pip install --user netCDF4\nto install it.')
    7571
    76 # - - - - - - - - - - - - - - -
    77 
    78 
    79 def concatenate(file_list, inds, sites, out_dir, override=True):
     72
     73def concatenate(files_per_site, sites, output_directory, overwrite_file=False):
    8074    """Concatenate netCDF files via ncrcat.
    8175
     
    8478    """
    8579
    86     if not os.path.isdir(out_dir):
    87         mkdir = os.mkdir(out_dir)
    88 
    89     nco_command = ["ncrcat"]
    90     counter_file = 0
    91     for files in file_list:
    92 
    93         ncrcat_command = "ncrcat -O "
    94         counter_dir = 0
    95         for single_file in files:
    96             if counter_dir != 0:
    97                 ncfile = Dataset(single_file, "r")
    98 
    99                 soil = False
    100                 for var in ncfile.variables.keys():
    101                     if var == "time_soil":
    102                         soil = True
    103 
    104                 nco_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(
    105                     inds[counter_file][counter_dir])
    106                 if soil is True:
    107                     nco_command += " -s 'ntime_soil=ntime_soil+{}' ".format(
    108                         inds[counter_file][counter_dir])
    109 
    110                 nco_command += " " + single_file + " " + single_file
    111                 print(nco_command)
    112                 nco_output = subprocess.run(nco_command, shell=True, check=True)
    113 
    114             # if counter_dir == 0:
    115             #     cp = os.system("cp " + single_file + " " + out_dir + "/" + sites[counter_file])
    116             #     print("cp " + single_file + " " + out_dir + "/" + sites[counter_file])
    117             #     ncrcat_command += "/" + out_dir + "/" + sites[counter_file] + " "
    118             # else:
    119             #     ncrcat_command += single_file + " "
    120             ncrcat_command += single_file + " "
    121             counter_dir += 1
    122 
    123         ncrcat_command += out_dir + "/"
    124         if os.path.isfile(out_dir + "/" + sites[counter_file]):
    125             start = sites[counter_file].find('site')
    126             end = start + 7
    127 
    128             string_dum = sites[counter_file]
    129             outfile = sites[counter_file] + "_" + string_dum[start:end]
    130             print(string_dum[start:end])
    131         else:
    132             outfile = sites[counter_file]
    133 
    134         ncrcat_command += outfile
     80    if not os.path.isdir(output_directory):
     81        mkdir = os.mkdir(output_directory)
     82
     83    if output_directory[-1] != '/':
     84        output_directory += '/'
     85
     86    for site_index, file_list in enumerate(files_per_site):
     87
     88        ncrcat_command = "ncrcat"
     89
     90        if overwrite_file:
     91            ncrcat_command += " -O"
     92
     93        for file_name in file_list:
     94            ncrcat_command += " " + file_name
     95
     96        # Check if output file already exists
     97        output_file = output_directory + sites[site_index]
     98        if not overwrite_file and os.path.isfile(output_file):
     99            for i in range(1000):
     100                output_file = output_directory + sites[site_index] + "_{:03d}".format(i)
     101                if not os.path.isfile(output_file):
     102                    break
     103                elif i == 999:
     104                    raise IOError("could not guarantee non overwriting output file: {}".format(
     105                            output_file))
     106        ncrcat_command += " " + output_file
     107
    135108        print(ncrcat_command)
    136109        ncrcat_output = subprocess.run(ncrcat_command, shell=True, check=True)
    137         counter_file += 1
    138 
    139         # nco_output = subprocess.run(nco_command, shell=True, check=True, stdout=subprocess.PIPE)
    140 
    141     return nco_command
    142 
    143 # - - - - - - - - - - - - - - -
    144 
    145 
    146 def truncate(file_process, override=True):
    147 
    148     # print("file "   + file_process)
     110
     111    return output_file
     112
     113
     114def truncate(input_file, time_index_shift=0, overwrite_file=False):
     115    """Truncate netCDF files via ncrcat.
     116
     117    Truncate all time dimensions of the input file and convert them to
     118    record dimensions. The output is saved to 'input_file.trunc' or to
     119    'input_file.trunc.nc' if the input_file has a '.nc' extension.
     120    If "overwrite_file" is true, write output directly to input_file.
     121    Shift the time index variables by time_index_shift.
     122
     123    Return values:
     124        highest time index of time dimension in output file
     125        output-file name
     126    """
     127
    149128    # Gather information about time coordinate in file
    150     ncfile = Dataset(file_process, "r")
     129    ncfile = Dataset(input_file, "r")
    151130    time_dim = ncfile.dimensions["ntime"]
    152131    time_var = ncfile.variables["time"][:, :]
     
    154133    start_index = 0
    155134
    156     soil = False
    157     for var in ncfile.variables.keys():
    158         if var == "time_soil":
    159             soil = True
     135    soil = any([var == "time_soil" for var in ncfile.variables.keys()])
    160136
    161137    if np.any(time_mask is False):
     
    184160    #     site = site + "_traj"
    185161    # print(cut)
    186     ncks_output = []
     162
     163    # Compose nco commands
     164    ncks_command = "ncks"
     165
     166    if overwrite_file:
     167        ncks_command += " -O"
     168        output_file = input_file
     169    else:
     170        # Add '.trunc' to file name before '.nc' file extension
     171        output_file, file_extension = os.path.splitext(input_file)
     172        if file_extension != '.nc':
     173            output_file += file_extension + '.trunc'
     174        else:
     175            output_file += '.trunc' + file_extension
     176        if os.path.isfile(output_file):
     177            raise IOError("truncated file already exists: {}".format(output_file))
     178
    187179    if cut:
    188         # Compose ncks command
    189         ncks_command = "ncks -O -d ntime,{0},{1}".format(start_index, end_index)
    190         if not time_dim.isunlimited():
    191             ncks_command += " --mk_rec_dmn"
    192             ncks_command += " ntime"
    193         if soil is True:
     180        # set dimension limits
     181        ncks_command += " -d ntime,{0},{1}".format(start_index, end_index)
     182        if soil:
    194183            ncks_command += " -d ntime_soil,{0},{1}".format(start_index, end_index)
    195             ncks_command += " --mk_rec_dmn"
    196             ncks_command += " ntime_soil"
    197 
    198         ncks_command += " " + file_process + " " + file_process
    199 
    200         # Cut time levels using ncks
     184
     185    # convert time into record dimension
     186    time_is_limited = not time_dim.isunlimited()
     187    if time_is_limited:
     188        ncks_command += " --mk_rec_dmn"
     189        ncks_command += " ntime"
     190
     191    if cut or time_is_limited:
     192        # set input and output file
     193        ncks_command += " {0} {1}".format(input_file, output_file)
     194
     195        # execute ncks
    201196        print(ncks_command)
    202197        ncks_output = subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE)
     198
     199        new_input_file = output_file
     200
    203201    else:
    204         end_index = len(time_var[:][0])
    205 
    206     return end_index, site
    207 
    208 # - - - - - - - - - - - - - - -
    209 
    210 
    211 def main(path_data, output_directory, override=True):
    212 
    213     # Get current working directory
    214     work_dir = os.getcwd()
    215 
    216     if path_data[-1] != '/':
    217         path_data += '/'
     202        new_input_file = input_file
     203
     204    # If soil is present, also convert soil time to record dimension
     205    # (must be done separately due to NCO limitations)
     206    if soil:
     207        soil_time_is_limited = not ncfile.dimensions["ntime_soil"].isunlimited()
     208        if soil_time_is_limited:
     209
     210            ncks_command = "ncks -O --mk_rec_dmn ntime_soil {0} {1}".format(
     211                    new_input_file, output_file)
     212            print(ncks_command)
     213            ncks_output = subprocess.run(
     214                    ncks_command, shell=True, check=True, stdout=subprocess.PIPE)
     215
     216            new_input_file = output_file
     217
     218    # Add time shift to ntime variables
     219    if time_index_shift != 0:
     220        ncap2_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(time_index_shift)
     221        if soil:
     222            ncap2_command += " -s 'ntime_soil=ntime_soil+{}' ".format(time_index_shift)
     223
     224        ncap2_command += " {0} {1}".format(new_input_file, output_file)
     225
     226        print(ncap2_command)
     227        ncap2_output = subprocess.run(ncap2_command, shell=True, check=True)
     228
     229        end_index += time_index_shift
     230        new_input_file = output_file
     231
     232    return output_file, end_index
     233
     234
     235def main(base_input_directory, output_directory, overwrite_file=False):
     236
     237    if base_input_directory[-1] != '/':
     238        base_input_directory += '/'
     239
     240    if output_directory[-1] != '/':
     241        output_directory += '/'
    218242
    219243    # Get directory list
    220     list_output_dirs = [path_data + directory + '/' for directory in sorted(os.listdir(path_data))]
    221     filelist = []
    222     output_file_list = []
    223     counter = 0
     244    input_directory_list = [
     245            base_input_directory + directory + '/' for directory in
     246            sorted(os.listdir(base_input_directory))]
    224247
    225248    # Obtain list of sites that need to be processed
    226     for directory in list_output_dirs:
    227         # Get file list
    228         file_list = sorted(os.listdir(directory))
    229         for filename in file_list:
    230             if counter == 0:
    231                 output_file_list.append(filename)
    232         counter += 1
    233 
    234     start_inds = [[0] * len(list_output_dirs) for i in range(len(output_file_list))]
    235     end_inds = [[0] * len(list_output_dirs) for i in range(len(output_file_list))]
    236 
    237     input_files = [[None] * len(list_output_dirs) for i in range(len(output_file_list))]
    238     sites = [None] * len(output_file_list)
    239 
    240     counter_file = 0
    241     for filename in output_file_list:
    242 
    243         counter_dir = 0
    244         for directory in list_output_dirs:
    245             file_process = directory + filename
    246             end_ind, sites[counter_file] = truncate(file_process, override)
    247             sites[counter_file] = filename
    248 
    249             if not counter_dir == 0:
    250                 start_inds[counter_file][counter_dir] = end_inds[counter_file][counter_dir - 1]
    251 
    252             end_inds[counter_file][counter_dir] = start_inds[counter_file][counter_dir] + end_ind
    253 
    254             input_files[counter_file][counter_dir] = file_process
    255             counter_dir += 1
    256 
    257         counter_file += 1
     249    sites = sorted(os.listdir(input_directory_list[0]))
     250
     251    files_per_site_and_directory = [[None] * len(input_directory_list) for i in range(len(sites))]
     252
     253    # Truncate each file and save end index of time dimension
     254    for site_index, site_name in enumerate(sites):
     255
     256        start_index = 0
     257        for dir_index, directory in enumerate(input_directory_list):
     258            files_per_site_and_directory[site_index][dir_index], end_index = \
     259                    truncate(directory + site_name, start_index, overwrite_file)
     260            start_index = end_index
    258261
    259262    # Concatenate all files
    260     outfile = concatenate(input_files, start_inds, sites, output_directory, override)
    261 
    262 
    263 # - - - - - - - - - - - - - - -
     263    file_concatenated = concatenate(
     264            files_per_site_and_directory, sites, output_directory, overwrite_file)
     265
    264266
    265267if __name__ == '__main__':
    266268    parser = argparse.ArgumentParser(
    267         description='Merge virtual measurement output from multiple PALM run cycles',
    268         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    269     parser.add_argument('input', metavar='IN',
    270                         help='PALM output directory containing virtual measurements')
    271     parser.add_argument('--out', '-o', metavar='OUT', nargs=1, default='./merge',
    272                         help='Output directory to store merged data')
     269            description='Merge virtual measurement output from multiple PALM run cycles',
     270            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     271    parser.add_argument(
     272            'input',
     273            metavar='IN',
     274            help='PALM output directory containing virtual measurements')
     275    parser.add_argument(
     276            '--out',
     277            '-o',
     278            metavar='OUT',
     279            default='./merge',
     280            help='Output directory to store merged data')
     281    parser.add_argument(
     282            '--overwrite',
     283            action='store_true',
     284            help='Overwrite input files with output files')
    273285
    274286    args = parser.parse_args()
    275     path_data = args.input
    276     output_directory = args.out
    277 
    278     main(path_data, output_directory=output_directory, override=True)
     287
     288    main(args.input, output_directory=args.out, overwrite_file=args.overwrite)
Note: See TracChangeset for help on using the changeset viewer.