Home

Context Navigation

← Previous Changeset
Next Changeset →

Changeset 4879

Timestamp:

Feb 18, 2021 11:15:29 AM (3 years ago)

Author:

gronemeier

Message:

extensive re-work of postprocess_vm_measurements.py:

bugfix: convert atmosphere and soil time to record dimension
bugfix: make overwrite optional
reduce complexity
remove unnecessary parts
variable renaming
code restructuring to follow coding standard

File:

: 1 edited

palm/trunk/SCRIPTS/postprocess_vm_measurements.py (modified) (8 diffs)

Legend:

: Unmodified
: Added
: Removed

palm/trunk/SCRIPTS/postprocess_vm_measurements.py

-                      r4854
+                      r4879
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+#--------------------------------------------------------------------------------#
+# --------------------------------------------------------------------------------#
 # This file is part of the PALM model system.
+#
 …
+#
 # Copyright 1997-2021  Leibniz Universitaet Hannover
 #--------------------------------------------------------------------------------#
+# --------------------------------------------------------------------------------#
+#
 # Current revisions:
 …
 # -----------------
 # $Id: postprocess_vm_measurements.py 4853 2021-01-15 15:22:11Z suehring #
+# extensive re-work of postprocess_vm_measurements.py:
+#    - bugfix: convert atmosphere and soil time to record dimension
+#    - bugfix: make overwrite optional
+#    - reduce complexity
+#    - remove unnecessary parts
+#    - variable renaming
+#    - code restructuring to follow coding standard
+#
+# 4853 2021-01-15 15:22:11Z suehring
 # Initial revision
+#
+#
+#
+#
+#--------------------------------------------------------------------------------#
+#
+#
+# --------------------------------------------------------------------------------#
 # Description:
 # ------------
+# Postprocessing tool to merge output of virtual measurements into site-specific
+# files.
+#
+# Usage:
+#-------
+"""Merge virtual measurement output: removes empty timestamps from the netcdf
+   files and concatenates files from several restart files into one file-
+"""Merge virtual measurement output.
+Removes empty time stamps from the netCDF files and concatenates files
+from several restart files into one file.
 Example:
 …
 """
+#
 # @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
+# @Authors Matthias Sühring (suehring@muk.uni-hannover.de)
 #          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
+#
+#--------------------------------------------------------------------------------#
+#
+# --------------------------------------------------------------------------------#
 …
         + 'python -m pip install --user netCDF4\nto install it.')
+# - - - - - - - - - - - - - - -
+def concatenate(file_list, inds, sites, out_dir, override=True):
+def concatenate(files_per_site, sites, output_directory, overwrite_file=False):
     """Concatenate netCDF files via ncrcat.
 …
     """
+    if not os.path.isdir(out_dir):
+        mkdir = os.mkdir(out_dir)
+    nco_command = ["ncrcat"]
+    counter_file = 0
+    for files in file_list:
+        ncrcat_command = "ncrcat -O "
+        counter_dir = 0
+        for single_file in files:
+            if counter_dir != 0:
+                ncfile = Dataset(single_file, "r")
+                soil = False
+                for var in ncfile.variables.keys():
+                    if var == "time_soil":
+                        soil = True
+                nco_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(
+                    inds[counter_file][counter_dir])
+                if soil is True:
+                    nco_command += " -s 'ntime_soil=ntime_soil+{}' ".format(
+                        inds[counter_file][counter_dir])
+                nco_command += " " + single_file + " " + single_file
+                print(nco_command)
+                nco_output = subprocess.run(nco_command, shell=True, check=True)
+            # if counter_dir == 0:
+            #     cp = os.system("cp " + single_file + " " + out_dir + "/" + sites[counter_file])
+            #     print("cp " + single_file + " " + out_dir + "/" + sites[counter_file])
+            #     ncrcat_command += "/" + out_dir + "/" + sites[counter_file] + " "
+            # else:
+            #     ncrcat_command += single_file + " "
+            ncrcat_command += single_file + " "
+            counter_dir += 1
+        ncrcat_command += out_dir + "/"
+        if os.path.isfile(out_dir + "/" + sites[counter_file]):
+            start = sites[counter_file].find('site')
+            end = start + 7
+            string_dum = sites[counter_file]
+            outfile = sites[counter_file] + "_" + string_dum[start:end]
+            print(string_dum[start:end])
+        else:
+            outfile = sites[counter_file]
+        ncrcat_command += outfile
+    if not os.path.isdir(output_directory):
+        mkdir = os.mkdir(output_directory)
+    if output_directory[-1] != '/':
+        output_directory += '/'
+    for site_index, file_list in enumerate(files_per_site):
+        ncrcat_command = "ncrcat"
+        if overwrite_file:
+            ncrcat_command += " -O"
+        for file_name in file_list:
+            ncrcat_command += " " + file_name
+        # Check if output file already exists
+        output_file = output_directory + sites[site_index]
+        if not overwrite_file and os.path.isfile(output_file):
+            for i in range(1000):
+                output_file = output_directory + sites[site_index] + "_{:03d}".format(i)
+                if not os.path.isfile(output_file):
+                    break
+                elif i == 999:
+                    raise IOError("could not guarantee non overwriting output file: {}".format(
+                            output_file))
+        ncrcat_command += " " + output_file
         print(ncrcat_command)
         ncrcat_output = subprocess.run(ncrcat_command, shell=True, check=True)
+        counter_file += 1
+        # nco_output = subprocess.run(nco_command, shell=True, check=True, stdout=subprocess.PIPE)
+    return nco_command
+# - - - - - - - - - - - - - - -
+def truncate(file_process, override=True):
+    # print("file "   + file_process)
+    return output_file
+def truncate(input_file, time_index_shift=0, overwrite_file=False):
+    """Truncate netCDF files via ncrcat.
+    Truncate all time dimensions of the input file and convert them to
+    record dimensions. The output is saved to 'input_file.trunc' or to
+    'input_file.trunc.nc' if the input_file has a '.nc' extension.
+    If "overwrite_file" is true, write output directly to input_file.
+    Shift the time index variables by time_index_shift.
+    Return values:
+        highest time index of time dimension in output file
+        output-file name
+    """
     # Gather information about time coordinate in file
     ncfile = Dataset(file_process, "r")
+    ncfile = Dataset(input_file, "r")
     time_dim = ncfile.dimensions["ntime"]
     time_var = ncfile.variables["time"][:, :]
 …
     start_index = 0
+    soil = False
+    for var in ncfile.variables.keys():
+        if var == "time_soil":
+            soil = True
+    soil = any([var == "time_soil" for var in ncfile.variables.keys()])
     if np.any(time_mask is False):
 …
     #     site = site + "_traj"
     # print(cut)
+    ncks_output = []
+    # Compose nco commands
+    ncks_command = "ncks"
+    if overwrite_file:
+        ncks_command += " -O"
+        output_file = input_file
+    else:
+        # Add '.trunc' to file name before '.nc' file extension
+        output_file, file_extension = os.path.splitext(input_file)
+        if file_extension != '.nc':
+            output_file += file_extension + '.trunc'
+        else:
+            output_file += '.trunc' + file_extension
+        if os.path.isfile(output_file):
+            raise IOError("truncated file already exists: {}".format(output_file))
     if cut:
+        # Compose ncks command
+        ncks_command = "ncks -O -d ntime,{0},{1}".format(start_index, end_index)
+        if not time_dim.isunlimited():
+            ncks_command += " --mk_rec_dmn"
+            ncks_command += " ntime"
+        if soil is True:
+        # set dimension limits
+        ncks_command += " -d ntime,{0},{1}".format(start_index, end_index)
+        if soil:
             ncks_command += " -d ntime_soil,{0},{1}".format(start_index, end_index)
+            ncks_command += " --mk_rec_dmn"
+            ncks_command += " ntime_soil"
+        ncks_command += " " + file_process + " " + file_process
+        # Cut time levels using ncks
+    # convert time into record dimension
+    time_is_limited = not time_dim.isunlimited()
+    if time_is_limited:
+        ncks_command += " --mk_rec_dmn"
+        ncks_command += " ntime"
+    if cut or time_is_limited:
+        # set input and output file
+        ncks_command += " {0} {1}".format(input_file, output_file)
+        # execute ncks
         print(ncks_command)
         ncks_output = subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE)
+        new_input_file = output_file
     else:
+        end_index = len(time_var[:][0])
+    return end_index, site
+# - - - - - - - - - - - - - - -
+def main(path_data, output_directory, override=True):
+    # Get current working directory
+    work_dir = os.getcwd()
+    if path_data[-1] != '/':
+        path_data += '/'
+        new_input_file = input_file
+    # If soil is present, also convert soil time to record dimension
+    # (must be done separately due to NCO limitations)
+    if soil:
+        soil_time_is_limited = not ncfile.dimensions["ntime_soil"].isunlimited()
+        if soil_time_is_limited:
+            ncks_command = "ncks -O --mk_rec_dmn ntime_soil {0} {1}".format(
+                    new_input_file, output_file)
+            print(ncks_command)
+            ncks_output = subprocess.run(
+                    ncks_command, shell=True, check=True, stdout=subprocess.PIPE)
+            new_input_file = output_file
+    # Add time shift to ntime variables
+    if time_index_shift != 0:
+        ncap2_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(time_index_shift)
+        if soil:
+            ncap2_command += " -s 'ntime_soil=ntime_soil+{}' ".format(time_index_shift)
+        ncap2_command += " {0} {1}".format(new_input_file, output_file)
+        print(ncap2_command)
+        ncap2_output = subprocess.run(ncap2_command, shell=True, check=True)
+        end_index += time_index_shift
+        new_input_file = output_file
+    return output_file, end_index
+def main(base_input_directory, output_directory, overwrite_file=False):
+    if base_input_directory[-1] != '/':
+        base_input_directory += '/'
+    if output_directory[-1] != '/':
+        output_directory += '/'
     # Get directory list
+    list_output_dirs = [path_data + directory + '/' for directory in sorted(os.listdir(path_data))]
+    filelist = []
+    output_file_list = []
+    counter = 0
+    input_directory_list = [
+            base_input_directory + directory + '/' for directory in
+            sorted(os.listdir(base_input_directory))]
     # Obtain list of sites that need to be processed
+    for directory in list_output_dirs:
+        # Get file list
+        file_list = sorted(os.listdir(directory))
+        for filename in file_list:
+            if counter == 0:
+                output_file_list.append(filename)
+        counter += 1
+    start_inds = [[0] * len(list_output_dirs) for i in range(len(output_file_list))]
+    end_inds = [[0] * len(list_output_dirs) for i in range(len(output_file_list))]
+    input_files = [[None] * len(list_output_dirs) for i in range(len(output_file_list))]
+    sites = [None] * len(output_file_list)
+    counter_file = 0
+    for filename in output_file_list:
+        counter_dir = 0
+        for directory in list_output_dirs:
+            file_process = directory + filename
+            end_ind, sites[counter_file] = truncate(file_process, override)
+            sites[counter_file] = filename
+            if not counter_dir == 0:
+                start_inds[counter_file][counter_dir] = end_inds[counter_file][counter_dir - 1]
+            end_inds[counter_file][counter_dir] = start_inds[counter_file][counter_dir] + end_ind
+            input_files[counter_file][counter_dir] = file_process
+            counter_dir += 1
+        counter_file += 1
+    sites = sorted(os.listdir(input_directory_list[0]))
+    files_per_site_and_directory = [[None] * len(input_directory_list) for i in range(len(sites))]
+    # Truncate each file and save end index of time dimension
+    for site_index, site_name in enumerate(sites):
+        start_index = 0
+        for dir_index, directory in enumerate(input_directory_list):
+            files_per_site_and_directory[site_index][dir_index], end_index = \
+                    truncate(directory + site_name, start_index, overwrite_file)
+            start_index = end_index
     # Concatenate all files
+    outfile = concatenate(input_files, start_inds, sites, output_directory, override)
+# - - - - - - - - - - - - - - -
+    file_concatenated = concatenate(
+            files_per_site_and_directory, sites, output_directory, overwrite_file)
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
+        description='Merge virtual measurement output from multiple PALM run cycles',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('input', metavar='IN',
+                        help='PALM output directory containing virtual measurements')
+    parser.add_argument('--out', '-o', metavar='OUT', nargs=1, default='./merge',
+                        help='Output directory to store merged data')
+            description='Merge virtual measurement output from multiple PALM run cycles',
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+            'input',
+            metavar='IN',
+            help='PALM output directory containing virtual measurements')
+    parser.add_argument(
+            '--out',
+            '-o',
+            metavar='OUT',
+            default='./merge',
+            help='Output directory to store merged data')
+    parser.add_argument(
+            '--overwrite',
+            action='store_true',
+            help='Overwrite input files with output files')
     args = parser.parse_args()
+    path_data = args.input
+    output_directory = args.out
+    main(path_data, output_directory=output_directory, override=True)
+    main(args.input, output_directory=args.out, overwrite_file=args.overwrite)

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 4879

Legend:

palm/trunk/SCRIPTS/postprocess_vm_measurements.py

Download in other formats: