#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#--------------------------------------------------------------------------------#
# This file is part of the PALM model system.
#
# PALM is free software: you can redistribute it and/or modify it under the terms
# of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# PALM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 1997-2021 Leibniz Universitaet Hannover
#--------------------------------------------------------------------------------#
#
# Former revisions:
# -----------------
# $Id: postprocess_vm_measurements.py 4853 2021-01-15 15:22:11Z suehring
#
# Initial revision
#
#--------------------------------------------------------------------------------#
#
# Description:
# ------------
# Postprocessing tool to merge output of virtual measurements into site-specific
# files.
#
# Usage:
#-------
"""Merge virtual measurement output: removes empty timestamps from the netcdf
files and concatenates files from several restart files into one file.

Example:
module load nco anaconda3
python3 postprocess_vm_measurements.py my_palm_simulation/OUTPUT
"""
#
# @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
#
#--------------------------------------------------------------------------------#

import argparse
import os
import subprocess
import sys

try:
    import numpy as np
except ImportError:
    sys.exit(
        'package "numpy" is required but not installed! Run\n' +
        'python -m pip install --user numpy\nto install it.')

try:
    from netCDF4 import Dataset
except ImportError:
    sys.exit(
        'package "netCDF4" is required but not installed! Run\n' +
        'python -m pip install --user netCDF4\nto install it.')


# - - - - - - - - - - - - - - -

def concatenate(file_list, inds, sites, out_dir, override=True):
    """Concatenate netCDF files via the NCO tools 'ncap2' and 'ncrcat'.

    For each site (inner list of *file_list*), shift the record coordinate
    'ntime' (and 'ntime_soil' where present) of every cycle after the first
    by the offsets in *inds* so that records continue where the previous
    cycle ended, then concatenate the group into a single file in *out_dir*.

    Parameters
    ----------
    file_list : list[list[str]]
        One inner list of netCDF file paths per site, ordered by run cycle.
    inds : list[list[int]]
        Per-file record offsets (start indices computed in main()).
    sites : list[str]
        Output file name for each inner list of *file_list*.
    out_dir : str
        Directory receiving the merged files; created if missing.
    override : bool
        Unused; kept for interface compatibility.

    Returns
    -------
    The last composed 'ncap2' command (legacy return value of the original
    implementation, preserved for compatibility).
    """
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    # Legacy initial value; returned unchanged if no offset shift was needed.
    nco_command = ["ncrcat"]

    for counter_file, files in enumerate(file_list):
        ncrcat_command = "ncrcat -O "
        for counter_dir, single_file in enumerate(files):
            if counter_dir != 0:
                # Cycles after the first: shift the record coordinate(s) in
                # place so concatenated records are monotonically increasing.
                ncfile = Dataset(single_file, "r")
                try:
                    soil = "time_soil" in ncfile.variables
                finally:
                    # Close the handle before ncap2 rewrites the file.
                    ncfile.close()

                offset = inds[counter_file][counter_dir]
                nco_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(offset)
                if soil:
                    nco_command += " -s 'ntime_soil=ntime_soil+{}' ".format(offset)
                nco_command += " " + single_file + " " + single_file
                print(nco_command)
                subprocess.run(nco_command, shell=True, check=True)

            ncrcat_command += single_file + " "

        ncrcat_command += out_dir + "/"
        # If the target file already exists, disambiguate the output name
        # with the 'siteXXX' substring of the file name.
        if os.path.isfile(out_dir + "/" + sites[counter_file]):
            string_dum = sites[counter_file]
            start = string_dum.find('site')
            end = start + 7
            outfile = sites[counter_file] + "_" + string_dum[start:end]
            print(string_dum[start:end])
        else:
            outfile = sites[counter_file]
        ncrcat_command += outfile

        print(ncrcat_command)
        subprocess.run(ncrcat_command, shell=True, check=True)

    return nco_command


# - - - - - - - - - - - - - - -

def truncate(file_process, override=True):
    """Cut empty (masked or zero-valued) trailing time levels from a file.

    Inspects the 'time' variable of *file_process* and, when trailing
    records are masked or zero, truncates the file in place with 'ncks'.
    'ntime' (and 'ntime_soil' where present) are turned into record
    dimensions so 'ncrcat' can later concatenate along them.

    Parameters
    ----------
    file_process : str
        Path of the netCDF file to truncate (modified in place).
    override : bool
        Unused; kept for interface compatibility.

    Returns
    -------
    (end_index, site)
        Index of the last valid time level, and the value of the file's
        'site' attribute (None when the attribute is absent).
    """
    ncfile = Dataset(file_process, "r")
    try:
        time_dim = ncfile.dimensions["ntime"]
        time_var = ncfile.variables["time"][:, :]
        # True where a time value is present, False where it is masked.
        time_mask = ~np.ma.getmaskarray(time_var)

        soil = "time_soil" in ncfile.variables
        # BUGFIX: original initialized 'site' only inside the attribute loop,
        # raising NameError for files without a 'site' attribute.
        site = ncfile.getncattr("site") if "site" in ncfile.ncattrs() else None

        start_index = 0
        # BUGFIX: the original tested 'np.any(time_mask is False)' — an
        # identity comparison of the array object, which is always False —
        # so the masked-value branch could never trigger. Test elementwise.
        if np.any(~time_mask):
            end_index = np.where(~time_mask)[0][0] - 1
            cut = True
        elif np.any(time_var == 0):
            end_index = np.where(time_var == 0)[0][0] - 1
            cut = True
        else:
            end_index = len(time_var[:][0])
            cut = False

        if cut:
            # Compose the ncks command while the dataset is still open (the
            # unlimited-dimension check needs the live handle).
            ncks_command = "ncks -O -d ntime,{0},{1}".format(start_index, end_index)
            if not time_dim.isunlimited():
                ncks_command += " --mk_rec_dmn"
                ncks_command += " ntime"
            if soil:
                ncks_command += " -d ntime_soil,{0},{1}".format(start_index, end_index)
                ncks_command += " --mk_rec_dmn"
                ncks_command += " ntime_soil"
            ncks_command += " " + file_process + " " + file_process
        else:
            ncks_command = None
    finally:
        # BUGFIX: close the handle before ncks rewrites the file in place
        # (the original left every Dataset open).
        ncfile.close()

    if ncks_command is not None:
        # Cut time levels using ncks
        print(ncks_command)
        subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE)

    return end_index, site


# - - - - - - - - - - - - - - -

def main(path_data, output_directory, override=True):
    """Truncate and merge all virtual-measurement files below *path_data*.

    Each subdirectory of *path_data* is expected to hold the output of one
    run cycle with identical file names. Every file is first truncated to
    its valid time levels, then all cycles of a site are concatenated into
    one file in *output_directory*.
    """
    if not path_data.endswith('/'):
        path_data += '/'

    # One output directory per run cycle, in sorted (chronological) order.
    list_output_dirs = [path_data + directory + '/'
                        for directory in sorted(os.listdir(path_data))]

    # The files of the first cycle define the set of sites to process
    # (same result as the original counter-based loop).
    if list_output_dirs:
        output_file_list = sorted(os.listdir(list_output_dirs[0]))
    else:
        output_file_list = []

    num_dirs = len(list_output_dirs)
    start_inds = [[0] * num_dirs for _ in output_file_list]
    end_inds = [[0] * num_dirs for _ in output_file_list]
    input_files = [[None] * num_dirs for _ in output_file_list]
    sites = [None] * len(output_file_list)

    for counter_file, filename in enumerate(output_file_list):
        for counter_dir, directory in enumerate(list_output_dirs):
            file_process = directory + filename
            end_ind, sites[counter_file] = truncate(file_process, override)
            # NOTE(review): the site attribute returned by truncate() is
            # immediately overridden by the file name — original behavior,
            # preserved here.
            sites[counter_file] = filename
            if counter_dir != 0:
                start_inds[counter_file][counter_dir] = \
                    end_inds[counter_file][counter_dir - 1]
            end_inds[counter_file][counter_dir] = \
                start_inds[counter_file][counter_dir] + end_ind
            input_files[counter_file][counter_dir] = file_process

    # Concatenate all files
    concatenate(input_files, start_inds, sites, output_directory, override)


# - - - - - - - - - - - - - - -
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Merge virtual measurement output from multiple PALM run cycles',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', metavar='IN',
                        help='PALM output directory containing virtual measurements')
    # BUGFIX: the original declared nargs=1, so an explicit '-o DIR' produced
    # a one-element list (while the default stayed a plain string), crashing
    # later on string concatenation. Without nargs, args.out is always a str.
    parser.add_argument('--out', '-o', metavar='OUT', default='./merge',
                        help='Output directory to store merged data')
    args = parser.parse_args()

    main(args.input, output_directory=args.out, override=True)