source: palm/trunk/SCRIPTS/postprocess_vm_measurements.py @ 4874

Last change on this file since 4874 was 4854, checked in by suehring, 4 years ago

Postprocessing tool for virtual measurement output added

  • Property svn:keywords set to Id
File size: 9.1 KB
RevLine 
[4854]1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4#--------------------------------------------------------------------------------#
5# This file is part of the PALM model system.
6#
7# PALM is free software: you can redistribute it and/or modify it under the terms
8# of the GNU General Public License as published by the Free Software Foundation,
9# either version 3 of the License, or (at your option) any later version.
10#
11# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
12# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License along with
16# PALM. If not, see <http://www.gnu.org/licenses/>.
17#
18# Copyright 1997-2021  Leibniz Universitaet Hannover
19#--------------------------------------------------------------------------------#
20#
21# Current revisions:
22# -----------------
23#
24#
25# Former revisions:
26# -----------------
27# $Id: postprocess_vm_measurements.py 4853 2021-01-15 15:22:11Z suehring #
28# Initial revision
29#
30#
31#
32#
33#--------------------------------------------------------------------------------#
34#
35
36#
37# Description:
38# ------------
39# Postprocessing tool to merge output of virtual measurements into site-specific
40# files.
41#
42# Usage:
43#-------
44"""Merge virtual measurement output: removes empty timestamps from the netcdf
   files and concatenates files from several restart files into one file.
46
47Example:
48module load nco anaconda3
49python3 postprocess_vm_measurements.py my_palm_simulation/OUTPUT
50"""
51#
52# @Authors Matthias Suehring (suehring@muk.uni-hannover.de)
53#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
54#
55#--------------------------------------------------------------------------------#
56#
57
58
59import argparse
60import subprocess
61import os
62import sys
63try:
64    import numpy as np
65except ImportError:
66    sys.exit(
67        'package "numpy" is required but not installed! Run\n'
68        + 'python -m pip install --user numpy\nto install it.')
69try:
70    from netCDF4 import Dataset
71except ImportError:
72    sys.exit(
73        'package "netCDF4" is required but not installed! Run\n'
74        + 'python -m pip install --user netCDF4\nto install it.')
75
76# - - - - - - - - - - - - - - -
77
78
def concatenate(file_list, inds, sites, out_dir, override=True):
    """Concatenate netCDF files via NCO.

    For each group of per-restart-cycle files in file_list, shift the time
    index of every file after the first one via 'ncap2' by the offset given
    in inds (so time indices continue across restarts), then concatenate the
    whole group into a single file in out_dir using 'ncrcat'.

    Arguments:
    file_list -- list of lists; each inner list holds the files of one
                 measurement site, ordered by restart cycle
    inds      -- per-site, per-cycle start offsets for the time index
    sites     -- output file name for each site
    out_dir   -- directory that receives the merged files (created if absent)
    override  -- unused; kept for interface compatibility

    Return value: the last NCO command string that was composed (informational
    only; the caller does not use it).
    """
    # Create the output directory (including parents) if it does not exist.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    nco_command = "ncrcat"
    for counter_file, files in enumerate(file_list):

        ncrcat_command = "ncrcat -O "
        for counter_dir, single_file in enumerate(files):
            if counter_dir != 0:
                # Files of later restart cycles need their time index shifted
                # so that it continues where the previous cycle ended.
                ncfile = Dataset(single_file, "r")
                # Soil quantities have a separate time dimension that must be
                # shifted as well.
                soil = "time_soil" in ncfile.variables.keys()
                # Close before ncap2 rewrites the file in place.
                ncfile.close()

                nco_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(
                    inds[counter_file][counter_dir])
                if soil:
                    nco_command += " -s 'ntime_soil=ntime_soil+{}' ".format(
                        inds[counter_file][counter_dir])

                nco_command += " " + single_file + " " + single_file
                print(nco_command)
                subprocess.run(nco_command, shell=True, check=True)

            ncrcat_command += single_file + " "

        ncrcat_command += out_dir + "/"
        if os.path.isfile(out_dir + "/" + sites[counter_file]):
            # Output file already exists: append the site id substring
            # (e.g. 'site001') to avoid overwriting the existing file.
            start = sites[counter_file].find('site')
            end = start + 7

            string_dum = sites[counter_file]
            outfile = sites[counter_file] + "_" + string_dum[start:end]
            print(string_dum[start:end])
        else:
            outfile = sites[counter_file]

        ncrcat_command += outfile
        print(ncrcat_command)
        subprocess.run(ncrcat_command, shell=True, check=True)

    return nco_command
142
143# - - - - - - - - - - - - - - -
144
145
def truncate(file_process, override=True):
    """Cut empty trailing time levels from a virtual-measurement netCDF file.

    PALM pre-allocates the time dimension, so a run that ends early leaves
    masked or zero-valued trailing entries in the 'time' variable.  Those
    levels are removed in place with 'ncks', and 'ntime' (and 'ntime_soil',
    if present) are converted to record dimensions so that the files can
    later be concatenated with 'ncrcat'.

    Arguments:
    file_process -- path of the netCDF file; modified in place
    override     -- unused; kept for interface compatibility

    Return value: tuple (end_index, site) with the number of valid time
    levels and the file's 'site' attribute (None if the attribute is absent).
    """
    # Gather information about the time coordinate in the file.
    ncfile = Dataset(file_process, "r")
    time_dim = ncfile.dimensions["ntime"]
    time_var = ncfile.variables["time"][:, :]
    # True where a time value is actually present (not masked as fill).
    time_mask = ~np.ma.getmaskarray(time_var)
    start_index = 0

    # Soil quantities have their own time dimension that must be cut, too.
    soil = "time_soil" in ncfile.variables.keys()

    # BUGFIX: the original tests used 'time_mask is False', which compares
    # object identity and is always False, so the masked-value branch could
    # never trigger.  Use element-wise operations instead.
    if not np.all(time_mask):
        # Masked entries mark unfilled time levels: keep everything before
        # the first masked index.
        end_ind = np.where(~time_mask)
        end_index = end_ind[0][0] - 1
        cut = True
    elif np.any(time_var == 0):
        # Zero time values likewise indicate unfilled levels.
        end_ind = np.where(time_var == 0)
        end_index = end_ind[0][0] - 1
        cut = True
    else:
        end_index = len(time_var[:][0])
        cut = False

    # Read relevant global attributes; default to None when absent so the
    # function cannot raise NameError on files lacking them.
    site = None
    feat = None  # featureType; currently unused downstream
    for att in ncfile.ncattrs():
        if att == "site":
            site = ncfile.getncattr(att)
        if att == "featureType":
            feat = ncfile.getncattr(att)

    unlimited = time_dim.isunlimited()
    # Close before ncks rewrites the file in place.
    ncfile.close()

    if cut:
        # Compose ncks command that cuts the time dimension(s) to the valid
        # range and makes them record dimensions.
        ncks_command = "ncks -O -d ntime,{0},{1}".format(start_index, end_index)
        if not unlimited:
            ncks_command += " --mk_rec_dmn"
            ncks_command += " ntime"
        if soil:
            ncks_command += " -d ntime_soil,{0},{1}".format(start_index, end_index)
            ncks_command += " --mk_rec_dmn"
            ncks_command += " ntime_soil"

        ncks_command += " " + file_process + " " + file_process

        # Cut time levels using ncks.
        print(ncks_command)
        subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE)

    return end_index, site
207
208# - - - - - - - - - - - - - - -
209
210
def main(path_data, output_directory, override=True):
    """Truncate and merge all virtual-measurement files below path_data.

    Each subdirectory of path_data is expected to hold the output of one
    restart cycle with identically named measurement files.  Every file is
    first truncated to its valid time levels, then the files of all cycles
    are concatenated per site into output_directory.

    Arguments:
    path_data        -- PALM output directory containing one subdirectory
                        per restart cycle
    output_directory -- directory receiving the merged files
    override         -- passed through to truncate/concatenate (unused there)
    """
    # Ensure a trailing slash so paths can be built by concatenation
    # (endswith also tolerates an empty string, unlike path_data[-1]).
    if not path_data.endswith('/'):
        path_data += '/'

    # One subdirectory per restart cycle, in sorted order.
    list_output_dirs = [path_data + directory + '/'
                        for directory in sorted(os.listdir(path_data))]

    # All cycles contain identically named files, so the file names of the
    # first cycle define the list of files (sites) to process.
    if list_output_dirs:
        output_file_list = sorted(os.listdir(list_output_dirs[0]))
    else:
        output_file_list = []

    n_dirs = len(list_output_dirs)
    n_files = len(output_file_list)
    start_inds = [[0] * n_dirs for _ in range(n_files)]
    end_inds = [[0] * n_dirs for _ in range(n_files)]
    input_files = [[None] * n_dirs for _ in range(n_files)]
    sites = [None] * n_files

    for counter_file, filename in enumerate(output_file_list):
        for counter_dir, directory in enumerate(list_output_dirs):
            file_process = directory + filename
            # Truncate in place and remember the number of valid time levels.
            end_ind, sites[counter_file] = truncate(file_process, override)
            # NOTE(review): the site attribute returned by truncate is
            # immediately replaced by the file name, as in the original code.
            sites[counter_file] = filename

            if counter_dir != 0:
                # Later cycles continue where the previous one ended.
                start_inds[counter_file][counter_dir] = \
                    end_inds[counter_file][counter_dir - 1]

            end_inds[counter_file][counter_dir] = \
                start_inds[counter_file][counter_dir] + end_ind

            input_files[counter_file][counter_dir] = file_process

    # Concatenate the files of all restart cycles, one output file per site.
    concatenate(input_files, start_inds, sites, output_directory, override)
261
262
263# - - - - - - - - - - - - - - -
264
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Merge virtual measurement output from multiple PALM run cycles',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', metavar='IN',
                        help='PALM output directory containing virtual measurements')
    # BUGFIX: 'nargs=1' was dropped.  With it, a user-supplied value arrived
    # as a one-element list while the default stayed a plain string, and the
    # list crashed the path concatenation in concatenate().
    parser.add_argument('--out', '-o', metavar='OUT', default='./merge',
                        help='Output directory to store merged data')

    args = parser.parse_args()

    main(args.input, output_directory=args.out, override=True)
Note: See TracBrowser for help on using the repository browser.