source: palm/trunk/SCRIPTS/postprocess_vm_measurements.py @ 4887

Last change on this file since 4887 was 4879, checked in by gronemeier, 4 years ago

extensive re-work of postprocess_vm_measurements.py:

  • bugfix: convert atmosphere and soil time to record dimension
  • bugfix: make overwrite optional
  • reduce complexity
  • remove unnecessary parts
  • variable renaming
  • code restructuring to follow coding standard
  • Property svn:keywords set to Id
#!/usr/bin/env python3
# --------------------------------------------------------------------------------#
# This file is part of the PALM model system.
#
# PALM is free software: you can redistribute it and/or modify it under the terms
# of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# PALM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 1997-2021  Leibniz Universitaet Hannover
# --------------------------------------------------------------------------------#
#
# Current revisions:
# -----------------
#
#
# Former revisions:
# -----------------
# $Id: postprocess_vm_measurements.py 4853 2021-01-15 15:22:11Z suehring #
# extensive re-work of postprocess_vm_measurements.py:
#    - bugfix: convert atmosphere and soil time to record dimension
#    - bugfix: make overwrite optional
#    - reduce complexity
#    - remove unnecessary parts
#    - variable renaming
#    - code restructuring to follow coding standard
#
# 4853 2021-01-15 15:22:11Z suehring
# Initial revision
#
# --------------------------------------------------------------------------------#
# Description:
# ------------
"""Merge virtual measurement output.

Removes empty time stamps from the netCDF files and concatenates the output
of several restart runs into one file per measurement site.

Example:
module load nco anaconda3
python3 postprocess_vm_measurements.py my_palm_simulation/OUTPUT
"""
#
# @Authors Matthias Sühring (suehring@muk.uni-hannover.de)
#          Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
# --------------------------------------------------------------------------------#


import argparse
import subprocess
import os
import sys
try:
    import numpy as np
except ImportError:
    sys.exit(
        'package "numpy" is required but not installed! Run\n'
        + 'python -m pip install --user numpy\nto install it.')
try:
    from netCDF4 import Dataset
except ImportError:
    sys.exit(
        'package "netCDF4" is required but not installed! Run\n'
        + 'python -m pip install --user netCDF4\nto install it.')
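
# Note: besides numpy and netCDF4, this script relies on the NCO command-line
# tools 'ncrcat', 'ncks', and 'ncap2' being available in PATH; they are invoked
# via subprocess below.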


def concatenate(files_per_site, sites, output_directory, overwrite_file=False):
    """Concatenate netCDF files via ncrcat.

    Concatenate, for each site, a list of netCDF files using the NCO command
    'ncrcat' and return the name of the last output file written.
    """

    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    if output_directory[-1] != '/':
        output_directory += '/'

    for site_index, file_list in enumerate(files_per_site):

        ncrcat_command = "ncrcat"

        if overwrite_file:
            ncrcat_command += " -O"

        for file_name in file_list:
            ncrcat_command += " " + file_name

        # Check if the output file already exists
        output_file = output_directory + sites[site_index]
        if not overwrite_file and os.path.isfile(output_file):
            for i in range(1000):
                output_file = output_directory + sites[site_index] + "_{:03d}".format(i)
                if not os.path.isfile(output_file):
                    break
                elif i == 999:
                    raise IOError("could not find an output file name that does not "
                                  "already exist: {}".format(output_file))
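        # e.g. (hypothetical name): if 'merge/site1' already exists, the output
        # falls back to 'merge/site1_000', 'merge/site1_001', ... up to '_999'.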
        ncrcat_command += " " + output_file

        print(ncrcat_command)
        subprocess.run(ncrcat_command, shell=True, check=True)

    return output_file


def truncate(input_file, time_index_shift=0, overwrite_file=False):
    """Truncate netCDF files via ncks.

    Truncate all time dimensions of the input file and convert them to
    record dimensions. The output is saved to 'input_file.trunc' or to
    'input_file.trunc.nc' if the input_file has a '.nc' extension.
    If "overwrite_file" is true, write the output directly to input_file.
    Shift the time index variables by time_index_shift.

    Return values:
        name of the file containing the processed data
        highest time index of the time dimension in the output file
    """

    # Gather information about the time coordinate in the file
    ncfile = Dataset(input_file, "r")
    time_dim = ncfile.dimensions["ntime"]
    time_var = ncfile.variables["time"][:, :]
    time_mask = ~np.ma.getmaskarray(time_var)
    start_index = 0

    soil = "time_soil" in ncfile.variables

    # Find the last valid time stamp; masked or zero-valued entries mark
    # unused (empty) records that must be cut off.
    if np.any(~time_mask):
        end_ind = np.where(~time_mask)
        end_index = end_ind[0][0] - 1
        cut = True
    elif np.any(time_var == 0):
        end_ind = np.where(time_var == 0)
        end_index = end_ind[0][0] - 1
        cut = True
    else:
        end_index = len(time_var[:][0])
        cut = False
    for att in ncfile.ncattrs():
        if att == "site":
            site = ncfile.getncattr(att)
        if att == "featureType":
            feat = ncfile.getncattr(att)

    # if feat == "timeSeries":
    #     site = site + "_ts"
    # if feat == "timeSeriesProfile":
    #     site = site + "_tspr"
    # if feat == "trajectory":
    #     site = site + "_traj"
    # print(cut)

    # Compose nco commands
    ncks_command = "ncks"

    if overwrite_file:
        ncks_command += " -O"
        output_file = input_file
    else:
        # Insert '.trunc' before a '.nc' file extension; otherwise append it
        output_file, file_extension = os.path.splitext(input_file)
        if file_extension != '.nc':
            output_file += file_extension + '.trunc'
        else:
            output_file += '.trunc' + file_extension
        if os.path.isfile(output_file):
            raise IOError("truncated file already exists: {}".format(output_file))

    if cut:
        # Set dimension limits
        ncks_command += " -d ntime,{0},{1}".format(start_index, end_index)
        if soil:
            ncks_command += " -d ntime_soil,{0},{1}".format(start_index, end_index)

    # Convert time into a record dimension
    time_is_limited = not time_dim.isunlimited()
    if time_is_limited:
        ncks_command += " --mk_rec_dmn ntime"

    if cut or time_is_limited:
        # Set input and output file
        ncks_command += " {0} {1}".format(input_file, output_file)
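        # For illustration (hypothetical names and indices): keeping time records
        # 0 to 120 of a file whose time dimension is fixed yields a command like
        #   ncks -d ntime,0,120 --mk_rec_dmn ntime site1.nc site1.trunc.nc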

        # Execute ncks
        print(ncks_command)
        subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE)

        new_input_file = output_file

    else:
        new_input_file = input_file

    # If soil is present, also convert soil time to a record dimension
    # (must be done separately due to NCO limitations)
    if soil:
        soil_time_is_limited = not ncfile.dimensions["ntime_soil"].isunlimited()
        if soil_time_is_limited:

            ncks_command = "ncks -O --mk_rec_dmn ntime_soil {0} {1}".format(
                    new_input_file, output_file)
            print(ncks_command)
            subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE)

            new_input_file = output_file

    # Add time shift to ntime variables
    if time_index_shift != 0:
        ncap2_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(time_index_shift)
        if soil:
            ncap2_command += " -s 'ntime_soil=ntime_soil+{}' ".format(time_index_shift)

        ncap2_command += " {0} {1}".format(new_input_file, output_file)
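        # For illustration (hypothetical shift and names): with a shift of 121
        # records the command reads roughly
        #   ncap2 -O -s 'ntime=ntime+121' -s 'ntime_soil=ntime_soil+121' site1.trunc.nc site1.trunc.nc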

        print(ncap2_command)
        subprocess.run(ncap2_command, shell=True, check=True)

        end_index += time_index_shift
        new_input_file = output_file

    ncfile.close()

    # new_input_file always points to the file that actually holds the final
    # data (it equals input_file if no nco command had to be executed).
    return new_input_file, end_index


def main(base_input_directory, output_directory, overwrite_file=False):
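    """Truncate and concatenate all virtual measurement files.

    base_input_directory is expected to contain one subdirectory per run
    cycle, each holding identically named output files (one per measurement
    site). Each file is truncated to its valid time records and the per-site
    files of all cycles are concatenated into output_directory.
    """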

    if base_input_directory[-1] != '/':
        base_input_directory += '/'

    if output_directory[-1] != '/':
        output_directory += '/'

    # Get directory list
    input_directory_list = [
            base_input_directory + directory + '/' for directory in
            sorted(os.listdir(base_input_directory))]

    # Obtain list of sites that need to be processed
    sites = sorted(os.listdir(input_directory_list[0]))

    files_per_site_and_directory = [[None] * len(input_directory_list) for _ in sites]

    # Truncate each file and save end index of time dimension
    for site_index, site_name in enumerate(sites):

        start_index = 0
        for dir_index, directory in enumerate(input_directory_list):
            files_per_site_and_directory[site_index][dir_index], end_index = \
                    truncate(directory + site_name, start_index, overwrite_file)
            start_index = end_index

    # Concatenate all files
    concatenate(files_per_site_and_directory, sites, output_directory, overwrite_file)



if __name__ == '__main__':
    parser = argparse.ArgumentParser(
            description='Merge virtual measurement output from multiple PALM run cycles',
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
            'input',
            metavar='IN',
            help='PALM output directory containing virtual measurements')
    parser.add_argument(
            '--out',
            '-o',
            metavar='OUT',
            default='./merge',
            help='Output directory to store merged data')
    parser.add_argument(
            '--overwrite',
            action='store_true',
            help='Overwrite input files with output files')

    args = parser.parse_args()

    main(args.input, output_directory=args.out, overwrite_file=args.overwrite)