1 | #!/usr/bin/env python3 |
---|
2 | # -*- coding: utf-8 -*- |
---|
3 | # |
---|
4 | #--------------------------------------------------------------------------------# |
---|
5 | # This file is part of the PALM model system. |
---|
6 | # |
---|
7 | # PALM is free software: you can redistribute it and/or modify it under the terms |
---|
8 | # of the GNU General Public License as published by the Free Software Foundation, |
---|
9 | # either version 3 of the License, or (at your option) any later version. |
---|
10 | # |
---|
11 | # PALM is distributed in the hope that it will be useful, but WITHOUT ANY |
---|
12 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
---|
13 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
---|
14 | # |
---|
15 | # You should have received a copy of the GNU General Public License along with |
---|
16 | # PALM. If not, see <http://www.gnu.org/licenses/>. |
---|
17 | # |
---|
18 | # Copyright 1997-2021 Leibniz Universitaet Hannover |
---|
19 | #--------------------------------------------------------------------------------# |
---|
20 | # |
---|
21 | # Current revisions: |
---|
22 | # ----------------- |
---|
23 | # |
---|
24 | # |
---|
25 | # Former revisions: |
---|
26 | # ----------------- |
---|
27 | # $Id: postprocess_vm_measurements.py 4853 2021-01-15 15:22:11Z suehring # |
---|
28 | # Initial revision |
---|
29 | # |
---|
30 | # |
---|
31 | # |
---|
32 | # |
---|
33 | #--------------------------------------------------------------------------------# |
---|
34 | # |
---|
35 | |
---|
36 | # |
---|
37 | # Description: |
---|
38 | # ------------ |
---|
39 | # Postprocessing tool to merge output of virtual measurements into site-specific |
---|
40 | # files. |
---|
41 | # |
---|
42 | # Usage: |
---|
43 | #------- |
---|
44 | """Merge virtual measurement output: removes empty timestamps from the netCDF |
---|
45 | files and concatenates files from several restart files into one file. |
---|
46 | |
---|
47 | Example: |
---|
48 | module load nco anaconda3 |
---|
49 | python3 postprocess_vm_measurements.py my_palm_simulation/OUTPUT |
---|
50 | """ |
---|
51 | # |
---|
52 | # @Authors Matthias Suehring (suehring@muk.uni-hannover.de) |
---|
53 | # Tobias Gronemeier (gronemeier@muk.uni-hannover.de) |
---|
54 | # |
---|
55 | #--------------------------------------------------------------------------------# |
---|
56 | # |
---|
57 | |
---|
58 | |
---|
59 | import argparse |
---|
60 | import subprocess |
---|
61 | import os |
---|
62 | import sys |
---|
63 | try: |
---|
64 | import numpy as np |
---|
65 | except ImportError: |
---|
66 | sys.exit( |
---|
67 | 'package "numpy" is required but not installed! Run\n' |
---|
68 | + 'python -m pip install --user numpy\nto install it.') |
---|
69 | try: |
---|
70 | from netCDF4 import Dataset |
---|
71 | except ImportError: |
---|
72 | sys.exit( |
---|
73 | 'package "netCDF4" is required but not installed! Run\n' |
---|
74 | + 'python -m pip install --user netCDF4\nto install it.') |
---|
75 | |
---|
76 | # - - - - - - - - - - - - - - - |
---|
77 | |
---|
78 | |
---|
79 | def concatenate(file_list, inds, sites, out_dir, override=True): |
---|
80 | """Concatenate netCDF files via ncrcat. |
---|
81 | |
---|
82 | Concatenate a list of netCDF files using NCO command 'ncrcat'. |
---|
83 | Return value: output file |
---|
84 | """ |
---|
85 | |
---|
86 | if not os.path.isdir(out_dir): |
---|
87 | mkdir = os.mkdir(out_dir) |
---|
88 | |
---|
89 | nco_command = ["ncrcat"] |
---|
90 | counter_file = 0 |
---|
91 | for files in file_list: |
---|
92 | |
---|
93 | ncrcat_command = "ncrcat -O " |
---|
94 | counter_dir = 0 |
---|
95 | for single_file in files: |
---|
96 | if counter_dir != 0: |
---|
97 | ncfile = Dataset(single_file, "r") |
---|
98 | |
---|
99 | soil = False |
---|
100 | for var in ncfile.variables.keys(): |
---|
101 | if var == "time_soil": |
---|
102 | soil = True |
---|
103 | |
---|
104 | nco_command = "ncap2 -O -s 'ntime=ntime+{}' ".format( |
---|
105 | inds[counter_file][counter_dir]) |
---|
106 | if soil is True: |
---|
107 | nco_command += " -s 'ntime_soil=ntime_soil+{}' ".format( |
---|
108 | inds[counter_file][counter_dir]) |
---|
109 | |
---|
110 | nco_command += " " + single_file + " " + single_file |
---|
111 | print(nco_command) |
---|
112 | nco_output = subprocess.run(nco_command, shell=True, check=True) |
---|
113 | |
---|
114 | # if counter_dir == 0: |
---|
115 | # cp = os.system("cp " + single_file + " " + out_dir + "/" + sites[counter_file]) |
---|
116 | # print("cp " + single_file + " " + out_dir + "/" + sites[counter_file]) |
---|
117 | # ncrcat_command += "/" + out_dir + "/" + sites[counter_file] + " " |
---|
118 | # else: |
---|
119 | # ncrcat_command += single_file + " " |
---|
120 | ncrcat_command += single_file + " " |
---|
121 | counter_dir += 1 |
---|
122 | |
---|
123 | ncrcat_command += out_dir + "/" |
---|
124 | if os.path.isfile(out_dir + "/" + sites[counter_file]): |
---|
125 | start = sites[counter_file].find('site') |
---|
126 | end = start + 7 |
---|
127 | |
---|
128 | string_dum = sites[counter_file] |
---|
129 | outfile = sites[counter_file] + "_" + string_dum[start:end] |
---|
130 | print(string_dum[start:end]) |
---|
131 | else: |
---|
132 | outfile = sites[counter_file] |
---|
133 | |
---|
134 | ncrcat_command += outfile |
---|
135 | print(ncrcat_command) |
---|
136 | ncrcat_output = subprocess.run(ncrcat_command, shell=True, check=True) |
---|
137 | counter_file += 1 |
---|
138 | |
---|
139 | # nco_output = subprocess.run(nco_command, shell=True, check=True, stdout=subprocess.PIPE) |
---|
140 | |
---|
141 | return nco_command |
---|
142 | |
---|
143 | # - - - - - - - - - - - - - - - |
---|
144 | |
---|
145 | |
---|
def truncate(file_process, override=True):
    """Remove empty trailing time levels from a virtual-measurement file.

    Inspect the 'time' variable of the given netCDF file and determine the
    last valid time index (entries that are masked or exactly zero mark
    unused, empty timestamps).  If empty entries exist, cut them off in
    place with 'ncks' and promote ntime (and ntime_soil, if present) to
    record dimensions so that the files can later be merged with ncrcat.

    Arguments:
    file_process -- path of the netCDF file to truncate (modified in place)
    override     -- unused; kept for backward-compatible interface

    Return value: tuple (end_index, site) with the last kept time index and
    the file's 'site' global attribute (None if the attribute is missing).
    """
    # Gather information about the time coordinate in the file.
    ncfile = Dataset(file_process, "r")
    # Query the record-dimension status before the dataset is closed below.
    time_dim_unlimited = ncfile.dimensions["ntime"].isunlimited()
    time_var = ncfile.variables["time"][:, :]
    time_mask = ~np.ma.getmaskarray(time_var)
    start_index = 0

    # Check whether a separate soil time coordinate exists.
    soil = "time_soil" in ncfile.variables.keys()

    # BUGFIX: the original conditions used 'time_mask is False', which tests
    # object identity against the False singleton and is always False for a
    # numpy array, so masked timestamps were never detected.  Use proper
    # element-wise tests instead.
    if not np.all(time_mask):
        end_ind = np.where(~time_mask)
        end_index = end_ind[0][0] - 1
        cut = True
    elif np.any(time_var == 0):
        end_ind = np.where(time_var == 0)
        end_index = end_ind[0][0] - 1
        cut = True
    else:
        # No empty entries: keep all time levels.
        # NOTE(review): this takes the length of the first row of 'time',
        # while the cut branches index the first dimension -- kept as in the
        # original; confirm the intended dimension ordering.
        end_index = len(time_var[:][0])
        cut = False

    site = None
    for att in ncfile.ncattrs():
        if att == "site":
            site = ncfile.getncattr(att)

    # Close the dataset before ncks rewrites the file in place.
    ncfile.close()

    if cut:
        # Compose the ncks command: keep indices start_index..end_index and
        # make the time dimension(s) record dimensions if necessary.
        ncks_command = "ncks -O -d ntime,{0},{1}".format(start_index, end_index)
        if not time_dim_unlimited:
            ncks_command += " --mk_rec_dmn"
            ncks_command += " ntime"
        if soil is True:
            ncks_command += " -d ntime_soil,{0},{1}".format(start_index, end_index)
            ncks_command += " --mk_rec_dmn"
            ncks_command += " ntime_soil"

        ncks_command += " " + file_process + " " + file_process

        # Cut time levels using ncks.
        print(ncks_command)
        subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE)

    return end_index, site
---|
207 | |
---|
208 | # - - - - - - - - - - - - - - - |
---|
209 | |
---|
210 | |
---|
def main(path_data, output_directory, override=True):
    """Truncate and merge all virtual measurement files below path_data.

    path_data is expected to contain one subdirectory per PALM run cycle
    (restart), each holding one netCDF file per measurement site.  All files
    are first truncated in place (empty timestamps removed via truncate()),
    then the per-cycle files of each site are concatenated into a single
    file in output_directory via concatenate().

    Arguments:
    path_data        -- PALM output directory with per-cycle subdirectories
    output_directory -- directory where the merged files are written
    override         -- passed through to truncate()/concatenate()
    """
    # Robust against missing or empty path: append the separator only when
    # it is not already there (the original indexed path_data[-1], which
    # raised IndexError for an empty string).
    if not path_data.endswith('/'):
        path_data += '/'

    # One output directory per run cycle, in sorted (chronological) order.
    list_output_dirs = [path_data + directory + '/'
                        for directory in sorted(os.listdir(path_data))]

    # The set of site files to process is taken from the first cycle
    # directory; all cycles are assumed to contain the same file names.
    if list_output_dirs:
        output_file_list = sorted(os.listdir(list_output_dirs[0]))
    else:
        output_file_list = []

    n_dirs = len(list_output_dirs)
    n_files = len(output_file_list)
    start_inds = [[0] * n_dirs for _ in range(n_files)]
    end_inds = [[0] * n_dirs for _ in range(n_files)]
    input_files = [[None] * n_dirs for _ in range(n_files)]
    sites = [None] * n_files

    for counter_file, filename in enumerate(output_file_list):
        for counter_dir, directory in enumerate(list_output_dirs):
            file_process = directory + filename
            end_ind, sites[counter_file] = truncate(file_process, override)
            # NOTE(review): the site attribute returned by truncate() is
            # immediately overwritten by the file name, as in the original.
            sites[counter_file] = filename

            # Time indices of consecutive cycles continue where the
            # previous cycle ended.
            if counter_dir != 0:
                start_inds[counter_file][counter_dir] = \
                    end_inds[counter_file][counter_dir - 1]
            end_inds[counter_file][counter_dir] = \
                start_inds[counter_file][counter_dir] + end_ind

            input_files[counter_file][counter_dir] = file_process

    # Concatenate all files of each site into output_directory.
    concatenate(input_files, start_inds, sites, output_directory, override)
---|
261 | |
---|
262 | |
---|
263 | # - - - - - - - - - - - - - - - |
---|
264 | |
---|
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Merge virtual measurement output from multiple PALM run cycles',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', metavar='IN',
                        help='PALM output directory containing virtual measurements')
    # BUGFIX: 'nargs=1' was dropped.  It made args.out a one-element list
    # while the default stayed a plain string, so any user-supplied --out
    # broke the string concatenation in main()/concatenate().  The command
    # line interface ('--out VALUE') is unchanged.
    parser.add_argument('--out', '-o', metavar='OUT', default='./merge',
                        help='Output directory to store merged data')

    args = parser.parse_args()
    path_data = args.input
    output_directory = args.out

    main(path_data, output_directory=output_directory, override=True)
---|