Home

Context Navigation

postprocess_vm_measurements.py @ 4882

Last change on this file since 4882 was 4879, checked in by gronemeier, 4 years ago

extensive re-work of postprocess_vm_measurements.py:

bugfix: convert atmosphere and soil time to record dimension
bugfix: make overwrite optional
reduce complexity
remove unnecessary parts
variable renaming
code restructuring to follow coding standard

Property svn:keywords set to Id

File size: 9.9 KB

Line
1	#!/usr/bin/env python3
2	# --------------------------------------------------------------------------------#
3	# This file is part of the PALM model system.
4	#
5	# PALM is free software: you can redistribute it and/or modify it under the terms
6	# of the GNU General Public License as published by the Free Software Foundation,
7	# either version 3 of the License, or (at your option) any later version.
8	#
9	# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
10	# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11	# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12	#
13	# You should have received a copy of the GNU General Public License along with
14	# PALM. If not, see <http://www.gnu.org/licenses/>.
15	#
16	# Copyright 1997-2021 Leibniz Universitaet Hannover
17	# --------------------------------------------------------------------------------#
18	#
19	# Current revisions:
20	# -----------------
21	#
22	#
23	# Former revisions:
24	# -----------------
25	# $Id: postprocess_vm_measurements.py 4853 2021-01-15 15:22:11Z suehring #
26	# extensive re-work of postprocess_vm_measurements.py:
27	# - bugfix: convert atmosphere and soil time to record dimension
28	# - bugfix: make overwrite optional
29	# - reduce complexity
30	# - remove unnecessary parts
31	# - variable renaming
32	# - code restructuring to follow coding standard
33	#
34	# 4853 2021-01-15 15:22:11Z suehring
35	# Initial revision
36	#
37	# --------------------------------------------------------------------------------#
38	# Description:
39	# ------------
40	"""Merge virtual measurement output.
41
42	Removes empty time stamps from the netCDF files and concatenates files
43	from several restart files into one file.
44
45	Example:
46	module load nco anaconda3
47	python3 postprocess_vm_measurements.py my_palm_simulation/OUTPUT
48	"""
49	#
50	# @Authors Matthias SÃŒhring (suehring@muk.uni-hannover.de)
51	# Tobias Gronemeier (gronemeier@muk.uni-hannover.de)
52	# --------------------------------------------------------------------------------#
53
54
55	import argparse
56	import subprocess
57	import os
58	import sys
59	try:
60	import numpy as np
61	except ImportError:
62	sys.exit(
63	'package "numpy" is required but not installed! Run\n'
64	+ 'python -m pip install --user numpy\nto install it.')
65	try:
66	from netCDF4 import Dataset
67	except ImportError:
68	sys.exit(
69	'package "netCDF4" is required but not installed! Run\n'
70	+ 'python -m pip install --user netCDF4\nto install it.')
71
72
73	def concatenate(files_per_site, sites, output_directory, overwrite_file=False):
74	"""Concatenate netCDF files via ncrcat.
75
76	Concatenate a list of netCDF files using NCO command 'ncrcat'.
77	Return value: output file
78	"""
79
80	if not os.path.isdir(output_directory):
81	mkdir = os.mkdir(output_directory)
82
83	if output_directory[-1] != '/':
84	output_directory += '/'
85
86	for site_index, file_list in enumerate(files_per_site):
87
88	ncrcat_command = "ncrcat"
89
90	if overwrite_file:
91	ncrcat_command += " -O"
92
93	for file_name in file_list:
94	ncrcat_command += " " + file_name
95
96	# Check if output file already exists
97	output_file = output_directory + sites[site_index]
98	if not overwrite_file and os.path.isfile(output_file):
99	for i in range(1000):
100	output_file = output_directory + sites[site_index] + "_{:03d}".format(i)
101	if not os.path.isfile(output_file):
102	break
103	elif i == 999:
104	raise IOError("could not guarantee non overwriting output file: {}".format(
105	output_file))
106	ncrcat_command += " " + output_file
107
108	print(ncrcat_command)
109	ncrcat_output = subprocess.run(ncrcat_command, shell=True, check=True)
110
111	return output_file
112
113
114	def truncate(input_file, time_index_shift=0, overwrite_file=False):
115	"""Truncate netCDF files via ncrcat.
116
117	Truncate all time dimensions of the input file and convert them to
118	record dimensions. The output is saved to 'input_file.trunc' or to
119	'input_file.trunc.nc' if the input_file has a '.nc' extension.
120	If "overwrite_file" is true, write output directly to input_file.
121	Shift the time index variables by time_index_shift.
122
123	Return values:
124	highest time index of time dimension in output file
125	output-file name
126	"""
127
128	# Gather information about time coordinate in file
129	ncfile = Dataset(input_file, "r")
130	time_dim = ncfile.dimensions["ntime"]
131	time_var = ncfile.variables["time"][:, :]
132	time_mask = ~np.ma.getmaskarray(time_var)
133	start_index = 0
134
135	soil = any([var == "time_soil" for var in ncfile.variables.keys()])
136
137	if np.any(time_mask is False):
138	end_ind = np.where(time_mask is False)
139	end_index = end_ind[0][0] - 1
140	cut = True
141	elif np.any(time_var == 0):
142	end_ind = np.where(time_var == 0)
143	end_index = end_ind[0][0] - 1
144	cut = True
145	else:
146	end_index = len(time_var[:][0])
147	cut = False
148
149	for att in ncfile.ncattrs():
150	if (att == "site"):
151	site = ncfile.getncattr(att)
152	if (att == "featureType"):
153	feat = ncfile.getncattr(att)
154
155	# if feat == "timeSeries":
156	# site = site + "_ts"
157	# if feat == "timeSeriesProfile":
158	# site = site + "_tspr"
159	# if feat == "trajectory":
160	# site = site + "_traj"
161	# print(cut)
162
163	# Compose nco commands
164	ncks_command = "ncks"
165
166	if overwrite_file:
167	ncks_command += " -O"
168	output_file = input_file
169	else:
170	# Add '.trunc' to file name before '.nc' file extension
171	output_file, file_extension = os.path.splitext(input_file)
172	if file_extension != '.nc':
173	output_file += file_extension + '.trunc'
174	else:
175	output_file += '.trunc' + file_extension
176	if os.path.isfile(output_file):
177	raise IOError("truncated file already exists: {}".format(output_file))
178
179	if cut:
180	# set dimension limits
181	ncks_command += " -d ntime,{0},{1}".format(start_index, end_index)
182	if soil:
183	ncks_command += " -d ntime_soil,{0},{1}".format(start_index, end_index)
184
185	# convert time into record dimension
186	time_is_limited = not time_dim.isunlimited()
187	if time_is_limited:
188	ncks_command += " --mk_rec_dmn"
189	ncks_command += " ntime"
190
191	if cut or time_is_limited:
192	# set input and output file
193	ncks_command += " {0} {1}".format(input_file, output_file)
194
195	# execute ncks
196	print(ncks_command)
197	ncks_output = subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE)
198
199	new_input_file = output_file
200
201	else:
202	new_input_file = input_file
203
204	# If soil is present, also convert soil time to record dimension
205	# (must be done separately due to NCO limitations)
206	if soil:
207	soil_time_is_limited = not ncfile.dimensions["ntime_soil"].isunlimited()
208	if soil_time_is_limited:
209
210	ncks_command = "ncks -O --mk_rec_dmn ntime_soil {0} {1}".format(
211	new_input_file, output_file)
212	print(ncks_command)
213	ncks_output = subprocess.run(
214	ncks_command, shell=True, check=True, stdout=subprocess.PIPE)
215
216	new_input_file = output_file
217
218	# Add time shift to ntime variables
219	if time_index_shift != 0:
220	ncap2_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(time_index_shift)
221	if soil:
222	ncap2_command += " -s 'ntime_soil=ntime_soil+{}' ".format(time_index_shift)
223
224	ncap2_command += " {0} {1}".format(new_input_file, output_file)
225
226	print(ncap2_command)
227	ncap2_output = subprocess.run(ncap2_command, shell=True, check=True)
228
229	end_index += time_index_shift
230	new_input_file = output_file
231
232	return output_file, end_index
233
234
235	def main(base_input_directory, output_directory, overwrite_file=False):
236
237	if base_input_directory[-1] != '/':
238	base_input_directory += '/'
239
240	if output_directory[-1] != '/':
241	output_directory += '/'
242
243	# Get directory list
244	input_directory_list = [
245	base_input_directory + directory + '/' for directory in
246	sorted(os.listdir(base_input_directory))]
247
248	# Obtain list of sites that need to be processed
249	sites = sorted(os.listdir(input_directory_list[0]))
250
251	files_per_site_and_directory = [[None] * len(input_directory_list) for i in range(len(sites))]
252
253	# Truncate each file and save end index of time dimension
254	for site_index, site_name in enumerate(sites):
255
256	start_index = 0
257	for dir_index, directory in enumerate(input_directory_list):
258	files_per_site_and_directory[site_index][dir_index], end_index = \
259	truncate(directory + site_name, start_index, overwrite_file)
260	start_index = end_index
261
262	# Concatenate all files
263	file_concatenated = concatenate(
264	files_per_site_and_directory, sites, output_directory, overwrite_file)
265
266
267	if __name__ == '__main__':
268	parser = argparse.ArgumentParser(
269	description='Merge virtual measurement output from multiple PALM run cycles',
270	formatter_class=argparse.ArgumentDefaultsHelpFormatter)
271	parser.add_argument(
272	'input',
273	metavar='IN',
274	help='PALM output directory containing virtual measurements')
275	parser.add_argument(
276	'--out',
277	'-o',
278	metavar='OUT',
279	default='./merge',
280	help='Output directory to store merged data')
281	parser.add_argument(
282	'--overwrite',
283	action='store_true',
284	help='Overwrite input files with output files')
285
286	args = parser.parse_args()
287
288	main(args.input, output_directory=args.out, overwrite_file=args.overwrite)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: palm/trunk/SCRIPTS/postprocess_vm_measurements.py @ 4882

Download in other formats: