Changeset 4879
- Timestamp:
- Feb 18, 2021 11:15:29 AM (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SCRIPTS/postprocess_vm_measurements.py
r4854 r4879 1 1 #!/usr/bin/env python3 2 # -*- coding: utf-8 -*- 3 # 4 #--------------------------------------------------------------------------------# 2 # --------------------------------------------------------------------------------# 5 3 # This file is part of the PALM model system. 6 4 # … … 17 15 # 18 16 # Copyright 1997-2021 Leibniz Universitaet Hannover 19 # --------------------------------------------------------------------------------#17 # --------------------------------------------------------------------------------# 20 18 # 21 19 # Current revisions: … … 26 24 # ----------------- 27 25 # $Id: postprocess_vm_measurements.py 4853 2021-01-15 15:22:11Z suehring # 26 # extensive re-work of postprocess_vm_measurements.py: 27 # - bugfix: convert atmosphere and soil time to record dimension 28 # - bugfix: make overwrite optional 29 # - reduce complexity 30 # - remove unnecessary parts 31 # - variable renaming 32 # - code restructuring to follow coding standard 33 # 34 # 4853 2021-01-15 15:22:11Z suehring 28 35 # Initial revision 29 36 # 30 # 31 # 32 # 33 #--------------------------------------------------------------------------------# 34 # 35 36 # 37 # --------------------------------------------------------------------------------# 37 38 # Description: 38 39 # ------------ 39 # Postprocessing tool to merge output of virtual measurements into site-specific 40 # files. 41 # 42 # Usage: 43 #------- 44 """Merge virtual measurement output: removes empty timestamps from the netcdf 45 files and concatenates files from several restart files into one file- 40 """Merge virtual measurement output. 41 42 Removes empty time stamps from the netCDF files and concatenates files 43 from several restart files into one file. 46 44 47 45 Example: … … 50 48 """ 51 49 # 52 # @Authors Matthias S uehring (suehring@muk.uni-hannover.de)50 # @Authors Matthias SÃŒhring (suehring@muk.uni-hannover.de) 53 51 # Tobias Gronemeier (gronemeier@muk.uni-hannover.de) 54 # 55 #--------------------------------------------------------------------------------# 56 # 52 # --------------------------------------------------------------------------------# 57 53 58 54 … … 74 70 + 'python -m pip install --user netCDF4\nto install it.') 75 71 76 # - - - - - - - - - - - - - - - 77 78 79 def concatenate(file_list, inds, sites, out_dir, override=True): 72 73 def concatenate(files_per_site, sites, output_directory, overwrite_file=False): 80 74 """Concatenate netCDF files via ncrcat. 81 75 … … 84 78 """ 85 79 86 if not os.path.isdir(out_dir): 87 mkdir = os.mkdir(out_dir) 88 89 nco_command = ["ncrcat"] 90 counter_file = 0 91 for files in file_list: 92 93 ncrcat_command = "ncrcat -O " 94 counter_dir = 0 95 for single_file in files: 96 if counter_dir != 0: 97 ncfile = Dataset(single_file, "r") 98 99 soil = False 100 for var in ncfile.variables.keys(): 101 if var == "time_soil": 102 soil = True 103 104 nco_command = "ncap2 -O -s 'ntime=ntime+{}' ".format( 105 inds[counter_file][counter_dir]) 106 if soil is True: 107 nco_command += " -s 'ntime_soil=ntime_soil+{}' ".format( 108 inds[counter_file][counter_dir]) 109 110 nco_command += " " + single_file + " " + single_file 111 print(nco_command) 112 nco_output = subprocess.run(nco_command, shell=True, check=True) 113 114 # if counter_dir == 0: 115 # cp = os.system("cp " + single_file + " " + out_dir + "/" + sites[counter_file]) 116 # print("cp " + single_file + " " + out_dir + "/" + sites[counter_file]) 117 # ncrcat_command += "/" + out_dir + "/" + sites[counter_file] + " " 118 # else: 119 # ncrcat_command += single_file + " " 120 ncrcat_command += single_file + " " 121 counter_dir += 1 122 123 ncrcat_command += out_dir + "/" 124 if os.path.isfile(out_dir + "/" + sites[counter_file]): 125 start = sites[counter_file].find('site') 126 end = start + 7 127 128 string_dum = sites[counter_file] 129 outfile = sites[counter_file] + "_" + string_dum[start:end] 130 print(string_dum[start:end]) 131 else: 132 outfile = sites[counter_file] 133 134 ncrcat_command += outfile 80 if not os.path.isdir(output_directory): 81 mkdir = os.mkdir(output_directory) 82 83 if output_directory[-1] != '/': 84 output_directory += '/' 85 86 for site_index, file_list in enumerate(files_per_site): 87 88 ncrcat_command = "ncrcat" 89 90 if overwrite_file: 91 ncrcat_command += " -O" 92 93 for file_name in file_list: 94 ncrcat_command += " " + file_name 95 96 # Check if output file already exists 97 output_file = output_directory + sites[site_index] 98 if not overwrite_file and os.path.isfile(output_file): 99 for i in range(1000): 100 output_file = output_directory + sites[site_index] + "_{:03d}".format(i) 101 if not os.path.isfile(output_file): 102 break 103 elif i == 999: 104 raise IOError("could not guarantee non overwriting output file: {}".format( 105 output_file)) 106 ncrcat_command += " " + output_file 107 135 108 print(ncrcat_command) 136 109 ncrcat_output = subprocess.run(ncrcat_command, shell=True, check=True) 137 counter_file += 1 138 139 # nco_output = subprocess.run(nco_command, shell=True, check=True, stdout=subprocess.PIPE) 140 141 return nco_command 142 143 # - - - - - - - - - - - - - - - 144 145 146 def truncate(file_process, override=True): 147 148 # print("file " + file_process) 110 111 return output_file 112 113 114 def truncate(input_file, time_index_shift=0, overwrite_file=False): 115 """Truncate netCDF files via ncrcat. 116 117 Truncate all time dimensions of the input file and convert them to 118 record dimensions. The output is saved to 'input_file.trunc' or to 119 'input_file.trunc.nc' if the input_file has a '.nc' extension. 120 If "overwrite_file" is true, write output directly to input_file. 121 Shift the time index variables by time_index_shift. 122 123 Return values: 124 highest time index of time dimension in output file 125 output-file name 126 """ 127 149 128 # Gather information about time coordinate in file 150 ncfile = Dataset( file_process, "r")129 ncfile = Dataset(input_file, "r") 151 130 time_dim = ncfile.dimensions["ntime"] 152 131 time_var = ncfile.variables["time"][:, :] … … 154 133 start_index = 0 155 134 156 soil = False 157 for var in ncfile.variables.keys(): 158 if var == "time_soil": 159 soil = True 135 soil = any([var == "time_soil" for var in ncfile.variables.keys()]) 160 136 161 137 if np.any(time_mask is False): … … 184 160 # site = site + "_traj" 185 161 # print(cut) 186 ncks_output = [] 162 163 # Compose nco commands 164 ncks_command = "ncks" 165 166 if overwrite_file: 167 ncks_command += " -O" 168 output_file = input_file 169 else: 170 # Add '.trunc' to file name before '.nc' file extension 171 output_file, file_extension = os.path.splitext(input_file) 172 if file_extension != '.nc': 173 output_file += file_extension + '.trunc' 174 else: 175 output_file += '.trunc' + file_extension 176 if os.path.isfile(output_file): 177 raise IOError("truncated file already exists: {}".format(output_file)) 178 187 179 if cut: 188 # Compose ncks command 189 ncks_command = "ncks -O -d ntime,{0},{1}".format(start_index, end_index) 190 if not time_dim.isunlimited(): 191 ncks_command += " --mk_rec_dmn" 192 ncks_command += " ntime" 193 if soil is True: 180 # set dimension limits 181 ncks_command += " -d ntime,{0},{1}".format(start_index, end_index) 182 if soil: 194 183 ncks_command += " -d ntime_soil,{0},{1}".format(start_index, end_index) 195 ncks_command += " --mk_rec_dmn" 196 ncks_command += " ntime_soil" 197 198 ncks_command += " " + file_process + " " + file_process 199 200 # Cut time levels using ncks 184 185 # convert time into record dimension 186 time_is_limited = not time_dim.isunlimited() 187 if time_is_limited: 188 ncks_command += " --mk_rec_dmn" 189 ncks_command += " ntime" 190 191 if cut or time_is_limited: 192 # set input and output file 193 ncks_command += " {0} {1}".format(input_file, output_file) 194 195 # execute ncks 201 196 print(ncks_command) 202 197 ncks_output = subprocess.run(ncks_command, shell=True, check=True, stdout=subprocess.PIPE) 198 199 new_input_file = output_file 200 203 201 else: 204 end_index = len(time_var[:][0]) 205 206 return end_index, site 207 208 # - - - - - - - - - - - - - - - 209 210 211 def main(path_data, output_directory, override=True): 212 213 # Get current working directory 214 work_dir = os.getcwd() 215 216 if path_data[-1] != '/': 217 path_data += '/' 202 new_input_file = input_file 203 204 # If soil is present, also convert soil time to record dimension 205 # (must be done separately due to NCO limitations) 206 if soil: 207 soil_time_is_limited = not ncfile.dimensions["ntime_soil"].isunlimited() 208 if soil_time_is_limited: 209 210 ncks_command = "ncks -O --mk_rec_dmn ntime_soil {0} {1}".format( 211 new_input_file, output_file) 212 print(ncks_command) 213 ncks_output = subprocess.run( 214 ncks_command, shell=True, check=True, stdout=subprocess.PIPE) 215 216 new_input_file = output_file 217 218 # Add time shift to ntime variables 219 if time_index_shift != 0: 220 ncap2_command = "ncap2 -O -s 'ntime=ntime+{}' ".format(time_index_shift) 221 if soil: 222 ncap2_command += " -s 'ntime_soil=ntime_soil+{}' ".format(time_index_shift) 223 224 ncap2_command += " {0} {1}".format(new_input_file, output_file) 225 226 print(ncap2_command) 227 ncap2_output = subprocess.run(ncap2_command, shell=True, check=True) 228 229 end_index += time_index_shift 230 new_input_file = output_file 231 232 return output_file, end_index 233 234 235 def main(base_input_directory, output_directory, overwrite_file=False): 236 237 if base_input_directory[-1] != '/': 238 base_input_directory += '/' 239 240 if output_directory[-1] != '/': 241 output_directory += '/' 218 242 219 243 # Get directory list 220 list_output_dirs = [path_data + directory + '/' for directory in sorted(os.listdir(path_data))] 221 filelist = [] 222 output_file_list = [] 223 counter = 0 244 input_directory_list = [ 245 base_input_directory + directory + '/' for directory in 246 sorted(os.listdir(base_input_directory))] 224 247 225 248 # Obtain list of sites that need to be processed 226 for directory in list_output_dirs: 227 # Get file list 228 file_list = sorted(os.listdir(directory)) 229 for filename in file_list: 230 if counter == 0: 231 output_file_list.append(filename) 232 counter += 1 233 234 start_inds = [[0] * len(list_output_dirs) for i in range(len(output_file_list))] 235 end_inds = [[0] * len(list_output_dirs) for i in range(len(output_file_list))] 236 237 input_files = [[None] * len(list_output_dirs) for i in range(len(output_file_list))] 238 sites = [None] * len(output_file_list) 239 240 counter_file = 0 241 for filename in output_file_list: 242 243 counter_dir = 0 244 for directory in list_output_dirs: 245 file_process = directory + filename 246 end_ind, sites[counter_file] = truncate(file_process, override) 247 sites[counter_file] = filename 248 249 if not counter_dir == 0: 250 start_inds[counter_file][counter_dir] = end_inds[counter_file][counter_dir - 1] 251 252 end_inds[counter_file][counter_dir] = start_inds[counter_file][counter_dir] + end_ind 253 254 input_files[counter_file][counter_dir] = file_process 255 counter_dir += 1 256 257 counter_file += 1 249 sites = sorted(os.listdir(input_directory_list[0])) 250 251 files_per_site_and_directory = [[None] * len(input_directory_list) for i in range(len(sites))] 252 253 # Truncate each file and save end index of time dimension 254 for site_index, site_name in enumerate(sites): 255 256 start_index = 0 257 for dir_index, directory in enumerate(input_directory_list): 258 files_per_site_and_directory[site_index][dir_index], end_index = \ 259 truncate(directory + site_name, start_index, overwrite_file) 260 start_index = end_index 258 261 259 262 # Concatenate all files 260 outfile = concatenate(input_files, start_inds, sites, output_directory, override) 261 262 263 # - - - - - - - - - - - - - - - 263 file_concatenated = concatenate( 264 files_per_site_and_directory, sites, output_directory, overwrite_file) 265 264 266 265 267 if __name__ == '__main__': 266 268 parser = argparse.ArgumentParser( 267 description='Merge virtual measurement output from multiple PALM run cycles', 268 formatter_class=argparse.ArgumentDefaultsHelpFormatter) 269 parser.add_argument('input', metavar='IN', 270 help='PALM output directory containing virtual measurements') 271 parser.add_argument('--out', '-o', metavar='OUT', nargs=1, default='./merge', 272 help='Output directory to store merged data') 269 description='Merge virtual measurement output from multiple PALM run cycles', 270 formatter_class=argparse.ArgumentDefaultsHelpFormatter) 271 parser.add_argument( 272 'input', 273 metavar='IN', 274 help='PALM output directory containing virtual measurements') 275 parser.add_argument( 276 '--out', 277 '-o', 278 metavar='OUT', 279 default='./merge', 280 help='Output directory to store merged data') 281 parser.add_argument( 282 '--overwrite', 283 action='store_true', 284 help='Overwrite input files with output files') 273 285 274 286 args = parser.parse_args() 275 path_data = args.input 276 output_directory = args.out 277 278 main(path_data, output_directory=output_directory, override=True) 287 288 main(args.input, output_directory=args.out, overwrite_file=args.overwrite)
Note: See TracChangeset
for help on using the changeset viewer.