# Source code for ecmwf_models.erainterim.download

# -*- coding: utf-8 -*-

"""
Module to download ERA Interim from terminal.
"""

from ecmwfapi import ECMWFDataServer
import argparse
import sys
from datetime import datetime, timedelta
import shutil
import os
import warnings

from ecmwf_models.utils import (
    load_var_table,
    save_ncs_from_nc,
    save_gribs_from_grib,
    lookup,
    mkdate,
    str2bool
)


def default_variables() -> list:
    """
    Return the 'dl_name' ids of all variables flagged as defaults in the
    ERA Interim lookup table. These are downloaded when the user passes None.
    """
    table = load_var_table(name="ERAINT")
    default_rows = table.loc[table["default"] == 1]
    return default_rows["dl_name"].values.tolist()
def download_eraint(
    target_path,
    start,
    end,
    variables,
    grid_size=None,
    type="fc",
    h_steps=(0, 6, 12, 18),
    grb=False,
    dry_run=False,
    steps=(0,),
):
    """
    Download ERA Interim data from the ECMWF MARS archive.

    Parameters
    ----------
    target_path : str
        Path at which to save the downloaded grib/netcdf file.
    start : datetime
        Start date of the download interval.
    end : datetime
        End date of the download interval.
    variables : list
        MARS parameter ids to download, see the product wiki.
    grid_size : list of float, optional (default: None)
        Size of the grid in form (lon, lat), which the data is resampled to.
        If None is passed the minimum grid for the product is chosen
        (0.75/0.75 deg for netcdf; for grib the native grid is kept).
    type : str, optional (default: 'fc')
        MARS data type, e.g. 'fc' (forecast) or 'an' (analysis).
    h_steps : tuple, optional (default: (0, 6, 12, 18))
        List of full hours to download data at the selected dates.
    grb : bool, optional (default: False)
        Download data as grb files instead of nc files.
    dry_run : bool, optional (default: False)
        Do not download anything, this is just used for testing the functions.
    steps : tuple, optional (default: (0,))
        Forecast steps to download.

    Raises
    ------
    Warning
        If a custom grid finer than the native ERA Interim resolution
        (0.75 deg) is requested.
    """
    if dry_run:
        warnings.warn("Dry run does not create connection to ECMWF")
        server = None
    else:
        server = ECMWFDataServer()

    # Validate a custom grid against the native resolution BEFORE converting
    # it to a MARS grid string. The previous implementation converted first
    # and then iterated the string's characters, so `size < 0.75` compared
    # str to float and raised TypeError for any custom grid on Python 3.
    if grid_size is not None and any(size < 0.75 for size in grid_size):
        raise Warning(
            "Custom grid smaller than original ERA Interim resolution. "
            "See https://software.ecmwf.int/wiki/display/CKB/"
            "Does+downloading+data+at+higher+resolution+improve+the+output"  # noqa: E501
        )

    dataset = "interim"
    dataclass = "ei"

    param_string = "/".join(str(variable) for variable in variables)
    timestep_string = "/".join("%02d" % timestep for timestep in h_steps)
    date_string = "%s/to/%s" % (
        start.strftime("%Y-%m-%d"),
        end.strftime("%Y-%m-%d"),
    )
    step = "/".join(str(s) for s in steps)

    # ATTENTION: When downloading netcdf files steps and times
    # must not overlap!! see:
    # https://software.ecmwf.int/wiki/display/CKB/What+to+do+with+ECCODES+ERROR+%3A+Try+using+the+-T+option  # noqa: E501
    dl_params = {
        "class": dataclass,
        "dataset": dataset,
        "expver": "1",
        "stream": "oper",
        "type": type,
        "levtype": "sfc",
        "param": param_string,
        "date": date_string,
        "time": timestep_string,
        "step": step,
        "grid": "%f/%f" % (grid_size[0], grid_size[1]) if grid_size else None,
        "format": "grib1" if grb else "netcdf",
        "target": target_path,
    }

    if not grid_size:
        if not grb:
            # netcdf requires a regular grid; fall back to the native 0.75 deg
            dl_params["grid"] = "%f/%f" % (0.75, 0.75)
        else:
            # grib: omit the grid keyword to keep the native grid
            del dl_params["grid"]

    if not dry_run:
        server.retrieve(dl_params)
def download_and_move(
    target_path,
    startdate,
    enddate,
    variables=None,
    keep_original=False,
    grid_size=None,
    type="an",
    h_steps=(0, 6, 12, 18),
    steps=(0,),
    grb=False,
    dry_run=False,
):
    """
    Download data from the ECMWF servers and move it to the target path.

    Downloads are requested in 30-day chunks between start and end date to
    be efficient with the MARS system, see the recommendation in
    https://software.ecmwf.int/wiki/display/WEBAPI/ERA-Interim+daily+retrieval+efficiency
    Each chunk file is then split into single grib/nc images stored in
    yearly folders under the target_path.

    Parameters
    ----------
    target_path : str
        Path to which to copy the extracted parameter files.
    startdate : datetime
        First date to download.
    enddate : datetime
        Last date to download.
    variables : list, optional (default: None)
        List of variable ids to pass to the client; if None is passed,
        the default variable ids will be downloaded.
    keep_original : bool, optional (default: False)
        Keep the original downloaded data instead of removing it.
    grid_size : list, optional (default: None)
        [lon, lat] extent of the grid (regular for netcdf, at lat=0 for
        grib). If None is passed, the product's default grid size is used.
    type : str, optional (default: 'an')
        Data stream/model to download data for (fc=forecast).
    h_steps : tuple, optional (default: (0, 6, 12, 18))
        List of full hours to download data at the selected dates.
    steps : tuple, optional (default: (0,))
        Forecast steps to download.
    grb : bool, optional (default: False)
        Download data as grib files instead of netcdf files.
    dry_run : bool
        Do not download anything, this is just used for testing the
        functions.
    """
    product = "eraint"

    # Resolve the MARS download names: product defaults, or the 'dl_name'
    # entries matching the user-passed variable ids.
    if variables is None:
        variables = default_variables()
    else:
        variables = lookup(name=product,
                           variables=variables)["dl_name"].values.tolist()

    chunk = timedelta(days=30)
    chunk_start = startdate

    while chunk_start <= enddate:
        # Never request past the overall end date.
        chunk_end = min(chunk_start + chunk, enddate)

        fname = "{start}_{end}.{ext}".format(
            start=chunk_start.strftime("%Y%m%d"),
            end=chunk_end.strftime("%Y%m%d"),
            ext="grb" if grb else "nc",
        )

        tmp_dir = os.path.join(target_path, "temp_downloaded")
        if not os.path.exists(tmp_dir):
            os.mkdir(tmp_dir)
        dl_file = os.path.join(tmp_dir, fname)

        download_eraint(
            dl_file,
            chunk_start,
            chunk_end,
            variables,
            grid_size=grid_size,
            h_steps=h_steps,
            type=type,
            steps=steps,
            grb=grb,
            dry_run=dry_run,
        )

        # Split the chunk file into single images in yearly subfolders.
        extract = save_gribs_from_grib if grb else save_ncs_from_nc
        extract(dl_file, target_path, product.upper())

        if not keep_original:
            shutil.rmtree(tmp_dir)

        chunk_start = chunk_end + timedelta(days=1)
def parse_args(args):
    """
    Parse command line parameters for recursive download.

    Parameters
    ----------
    args : list
        Command line parameters as list of strings.

    Returns
    ----------
    clparams : argparse.Namespace
        Parsed command line parameters.
    """
    parser = argparse.ArgumentParser(
        description="Download ERA Interim data (6H) between two dates. "
        "Before this program can be used, you have to register at ECMWF "
        "and setup your .ecmwfapirc file as described here: "
        "https://confluence.ecmwf.int//display/WEBAPI/Access+ECMWF+Public+Datasets#AccessECMWFPublicDatasets-key"  # noqa: E501
    )
    parser.add_argument(
        "localroot",
        help="Root of local filesystem where the downloaded data is stored.",
    )
    parser.add_argument(
        "-s",
        "--start",
        type=mkdate,
        default=datetime(1979, 1, 1),
        help=(
            "Startdate in format YYYY-MM-DD. "
            "If no data is found there then the first available date of "
            "the product is used."
        ),
    )
    parser.add_argument(
        "-e",
        "--end",
        type=mkdate,
        default=datetime.now(),
        help=(
            "Enddate in format YYYY-MM-DD. "
            "If not given then the current date is used."
        ),
    )
    parser.add_argument(
        "-var",
        "--variables",
        metavar="variables",
        type=str,
        default=None,
        nargs="+",
        help=(
            "Name of variables to download. "
            "A list of possible IDs is available at "
            "https://github.com/TUW-GEO/ecmwf_models/tree/master/ecmwf_models/erainterim/eraint_lut.csv "  # noqa: E501
            "or by using the 'View MARS request' option in the web based "
            "ordering system."
        ),
    )
    # NOTE: string defaults are run through `type` by argparse, so
    # str2bool("False") yields the boolean False.
    parser.add_argument(
        "-keep",
        "--keep_original",
        type=str2bool,
        default="False",
        help=(
            "Keep the originally, temporally downloaded file as it is "
            "instead of deleting it afterwards"
        ),
    )
    parser.add_argument(
        "-grb",
        "--as_grib",
        type=str2bool,
        default="False",
        help=(
            "Download data in grib1 format instead of the default "
            "netcdf format"
        ),
    )
    # Defaults of None mean "use the download function's own defaults".
    parser.add_argument(
        "--h_steps",
        type=int,
        default=None,
        nargs="+",
        # fixed typo in user-facing help text ("donwloaded")
        help=("Manually change the temporal resolution of downloaded images"),
    )
    parser.add_argument(
        "--steps",
        type=int,
        default=None,
        nargs="+",
        help=("Manually change the steps"),
    )
    parser.add_argument(
        "--type",
        type=str,
        default="an",
        help=("Manually set the data stream, e.g. 'an' (default) or 'fc'"),
    )
    parser.add_argument(
        "--grid_size",
        type=float,
        default=None,
        nargs="+",
        help=(
            "lon lat. Size of the grid that the data is stored to. "
            "Should be at least (and is by default) "
            "(0.75, 0.75) for ERA-Interim "
        ),
    )

    args = parser.parse_args(args)

    print("ERA Interim data is deprecated. Use ERA5 instead.")
    print(
        "Downloading ERA Interim {} data from {} to {} into folder {}".format(
            "grib" if args.as_grib is True else "netcdf",
            args.start.isoformat(),
            args.end.isoformat(),
            args.localroot,
        )
    )

    return args
def main(args):
    """
    Parse command line arguments and start the download.

    Parameters
    ----------
    args : list
        Command line arguments (without the program name).
    """
    args = parse_args(args)

    kwargs = dict(
        target_path=args.localroot,
        startdate=args.start,
        enddate=args.end,
        variables=args.variables,
        keep_original=args.keep_original,
        grid_size=args.grid_size,
        type=args.type,
        grb=args.as_grib,
    )

    # The CLI defaults for --h_steps/--steps are None; only override the
    # download defaults when the user actually passed values. Previously
    # args.steps was never forwarded at all (the option was silently
    # ignored) and a None h_steps would crash download_eraint when it
    # iterated the hours.
    if args.h_steps is not None:
        kwargs["h_steps"] = args.h_steps
    if args.steps is not None:
        kwargs["steps"] = args.steps

    download_and_move(**kwargs)
def run():
    """Console entry point: forward the CLI arguments to :func:`main`."""
    cli_args = sys.argv[1:]
    main(cli_args)