Source code for ecmwf_models.erainterim.download

# -*- coding: utf-8 -*-

"""
Module to download ERA Interim from terminal.
"""

from ecmwfapi import ECMWFDataServer
import argparse
import sys
from datetime import datetime, timedelta
import shutil
import os
import warnings

from ecmwf_models.utils import (
    load_var_table,
    save_ncs_from_nc,
    save_gribs_from_grib,
    lookup,
    mkdate,
    str2bool
)


def default_variables() -> list:
    """
    Return the download names of the default ERA Interim variables.

    These are the variables that are downloaded when the user does not
    pass any. They are the ``dl_name`` entries of all rows in the "ERAINT"
    variable table whose ``default`` column equals 1.

    Returns
    -------
    defaults : list
        Download names of the default variables.
    """
    var_table = load_var_table(name="ERAINT")
    is_default = var_table["default"] == 1
    default_rows = var_table.loc[is_default]
    return default_rows["dl_name"].values.tolist()


def download_eraint(
    target_path,
    start,
    end,
    variables,
    grid_size=None,
    type="fc",
    h_steps=(0, 6, 12, 18),
    grb=False,
    dry_run=False,
    steps=(0,),
):
    """
    Download ERA Interim data with a single request to the ECMWF server.

    Parameters
    ----------
    target_path : str
        Path of the file that the downloaded data is written to
        (passed as "target" of the request).
    start : date
        First date of the requested period.
    end : date
        Last date of the requested period.
    variables : list
        Parameter ids, see wiki. Each element is converted with ``str``.
    grid_size : [float, float], optional (default: None)
        Size of the grid in form (lon, lat), which the data is resampled to.
        If None is passed, 0.75/0.75 is requested for netcdf downloads and
        no grid is requested at all for grib downloads.
    type : str, optional (default: 'fc')
        Value of the "type" keyword of the request, e.g. 'an' or 'fc'.
    h_steps : tuple, optional (default: (0, 6, 12, 18))
        Full hours to download data at the selected dates. None selects
        the default.
    grb : bool, optional (default: False)
        Download data as grib1 file instead of netcdf file.
    dry_run : bool, optional (default: False)
        Do not connect to the server and do not download anything, this is
        just used for testing the functions.
    steps : tuple, optional (default: (0,))
        Values of the "step" keyword of the request. None selects the
        default.

    Raises
    ------
    Warning
        If a custom grid size below 0.75 (the original ERA Interim
        resolution) is passed.
    """
    if dry_run:
        warnings.warn("Dry run does not create connection to ECMWF")
        server = None
    else:
        server = ECMWFDataServer()

    # The command line parser may hand over None for these; treat that like
    # "not specified" instead of failing when iterating over them.
    if h_steps is None:
        h_steps = (0, 6, 12, 18)
    if steps is None:
        steps = (0,)

    dataset = "interim"
    dataclass = "ei"

    param_string = "/".join(str(variable) for variable in variables)
    timestep_string = "/".join("%02d" % timestep for timestep in h_steps)
    date_string = "%s/to/%s" % (
        start.strftime("%Y-%m-%d"),
        end.strftime("%Y-%m-%d"),
    )

    # ATTENTION: When downloading netcdf files steps and times
    # must not overlap!! see:
    # https://software.ecmwf.int/wiki/display/CKB/What+to+do+with+ECCODES+ERROR+%3A+Try+using+the+-T+option  # noqa: E501
    step = "/".join(str(s) for s in steps)

    if grid_size:
        # Validate the numeric values BEFORE they are formatted into the
        # request string; comparing the formatted string would compare
        # single characters against a float and fail with a TypeError.
        if any(size < 0.75 for size in grid_size):
            raise Warning(
                "Custom grid smaller than original ERA Interim resolution. "
                "See https://software.ecmwf.int/wiki/display/CKB/"
                "Does+downloading+data+at+higher+resolution+improve+the+output"  # noqa: E501
            )
        grid_string = "%f/%f" % (grid_size[0], grid_size[1])
    elif not grb:
        # netcdf downloads always get an explicit grid
        grid_string = "%f/%f" % (0.75, 0.75)
    else:
        # grib downloads without a custom grid do not send a grid keyword
        grid_string = None

    dl_params = {
        "class": dataclass,
        "dataset": dataset,
        "expver": "1",
        "stream": "oper",
        "type": type,
        "levtype": "sfc",
        "param": param_string,
        "date": date_string,
        "time": timestep_string,
        "step": step,
        "grid": grid_string,
        "format": "grib1" if grb else "netcdf",
        "target": target_path,
    }
    if grid_string is None:
        del dl_params["grid"]

    if not dry_run:
        server.retrieve(dl_params)


def download_and_move(
    target_path,
    startdate,
    enddate,
    variables=None,
    keep_original=False,
    grid_size=None,
    type="an",
    h_steps=(0, 6, 12, 18),
    steps=(0,),
    grb=False,
    dry_run=False,
):
    """
    Downloads the data from the ECMWF servers and moves them to the target
    path. This is done in chunks between start and end date to be efficient
    with the MARS system: each request covers a start date plus at most the
    following 30 days, the next request starts on the day after.
    See the recommendation for doing it this way in
    https://software.ecmwf.int/wiki/display/WEBAPI/ERA-Interim+daily+retrieval+efficiency

    Each chunk is downloaded into the folder "temp_downloaded" under the
    target_path and then extracted into separate grib/nc files via
    ``save_gribs_from_grib`` / ``save_ncs_from_nc``.

    Parameters
    ----------
    target_path: str
        Path to which to copy the extracted parameter files
    startdate: datetime
        First date to download
    enddate: datetime
        Last date to download
    variables : list, optional (default: None)
        List of variable ids to pass to the client, if None are passed,
        the default variable ids will be downloaded.
    keep_original: bool, optional (default: False)
        Keep the original downloaded data
    grid_size: list, optional (default: None)
        [lon, lat] extent of the grid (regular for netcdf, at lat=0 for grib)
        If None is passed, the default grid size for the data product is used.
    type : str, optional (default: 'an')
        Value of the "type" keyword of the request, e.g. 'an' or 'fc'
        (fc=forecast)
    h_steps: list, optional (default: [0, 6, 12, 18])
        List of full hours to download data at the selected dates.
        None selects the default.
    steps: list, optional (default: [0])
        Values of the "step" keyword of the request.
        None selects the default.
    grb: bool, optional (default: False)
        Download data as grib files instead of netcdf files
    dry_run: bool
        Do not download anything, this is just used for testing the functions
    """
    product = "eraint"
    if variables is None:
        variables = default_variables()
    else:
        # find the dl_names
        variables = lookup(name=product, variables=variables)
        variables = variables["dl_name"].values.tolist()

    # The command line parser may hand over None for these; fall back to
    # the defaults instead of passing None on to the download.
    if h_steps is None:
        h_steps = (0, 6, 12, 18)
    if steps is None:
        steps = (0,)

    td = timedelta(days=30)
    extension = "grb" if grb else "nc"
    downloaded_data_path = os.path.join(target_path, "temp_downloaded")

    current_start = startdate
    while current_start <= enddate:
        # never request data past the last requested date
        current_end = min(current_start + td, enddate)

        fname = "{start}_{end}.{ext}".format(
            start=current_start.strftime("%Y%m%d"),
            end=current_end.strftime("%Y%m%d"),
            ext=extension,
        )

        # Created inside the loop because the folder is removed after each
        # chunk unless keep_original is set. makedirs also creates a
        # target_path that does not exist yet.
        os.makedirs(downloaded_data_path, exist_ok=True)

        dl_file = os.path.join(downloaded_data_path, fname)

        download_eraint(
            dl_file,
            current_start,
            current_end,
            variables,
            grid_size=grid_size,
            h_steps=h_steps,
            type=type,
            steps=steps,
            grb=grb,
            dry_run=dry_run,
        )

        if grb:
            save_gribs_from_grib(dl_file, target_path, product.upper())
        else:
            save_ncs_from_nc(dl_file, target_path, product.upper())

        if not keep_original:
            shutil.rmtree(downloaded_data_path)
        current_start = current_end + timedelta(days=1)


def parse_args(args):
    """
    Parse command line parameters for recursive download

    Besides parsing, a deprecation notice and a summary of the requested
    download are printed.

    Parameters
    ----------
    args : list
        Command line parameters as list of strings

    Returns
    -------
    clparams : argparse.Namespace
        Parsed command line parameters
    """

    parser = argparse.ArgumentParser(
        description="Download ERA Interim data (6H) between two dates. "
        "Before this program can be used, you have to register at ECMWF "
        "and setup your .ecmwfapirc file as described here: "
        "https://confluence.ecmwf.int//display/WEBAPI/Access+ECMWF+Public+Datasets#AccessECMWFPublicDatasets-key"  # noqa: E501
    )
    parser.add_argument(
        "localroot",
        help="Root of local filesystem where the downloaded data is stored.",
    )
    parser.add_argument(
        "-s",
        "--start",
        type=mkdate,
        default=datetime(1979, 1, 1),
        help=(
            "Startdate in format YYYY-MM-DD. "
            "If no data is found there then the first available date of "
            "the product is used."
        ),
    )
    parser.add_argument(
        "-e",
        "--end",
        type=mkdate,
        default=datetime.now(),
        help=(
            "Enddate in format YYYY-MM-DD. "
            "If not given then the current date is used."
        ),
    )
    parser.add_argument(
        "-var",
        "--variables",
        metavar="variables",
        type=str,
        default=None,
        nargs="+",
        help=(
            "Name of variables to download. "
            "A list of possible IDs is available at "
            "https://github.com/TUW-GEO/ecmwf_models/tree/master/ecmwf_models/erainterim/eraint_lut.csv "  # noqa: E501
            "or by using the 'View MARS request' option in the web based "
            "ordering system."
        ),
    )
    parser.add_argument(
        "-keep",
        "--keep_original",
        type=str2bool,
        default="False",
        help=(
            "Keep the originally, temporally downloaded file as it is "
            "instead of deleting it afterwards"
        ),
    )
    parser.add_argument(
        "-grb",
        "--as_grib",
        type=str2bool,
        default="False",
        help=(
            "Download data in grib1 format instead of the default "
            "netcdf format"
        ),
    )
    # h_steps and steps are iterated over when the request is built, so
    # their defaults must be sequences (matching the defaults of the
    # download functions) rather than None.
    parser.add_argument(
        "--h_steps",
        type=int,
        default=[0, 6, 12, 18],
        nargs="+",
        help=("Manually change the temporal resolution of donwloaded images"),
    )
    parser.add_argument(
        "--steps",
        type=int,
        default=[0],
        nargs="+",
        help=("Manually change the steps"),
    )
    parser.add_argument(
        "--type",
        type=str,
        default="an",
        help=("Manually set the data stream, e.g. 'an' (default) or 'fc'"),
    )
    # Exactly two values (lon, lat) are used to build the grid of the
    # request, so reject any other number of values right here.
    parser.add_argument(
        "--grid_size",
        type=float,
        default=None,
        nargs=2,
        metavar=("LON", "LAT"),
        help=(
            "lon lat. Size of the grid that the data is stored to. "
            "Should be at least (and is by default) "
            "(0.75, 0.75) for ERA-Interim "
        ),
    )

    args = parser.parse_args(args)

    print("ERA Interim data is deprecated. Use ERA5 instead.")
    print(
        "Downloading ERA Interim {} data from {} to {} into folder {}".format(
            "grib" if args.as_grib is True else "netcdf",
            args.start.isoformat(),
            args.end.isoformat(),
            args.localroot,
        )
    )

    return args


def main(args):
    """
    Parse the command line parameters and run the download.

    Parameters
    ----------
    args : list
        Command line parameters as list of strings
    """
    args = parse_args(args)

    # Only forward h_steps / steps when they were actually set, so that an
    # unset option (None) does not override the defaults of
    # download_and_move.
    optional = {}
    if args.h_steps is not None:
        optional["h_steps"] = args.h_steps
    if args.steps is not None:
        optional["steps"] = args.steps

    download_and_move(
        target_path=args.localroot,
        startdate=args.start,
        enddate=args.end,
        variables=args.variables,
        keep_original=args.keep_original,
        grid_size=args.grid_size,
        type=args.type,
        grb=args.as_grib,
        **optional,
    )


def run():
    """
    Entry point: run ``main`` with the command line parameters of the
    current process (everything after the program name).
    """
    cli_params = sys.argv[1:]
    main(cli_params)