Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 8c0c9a68 authored by Gildas Cambon's avatar Gildas Cambon
Browse files

Add ERA5_parallel_request.py:

Parallelization of the ERA5 request to speed-up the process
parent 81458237
No related branches found
No related tags found
1 merge request!7First commits for the dev_newcopernicus branch
#!/usr/bin/env python
# Script to download ECMWF ERA5 reanalysis datasets from the Climate Data
# Store (CDS) of Copernicus https://cds.climate.copernicus.eu
#
# This script uses the CDS Python API[*] to connect and download specific ERA5
# variables, for a chosen area and monthly date interval, required by CROCO to
# perform simulations with atmospheric forcing. Furthermore, this script uses
# ERA5 parameter names and not parameter IDs, because the IDs did not result
# in stable downloads.
#
# Tested using Python 3.8.6 and Python 3.9.1. This script needs the following
# Python libraries pre-installed: "calendar", "datetime", "json" and "os".
#
# [*] https://cds.climate.copernicus.eu/api-how-to
#
# Copyright (c) DDONOSO February 2021
# e-mail:ddonoso@dgeo.udec.cl
#
# You may see all available ERA5 variables at the following website
# https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation#ERA5:datadocumentation-Parameterlistings
# -------------------------------------------------
# Getting libraries and utilities
# -------------------------------------------------
import cdsapi
from ERA5_utilities import *
import calendar
import datetime
import json
import os
from multiprocessing import Pool
# Importing addmonths4date function from ERA5_utilities
from ERA5_utilities import addmonths4date
# Function to download data for a single variable
def download_data(variable, options, product, output):
    """Send one retrieval request through a fresh ``cdsapi`` client.

    Parameters
    ----------
    variable :
        First element of the task tuple. It is not used by the request
        itself; it is kept so that task tuples unpack positionally.
    options :
        Request options handed to ``Client.retrieve``.
    product :
        Dataset name handed to ``Client.retrieve``.
    output :
        Third argument of ``Client.retrieve`` (the callers in this file
        pass the path of the NetCDF file to write).
    """
    # A new client is created for every call.
    cdsapi.Client().retrieve(product, options, output)
# Main function to process tasks in parallel
def process_parallel(tasks):
    """Run ``download_data`` once per task in a pool of worker processes.

    Parameters
    ----------
    tasks : iterable of tuples
        Each tuple is unpacked as the positional arguments of
        ``download_data``.

    Returns
    -------
    None
        The call returns once ``Pool.starmap`` has finished.
    """
    # Materialise the tasks so that they can be counted; any iterable is
    # still accepted, as with Pool.starmap.
    tasks = list(tasks)
    if not tasks:
        # Nothing to download: do not start any worker process.
        return
    # Never start more workers than there are tasks to run.
    n_workers = min(len(tasks), os.cpu_count() or 1)
    with Pool(processes=n_workers) as pool:
        pool.starmap(download_data, tasks)
# -------------------------------------------------
# Import my crocotools_param_python file
from era5_crocotools_param import *
print('year_start is '+str(year_start))
# -------------------------------------------------
# Margin added on every side of the domain limits before the request
# (presumably in degrees, like the limits themselves -- TODO confirm).
dl = 2
if ownArea == 0:
    # Take the domain limits from paramFile. For a line that mentions one of
    # the four names, the text between the first '=' and the first ';' is
    # kept as the value. When several lines mention the same name, the last
    # one wins. The file is closed as soon as it has been read.
    with open(paramFile) as param_file:
        for raw_line in param_file:
            line = raw_line.rstrip('\n')
            value = line[line.find('=') + 1:line.find(';')]
            if "lonmin" in line:
                lonmin = value
            elif "lonmax" in line:
                lonmax = value
            elif "latmin" in line:
                latmin = value
            elif "latmax" in line:
                latmax = value
# NOTE(review): the margin is applied whether or not ownArea == 0; when
# ownArea != 0 the four limits are presumably provided by
# era5_crocotools_param -- confirm against the intended layout.
lonmin = str(float(lonmin)-dl)
lonmax = str(float(lonmax)+dl)
latmin = str(float(latmin)-dl)
latmax = str(float(latmax)+dl)
print ('lonmin-dl = ', lonmin)
print ('lonmax+dl =', lonmax)
print ('latmin-dl =', latmin)
print ('latmax+dl =', latmax)
# -------------------------------------------------
# Request area, in the order: latmax, lonmin, latmin, lonmax
area = [latmax, lonmin, latmin, lonmax]
# -------------------------------------------------
# Raw output directory
# -------------------------------------------------
# Create the directory for the raw downloads; nothing happens when it
# already exists.
os.makedirs(era5_dir_raw, exist_ok=True)
# -------------------------------------------------
# Information about the ERA5 variables, read from
# the JSON file into a Python dictionary
# -------------------------------------------------
with open('ERA5_variables.json', 'r') as json_file:
    era5 = json.loads(json_file.read())
# -------------------------------------------------
# Downloading ERA5 datasets
# -------------------------------------------------
# The requests are built and dispatched only when this file is run as a
# script. Worker processes started with the "spawn" or "forkserver" methods
# import the main module again; without this guard each worker would reach
# the process_parallel() call itself and try to start its own pool.
if __name__ == '__main__':
    # Monthly dates limits
    monthly_date_start = datetime.datetime(year_start, month_start, 1)
    monthly_date_end = datetime.datetime(year_end, month_end, 1)
    # Number of months in the interval, both ends included
    len_monthly_dates = (
        (monthly_date_end.year - monthly_date_start.year) * 12
        + (monthly_date_end.month - monthly_date_start.month)
        + 1
    )
    # Margin of n_overlap days added before and after every month
    overlap = datetime.timedelta(days=n_overlap)
    # One task (argument tuple for download_data) per month and variable
    tasks = []
    # Monthly dates loop
    monthly_date = monthly_date_start
    for _ in range(len_monthly_dates):
        # Year and month
        year = monthly_date.year
        month = monthly_date.month
        # Number of days in month
        days_in_month = calendar.monthrange(year, month)[1]
        # Overlapping date string limits (yyyy-mm-dd)
        datestr_start_overlap = (
            datetime.date(year, month, 1) - overlap
        ).strftime('%Y-%m-%d')
        datestr_end_overlap = (
            datetime.date(year, month, days_in_month) + overlap
        ).strftime('%Y-%m-%d')
        # Overlapping date string interval
        vdate = datestr_start_overlap + '/' + datestr_end_overlap
        # Variables/Parameters loop
        for vname in variables:
            # Long name (entry 0) and level type (entry 3) of the variable
            vlong = era5[vname][0]
            vlevt = era5[vname][3]
            # Request options
            options = {
                'product_type': 'reanalysis',
                'type': 'an',
                'date': vdate,
                'variable': vlong,
                'levtype': vlevt,
                'area': area,
                'format': 'netcdf',
            }
            # These two variables are requested at a single time only;
            # every other variable uses the configured `time` value.
            if vlong == 'sea_surface_temperature':
                options['time'] = '00'
            elif vlong == 'land_sea_mask':
                options['time'] = '00:00'
            else:
                options['time'] = time
            # Humidity variables are requested from the pressure-level
            # product at the 1000 level; everything else from the
            # single-level product.
            if vlong in ('specific_humidity', 'relative_humidity'):
                options['pressure_level'] = '1000'
                product = 'reanalysis-era5-pressure-levels'
            else:
                product = 'reanalysis-era5-single-levels'
            # Output filename
            fname = 'ERA5_ecmwf_' + vname.upper() + '_Y' + str(year) + 'M' + str(month).zfill(2) + '.nc'
            output = era5_dir_raw + '/' + fname
            # Information strings
            info_time_clock = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            info_monthly_date = monthly_date.strftime('%Y-%b')
            info_n_overlap = ' with ' + str(n_overlap) + ' overlapping day(s) '
            # Printing message on screen
            print(' ')
            print('-----------------------------------------------------------')
            print('',info_time_clock,' ')
            print(' Performing ERA5 data request, please wait... ')
            print(' Date [yyyy-mmm] =',info_monthly_date + info_n_overlap,' ')
            print(' Variable =',vlong,' ')
            print('-----------------------------------------------------------')
            print('Request options: ')
            print(options)
            tasks.append((vname, options, product, output))
        # Move on to the next month
        monthly_date = addmonths4date(monthly_date, 1)
    # Process tasks in parallel
    process_parallel(tasks)
    # Print output message on screen
    print(' ')
    print(' ERA5 data request has been done successfully! ')
    print(' ')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment