Hi all,
I am struggling with the speed of my API requests.
I want to download daily averages from the “ERA5 pressure levels” dataset for temperature and relative humidity, for each pressure level and four different time zones (US time zones), covering the period 2006-2020.
It seems that I can only pass one variable, UTC shift, and pressure level per request.
However, a single request takes more than 2 minutes.
Thus, retrieving all the data I need will take far too long.
Is there any way to speed up the process (e.g. requesting multiple variables, UTC shifts, or pressure levels at once)?
I wrote the following code (based on Retrieve daily ERA5/ERA5-Land data using the CDS API):
# Packages
import cdsapi
import requests
import urllib3
urllib3.disable_warnings()
# PATH
PATH = ".../ERA5_pressure_levels/"
# Requires:
# 1) the CDS API to be installed and working on your system
# 2) You have agreed to the ERA5 Licence (via the CDS web page)
# 3) Selection of required variable, daily statistic, etc
# Call API
c = cdsapi.Client(timeout=600)
# Time Zones
UTC = ["UTC-05", "UTC-06", "UTC-07", "UTC-08"]
# Variables
VAR = ['temperature', 'relative_humidity']
# Pressure levels
PS = [
'1', '2', '3',
'5', '7', '10',
'20', '30', '50',
'70', '100', '125',
'150', '175', '200',
'225', '250', '300',
'350', '400', '450',
'500', '550', '600',
'650', '700', '750',
'775', '800', '825',
'850', '875', '900',
'925', '950', '975',
'1000'
]
# Years
YEARS = [
'2006', '2007', '2008',
'2009', '2010', '2011',
'2012', '2013', '2014',
'2015', '2016', '2017',
'2018', '2019', '2020',
]
# Months
MONTHS = [
'01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'
]
# Bounding boxes per time zone (same lat/lon limits as in the original per-zone requests)
AREA = {
    "UTC-05": {"lat": [23, 50], "lon": [-91, -65]},
    "UTC-06": {"lat": [24, 51], "lon": [-106, -83]},
    "UTC-07": {"lat": [29, 51], "lon": [-120, -99]},
    "UTC-08": {"lat": [31, 51], "lon": [-126, -113]},
}
# Loop over all parameters (one request per year/month/variable/pressure level/time zone)
for yr in YEARS:
    for mn in MONTHS:
        for var in VAR:
            for ps in PS:
                for utc in UTC:
                    print('Running: ' + yr + mn + var + ps + utc)
                    result = c.service(
                        "tool.toolbox.orchestrator.workflow",
                        params={
                            "realm": "user-apps",
                            "project": "app-c3s-daily-era5-statistics",
                            "version": "master",
                            "kwargs": {
                                "dataset": "reanalysis-era5-pressure-levels",
                                "product_type": "reanalysis",
                                "variable": var,
                                "pressure_level": ps,
                                "statistic": "daily_mean",
                                "year": yr,
                                "month": mn,
                                "time_zone": utc + ":0",
                                "frequency": "1-hourly",
                                "grid": "0.25/0.25",
                                "area": AREA[utc],
                            },
                            "workflow_name": "application",
                        })
                    # Set the name of the output file for each request
                    # (time zone, pressure level, variable, year, month)
                    file_name = "download_" + utc + "_" + ps + "_" + var + "_" + yr + mn + ".nc"
                    with open(PATH + 'Filenames.txt', "a") as w:
                        w.write(file_name + '\n')
                    # Download the workflow result to disk
                    location = result[0]['location']
                    res = requests.get(location, stream=True)
                    print("Writing data to " + file_name)
                    with open(PATH + file_name, 'wb') as fh:
                        for chunk in res.iter_content(chunk_size=1024 * 1024):
                            fh.write(chunk)
2 Comments
Kevin Marsh
Hi Felix,
At the moment I think requests are limited to 1 variable/level/month at a time in order to manage the load on the CDS.
Thanks,
Kevin
Felix Bracht
In this case it seems faster to download ERA5 hourly data on pressure levels for the entire US and calculate daily averages for the respective time zones myself. I will share my code for others facing a similar issue:
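A minimal sketch of that approach is shown below (not the exact code posted in the thread). It assumes the standard cdsapi retrieve call for the reanalysis-era5-pressure-levels dataset with the legacy request keywords, and uses xarray/pandas to shift the UTC timestamps and compute the daily means; the file names, the example month (January 2006) and the reduced pressure-level list are placeholders.

# Packages
import cdsapi
import pandas as pd
import xarray as xr

c = cdsapi.Client()

# 1) One hourly request covering the whole contiguous US (all four time zones at once).
#    Unlike the daily-statistics app, several variables and pressure levels
#    can be combined in a single request here.
TARGET = "era5_pl_hourly_200601.nc"  # placeholder file name, example month
c.retrieve(
    "reanalysis-era5-pressure-levels",
    {
        "product_type": "reanalysis",
        "variable": ["temperature", "relative_humidity"],
        "pressure_level": ["500", "850", "1000"],    # extend to the full list as needed
        "year": "2006",
        "month": "01",
        "day": [f"{d:02d}" for d in range(1, 32)],   # adjust to the length of the month
        "time": [f"{h:02d}:00" for h in range(24)],
        "area": [51, -126, 23, -65],                 # N, W, S, E
        "format": "netcdf",                          # may be "data_format" on the newer CDS engine
    },
    TARGET,
)

# 2) Shift the UTC timestamps to local time and average per local calendar day.
#    (Optionally subset each time zone's lat/lon box with ds.sel(...) before averaging.)
ds = xr.open_dataset(TARGET)  # the time coordinate may be named "valid_time" in newer CDS files
for offset in (5, 6, 7, 8):   # UTC-05 ... UTC-08
    local = ds.assign_coords(time=ds.time - pd.Timedelta(hours=offset))
    daily = local.resample(time="1D").mean()
    daily.to_netcdf(f"daily_mean_UTC-{offset:02d}_200601.nc")

Because the hourly requests are not limited to one variable and pressure level each, far fewer (though larger) downloads are needed, and the daily aggregation then runs locally.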