Skip to article frontmatter | Skip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Access ERA5 data from NCAR’s GDEX

# Imports
import intake
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import intake_esm
import glob
import pandas as pd
import os
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report
# Set up your scratch folder path
username       = os.environ["USER"]
glade_scratch  = "/glade/derecho/scratch/" + username
print(glade_scratch)
/glade/derecho/scratch/harshah
# Local GDEX data path (not referenced by the cells shown below)
gdex_data    = '/gdex/data/'
# intake-esm catalog files for the ERA5 dataset (d633000), one per access
# method: POSIX file paths, HTTPS URLs, and OSDF URLs respectively.
era5_posix     = 'https://osdf-director.osg-htc.org/ncar/gdex/d633000/catalogs/d633000-posix.json'
era5_https_url = 'https://osdf-director.osg-htc.org/ncar/gdex/d633000/catalogs/d633000-http.json'
era5_osdf_url  = 'https://osdf-director.osg-htc.org/ncar/gdex/d633000/catalogs/d633000-osdf.json'

Create PBS cluster and scale

# Describe the PBS job that backs each dask worker
cluster = PBSCluster(
    job_name='dask-wk24-hpc',
    queue='casper',
    walltime='5:00:00',
    # One core, one worker process and 4 GiB of memory per job
    cores=1,
    processes=1,
    memory='4GiB',
    resource_spec='select=1:ncpus=1:mem=4GB',
    # Worker local files and job logs both go under the user's scratch area
    local_directory=f'{glade_scratch}/dask/',
    log_directory=f'{glade_scratch}/dask/',
    # Network interface for cluster communication (alternative: 'ib0')
    interface='ext',
)
/glade/u/home/harshah/venvs/osdf/lib/python3.10/site-packages/distributed/node.py:187: UserWarning: Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 42683 instead
  warnings.warn(
# Request three workers (one PBS job each, per the cluster definition)
cluster.scale(3)
# Attach a client so that subsequent dask computations are sent to this
# cluster; without one the requested workers are never used.
client = Client(cluster)
cluster
Loading...

Open catalog and load data

Direct POSIX access for NCAR’s HPC users

cat = intake.open_esm_datastore(era5_posix)
cat
Loading...
cat.df
Loading...
cat_temp = cat.search(variable='VAR_2T')
cat_temp.df['path'].values
array(['/glade/campaign/collections/gdex/data/d633000/e5.oper.an.sfc.zarr/e5.oper.an.sfc.2t.zarr'], dtype=object)
dsets = cat_temp.to_dataset_dict()

--> The keys in the returned dictionary of datasets are constructed as follows:
	'variable.short_name'
Loading...
Loading...
dsets.keys()
dict_keys(['VAR_2T.2t'])
temp_posix = dsets['VAR_2T.2t']
temp_posix
Loading...
temp_posix.VAR_2T.isel(time=0).plot()
<Figure size 640x480 with 2 Axes>

Using HTTPS

catalog_https  = intake.open_esm_datastore(era5_https_url)
cat_temp_https = catalog_https.search(variable='VAR_2T')
cat_temp_https.df['path'].values
array(['https://data.gdex.ucar.edu/d633000/e5.oper.an.sfc.zarr/e5.oper.an.sfc.2t.zarr'], dtype=object)
# Build the datasets from the HTTPS search result (not the POSIX `cat_temp`)
dsets_https = cat_temp_https.to_dataset_dict()

--> The keys in the returned dictionary of datasets are constructed as follows:
	'variable.short_name'
Loading...
Loading...
temp_https = dsets_https['VAR_2T.2t']
temp_https.VAR_2T.isel(time=0).plot()
<Figure size 640x480 with 2 Axes>

Using OSDF

catalog_osdf  = intake.open_esm_datastore(era5_osdf_url)
cat_temp_osdf = catalog_osdf.search(variable='VAR_2T')
cat_temp_osdf.df['path'].values
array(['osdf:///ncar/gdex/d633000/e5.oper.an.sfc.zarr/e5.oper.an.sfc.2t.zarr'], dtype=object)
dsets_osdf = cat_temp_osdf.to_dataset_dict()

--> The keys in the returned dictionary of datasets are constructed as follows:
	'variable.short_name'
Loading...
Loading...
temp_osdf = dsets_osdf['VAR_2T.2t']
temp_osdf.VAR_2T.isel(time=0).plot()
<Figure size 640x480 with 2 Axes>
cluster.close()