
Precipitation plots from ERA5

Access ERA5 data from NCAR’s GDEX and plot mean total precipitation

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import intake
import intake_esm
import pandas as pd
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import os

Required Packages

Please make sure to install the following packages before moving forward (a quick environment check is sketched after the list):

  • matplotlib

  • xarray

  • dask

  • cartopy

  • zarr < 3

  • intake-esm (which also pulls in intake)
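If you are not sure what your environment provides, a minimal check like the sketch below (an addition, not part of the original notebook) reports what is importable and its version; the catalog is opened later with zarr_format=2 kwargs, which is why zarr < 3 is expected.

# Quick environment check (illustrative sketch, not from the original notebook)
import importlib

for pkg in ["matplotlib", "xarray", "dask", "cartopy", "zarr", "intake", "intake_esm"]:
    try:
        mod = importlib.import_module(pkg)
        print(f"{pkg}: {getattr(mod, '__version__', 'version unknown')}")
    except ImportError:
        print(f"{pkg}: not installed")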

# Set up your scratch folder path
username       = os.environ["USER"]
glade_scratch  = "/glade/derecho/scratch/" + username
print(glade_scratch)
# Intake-ESM catalog for the ERA5 collection (d633000) on NCAR's GDEX, served via OSDF
era5_catalog_url = 'https://osdf-director.osg-htc.org/ncar/gdex/d633000/catalogs/d633000-osdf.json'
/glade/derecho/scratch/harshah
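The PBS cluster configured below spills to and logs under this scratch path; if those subdirectories do not exist yet, a small setup step like this (an addition, not in the original notebook) creates them:

# Make sure the dask spill/log directories used by the PBS cluster exist
os.makedirs(glade_scratch + '/dask/spill', exist_ok=True)
os.makedirs(glade_scratch + '/dask/logs', exist_ok=True)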
USE_PBS_SCHEDULER = True
USE_DASK_GATEWAY  = False   # referenced below, so define it even when the PBS scheduler is used
# Create a PBS cluster object
def get_pbs_cluster():
    """ Create cluster through dask_jobqueue.   
    """
    from dask_jobqueue import PBSCluster
    cluster = PBSCluster(
        job_name = 'dask-osdf-24',
        cores = 1,
        memory = '4GiB',
        processes = 1,
        local_directory = glade_scratch + '/dask/spill',
        log_directory = glade_scratch + '/dask/logs/',
        resource_spec = 'select=1:ncpus=1:mem=4GB',
        queue = 'casper',
        walltime = '3:00:00',
        #interface = 'ib0'
        interface = 'ext'
    )
    return cluster

def get_gateway_cluster():
    """ Create cluster through dask_gateway
    """
    from dask_gateway import Gateway

    gateway = Gateway()
    cluster = gateway.new_cluster()
    cluster.adapt(minimum=2, maximum=4)
    return cluster

def get_local_cluster():
    """ Create cluster using the Jupyter server's resources
    """
    from distributed import LocalCluster
    cluster = LocalCluster()
    cluster.scale(5)
    return cluster
# Obtain dask cluster in one of three ways
if USE_PBS_SCHEDULER:
    cluster = get_pbs_cluster()
elif USE_DASK_GATEWAY:
    cluster = get_gateway_cluster()
else:
    cluster = get_local_cluster()
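
When the PBS scheduler is selected, it can be useful to inspect the job script that dask-jobqueue will submit. The job_script() method is provided by PBSCluster (not by the local or gateway clusters), so the sketch below only runs in that case.

# Optional: inspect the generated PBS job script (PBSCluster only)
if USE_PBS_SCHEDULER:
    print(cluster.job_script())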

# Connect to cluster
from distributed import Client
client = Client(cluster)
# Scale the cluster and display cluster dashboard URL
n_workers = 5
cluster.scale(n_workers)
client.wait_for_workers(n_workers=n_workers)
cluster
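If the interactive cluster widget does not render in your environment, the dashboard URL can also be printed directly from the client:

# Print the Dask dashboard URL for monitoring the workers
print(client.dashboard_link)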
%%time
era5_cat = intake.open_esm_datastore(era5_catalog_url)
era5_cat
CPU times: user 125 ms, sys: 7.47 ms, total: 133 ms
Wall time: 5.66 s
era5_cat.df
era5_cat.df[['variable','long_name']].drop_duplicates()
cat_subset = era5_cat.search(variable='MTPR')
cat_subset.df
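The search is not limited to a single exact code: intake-esm also accepts lists of values, so several variables can be pulled into one subset at once (the second code below is purely illustrative and may not exist in this catalog).

# Example: search for several variable codes at once (second code is illustrative)
multi_cat = era5_cat.search(variable=['MTPR', 'MSL'])
multi_cat.df[['variable', 'long_name']].drop_duplicates()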
##############################################################
# Example xarray_open_kwargs for netCDF-backed assets (e.g. the 'h5netcdf' engine).
# The next cell does not use this dictionary; it passes zarr-specific kwargs directly.
xarray_open_kwargs = {
    'engine': 'h5netcdf',
    'chunks': {},          # specify chunking here if needed
    'backend_kwargs': {}   # any additional backend arguments if required
}
%%time
# The catalog assets are reference files, so open them with the zarr engine,
# consolidated metadata, and zarr format 2.
dset_subset = cat_subset.to_dataset_dict(
    xarray_open_kwargs={'engine': 'zarr',
                        'backend_kwargs': {'consolidated': True, 'zarr_format': 2}})

--> The keys in the returned dictionary of datasets are constructed as follows:
	'variable.short_name'
CPU times: user 532 ms, sys: 63.9 ms, total: 596 ms
Wall time: 10.3 s
dset_subset
{'MTPR.mtpr': <xarray.Dataset> Size: 3TB
 Dimensions:                (forecast_initial_time: 61666, forecast_hour: 12,
                             latitude: 721, longitude: 1440)
 Coordinates:
   * forecast_initial_time  (forecast_initial_time) datetime64[ns] 493kB 1940-...
   * forecast_hour          (forecast_hour) int32 48B 1 2 3 4 5 6 7 8 9 10 11 12
   * latitude               (latitude) float64 6kB 90.0 89.75 ... -89.75 -90.0
   * longitude              (longitude) float64 12kB 0.0 0.25 0.5 ... 359.5 359.8
 Data variables:
     MTPR                   (forecast_initial_time, forecast_hour, latitude, longitude) float32 3TB dask.array<chunksize=(2, 12, 721, 1440), meta=np.ndarray>
     utc_date               (forecast_initial_time) int32 247kB dask.array<chunksize=(61666,), meta=np.ndarray>
 Attributes: (12/17)
     CONVERSION_DATE:                 Wed Nov 2 10:03:48 MDT 2022
     CONVERSION_PLATFORM:             Linux r3i0n28 4.12.14-95.51-default #1 S...
     Conventions:                     CF-1.6
     DATA_SOURCE:                     ECMWF: https://cds.climate.copernicus.eu...
     NCO:                             netCDF Operators version 5.0.3 (Homepage...
     NETCDF_COMPRESSION:              NCO: Precision-preserving compression to...
     ...                              ...
     intake_esm_attrs:variable:       MTPR
     intake_esm_attrs:_data_format_:  reference
     intake_esm_attrs:short_name:     mtpr
     intake_esm_attrs:long_name:      Mean total precipitation rate
     intake_esm_attrs:units:          kg m**-2 s**-1
     intake_esm_dataset_key:          MTPR.mtpr}
mtpr = dset_subset['MTPR.mtpr']
mtpr
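Note that MTPR is still a lazy, roughly 3 TB dask array at this point. The cell below plots a single snapshot; a temporal mean over some period could be computed with a sketch like this (the date range is an arbitrary illustration, not from the original notebook):

# Sketch: mean precipitation rate over an example month (dates are illustrative)
mtpr_slice = mtpr.MTPR.sel(forecast_initial_time=slice('2020-01-01', '2020-01-31'))
mtpr_mean  = mtpr_slice.mean(dim=['forecast_initial_time', 'forecast_hour'])
mtpr_mean  = mtpr_mean.compute()   # triggers the computation on the dask workers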
# 1) Select a single snapshot: one forecast hour of one forecast initial time
da = mtpr.MTPR.isel(forecast_initial_time=10, forecast_hour=10)

# 2) Make a Cartopy map axis
proj = ccrs.PlateCarree()  
fig, ax = plt.subplots(figsize=(12, 6), subplot_kw={"projection": proj})

# 3) Plot onto that axis
im = da.plot(
    ax=ax,
    transform=ccrs.PlateCarree(),
    cmap="Blues",
    x="longitude",
    y="latitude",
    # robust=True,
    cbar_kwargs={"label": getattr(da, "units", "")},
)

ax.coastlines(color="black", linewidth=1.0)
(Figure: global PlateCarree map of the selected MTPR snapshot with coastlines.)
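cartopy.feature is imported at the top but not otherwise used; if you want country outlines or a saved copy of the figure, a small follow-up like this (an addition, with an assumed output filename) would do it:

# Optional extras: country borders, a title, and saving the figure to scratch
ax.add_feature(cfeature.BORDERS, linewidth=0.5)
ax.set_title("ERA5 mean total precipitation rate (single snapshot)")
fig.savefig(glade_scratch + "/mtpr_snapshot.png", dpi=150, bbox_inches="tight")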
cluster.close()