CESM2-LENS zarr data from NCAR’s object store - GDEX Intake ESM Catalog Documentation

# Imports
import intake
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import intake_esm
import glob
import pandas as pd
import os

import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

# Build the path of the current user's scratch directory under
# /glade/derecho/scratch and echo it so the reader can verify it.
username = os.environ["USER"]
glade_scratch = f"/glade/derecho/scratch/{username}"
print(glade_scratch)

/glade/derecho/scratch/harshah

# Local GDEX data root (not referenced by the other cells shown in this notebook).
gdex_data = "/gdex/data/"

# Intake-ESM catalog descriptors for dataset d010092: one catalog whose
# entries use OSDF paths and one whose entries use plain HTTPS paths.
_catalog_root = "https://osdata.gdex.ucar.edu/d010092/catalogs"
cesm_osdf_url = f"{_catalog_root}/d010092-osdf.json"
cesm_https_url = f"{_catalog_root}/d010092-https.json"

Create PBS cluster and scale

# Configure the PBS jobs that will host Dask workers. Both Dask's
# local_directory and its log_directory point at <scratch>/dask/.
dask_dir = f"{glade_scratch}/dask/"
cluster = PBSCluster(
    job_name="dask-wk24-hpc",
    queue="casper",
    walltime="5:00:00",
    # Each job is a single process on a single core with 4 GiB of memory;
    # the PBS resource request below asks for the matching allocation.
    cores=1,
    processes=1,
    memory="4GiB",
    resource_spec="select=1:ncpus=1:mem=4GB",
    local_directory=dask_dir,
    log_directory=dask_dir,
    # Network interface passed to Dask ('ib0' was the commented-out alternative).
    interface="ext",
)

/glade/u/home/harshah/venvs/osdf/lib/python3.10/site-packages/distributed/node.py:187: UserWarning: Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 46367 instead
  warnings.warn(

# Request three workers; with processes=1 in the cluster spec that is one
# PBS job per worker.
cluster.scale(3)

# Attach a client to the cluster. Creating the Client registers this cluster
# as Dask's default scheduler; without it, the computations in the cells
# below would run on the local default scheduler and the PBS workers would
# sit idle.
client = Client(cluster)

cluster

Loading...

Open catalog and load data

# Open the Intake-ESM catalog referenced by the HTTPS catalog URL and display it.
cat = intake.open_esm_datastore(cesm_https_url)
cat

Loading...

# Show the catalog's underlying table of entries.
cat.df

Loading...

# Narrow the catalog to the TREFHTMX variable at monthly frequency and
# display the resulting sub-catalog.
query = {"variable": "TREFHTMX", "frequency": "monthly"}
cat_temp = cat.search(**query)
cat_temp

Loading...

# List the zarr store URLs behind the matched catalog entries.
cat_temp.df['path'].values

array(['https://stratus.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-historical-cmip6-TREFHTMX.zarr',
       'https://stratus.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-historical-smbb-TREFHTMX.zarr',
       'https://stratus.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-ssp370-cmip6-TREFHTMX.zarr',
       'https://stratus.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-ssp370-smbb-TREFHTMX.zarr'],
      dtype=object)

# Sanity check: open one zarr store directly with xarray, bypassing the
# catalog, and display the resulting dataset.
# NOTE(review): this URL uses the osdata.gdex.ucar.edu host, whereas the
# catalog paths printed above use stratus.gdex.ucar.edu — confirm both hosts
# serve the same store.
store_url = 'https://osdata.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-historical-cmip6-TREFHTMX.zarr'
test = xr.open_zarr(store_url)
test

Loading...

# Load every matched catalog entry into a dictionary of datasets, keyed as
# 'component.experiment.frequency.forcing_variant'.
dsets = cat_temp.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency.forcing_variant'

Loading...

# Inspect which dataset keys were produced.
dsets.keys()

dict_keys(['atm.historical.monthly.cmip6', 'atm.historical.monthly.smbb', 'atm.ssp370.monthly.smbb', 'atm.ssp370.monthly.cmip6'])

# Pick the historical experiment with the 'smbb' forcing variant and display it.
dset = dsets['atm.historical.monthly.smbb']
dset

Loading...

%%time
# Plot TREFHTMX at index 0 along both member_id and time; the %%time cell
# magic above reports how long the read and plot take.
dset.TREFHTMX.isel(member_id=0,time=0).plot()

CPU times: user 324 ms, sys: 120 ms, total: 444 ms
Wall time: 953 ms

Using OSDF

# Repeat the catalog lookup through the OSDF catalog: open it, apply the
# TREFHTMX / monthly filter, and display the resulting sub-catalog.
cat_osdf = intake.open_esm_datastore(cesm_osdf_url)
osdf_query = {"variable": "TREFHTMX", "frequency": "monthly"}
cat_temp_osdf = cat_osdf.search(**osdf_query)
cat_temp_osdf

Loading...

# List the osdf:// store paths behind the matched OSDF catalog entries.
cat_temp_osdf.df['path'].values

array(['osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-historical-cmip6-TREFHTMX.zarr',
       'osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-historical-smbb-TREFHTMX.zarr',
       'osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-ssp370-cmip6-TREFHTMX.zarr',
       'osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-ssp370-smbb-TREFHTMX.zarr'],
      dtype=object)

# test = xr.open_zarr('osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-historical-cmip6-TREFHTMX.zarr')

# Load the matched OSDF catalog entries into a dictionary of datasets, keyed
# as 'component.experiment.frequency.forcing_variant'.
dsets_osdf = cat_temp_osdf.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency.forcing_variant'

Loading...

# Select from the OSDF-backed dictionary (dsets_osdf) so this plot actually
# reads data over OSDF; indexing `dsets` here would re-plot the datasets
# loaded over HTTPS earlier in the notebook.
# The key is assumed to match the HTTPS one because both catalogs list the
# same four stores — confirm with dsets_osdf.keys() if it raises KeyError.
dset = dsets_osdf['atm.historical.monthly.smbb']
dset.TREFHTMX.isel(member_id=0,time=0).plot()

# Shut down the Dask cluster created earlier now that the plots are done.
cluster.close()