# Imports
import getpass
import glob
import os

import dask
import intake
import intake_esm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from dask.distributed import Client
from dask.distributed import performance_report
from dask_jobqueue import PBSCluster

# Set up your scratch folder path
# Resolve the login name. $USER is preferred (as in the original run); when it
# is unset or empty, fall back to getpass.getuser() instead of raising KeyError.
username = os.environ.get("USER") or getpass.getuser()
# Per-user scratch directory; reused below as the Dask local_directory and
# log_directory.
glade_scratch = "/glade/derecho/scratch/" + username
print(glade_scratch)
# Output from the original run:
#   /glade/derecho/scratch/harshah
# Local GDEX data path (not referenced again in the visible part of the file).
gdex_data = '/gdex/data/'
# intake-ESM catalog files for dataset d010092. Judging by the path listings
# further down, the "osdf" catalog lists assets as osdf:// paths and the
# "https" catalog lists the same assets as https:// URLs.
cesm_osdf_url = 'https://osdata.gdex.ucar.edu/d010092/catalogs/d010092-osdf.json'
cesm_https_url = 'https://osdata.gdex.ucar.edu/d010092/catalogs/d010092-https.json'

# --- Create PBS cluster and scale ---
# Create a PBS cluster object. Each submitted job requests one CPU and 4 GB
# (see resource_spec) and runs a single one-core worker process.
cluster = PBSCluster(
    job_name='dask-wk24-hpc',
    cores=1,
    memory='4GiB',
    processes=1,
    # Worker scratch files and job logs both go under the user's scratch area.
    local_directory=glade_scratch + '/dask/',
    resource_spec='select=1:ncpus=1:mem=4GB',
    queue='casper',
    walltime='5:00:00',
    log_directory=glade_scratch + '/dask/',
    # Network interface used by the scheduler and workers; 'ib0' was the
    # alternative tried in the original notebook.
    # interface='ib0',
    interface='ext',
)
# Output from the original run:
#   /glade/u/home/harshah/venvs/osdf/lib/python3.10/site-packages/distributed/node.py:187: UserWarning: Port 8787 is already in use.
#   Perhaps you already have a cluster running?
#   Hosting the HTTP server on port 46367 instead
#   warnings.warn(

# Ask for three workers.
cluster.scale(3)
# NOTE(review): `Client` is imported but no `Client(cluster)` appears in the
# visible code, so later computations presumably do not run on these workers --
# confirm whether a client should be attached here.
cluster  # notebook display; the export captured only "Loading..."
# --- Open catalog and load data ---

# Open the HTTPS catalog as an intake-ESM datastore.
cat = intake.open_esm_datastore(cesm_https_url)
# The bare expressions below are notebook displays; the export captured only
# "Loading..." placeholders for them.
cat
cat.df

# Restrict the catalog to the monthly TREFHTMX entries.
cat_temp = cat.search(variable='TREFHTMX', frequency='monthly')
cat_temp
# List the asset paths the search matched.
cat_temp.df['path'].values
# Output from the original run:
#   array(['https://stratus.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-historical-cmip6-TREFHTMX.zarr',
#          'https://stratus.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-historical-smbb-TREFHTMX.zarr',
#          'https://stratus.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-ssp370-cmip6-TREFHTMX.zarr',
#          'https://stratus.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-ssp370-smbb-TREFHTMX.zarr'],
#         dtype=object)

# Open one store directly with xarray, without going through the catalog.
# NOTE(review): this URL uses the osdata.gdex.ucar.edu host, whereas the catalog
# paths above point at stratus.gdex.ucar.edu -- confirm both are intended.
test = xr.open_zarr('https://osdata.gdex.ucar.edu/d010092/atm/monthly/cesm2LE-historical-cmip6-TREFHTMX.zarr')
test  # notebook display; the export captured only "Loading..."
# Load every matched asset into a dictionary of datasets.
dsets = cat_temp.to_dataset_dict()
# Output from the original run:
#   --> The keys in the returned dictionary of datasets are constructed as follows:
#   'component.experiment.frequency.forcing_variant'
#   Loading...
#   Loading...

dsets.keys()
# Output from the original run:
#   dict_keys(['atm.historical.monthly.cmip6', 'atm.historical.monthly.smbb', 'atm.ssp370.monthly.smbb', 'atm.ssp370.monthly.cmip6'])

# Pick the historical / smbb entry for the plot that follows.
dset = dsets['atm.historical.monthly.smbb']
dset  # notebook display; the export captured only "Loading..."
# The original notebook cell started with the IPython cell magic `%%time`.
# It is kept as a comment because it is not valid syntax outside IPython.
# %%time
# Plot TREFHTMX for the first member and first time step.
dset.TREFHTMX.isel(member_id=0, time=0).plot()
# Timing reported by the original run:
#   CPU times: user 324 ms, sys: 120 ms, total: 444 ms
#   Wall time: 953 ms

# --- Using OSDF ---

# Same catalog search as before, but starting from the OSDF catalog file.
cat_osdf = intake.open_esm_datastore(cesm_osdf_url)
cat_temp_osdf = cat_osdf.search(variable='TREFHTMX', frequency='monthly')
cat_temp_osdf  # notebook display; the export captured only "Loading..."
# List the asset paths the OSDF search matched.
cat_temp_osdf.df['path'].values
# Output from the original run:
#   array(['osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-historical-cmip6-TREFHTMX.zarr',
#          'osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-historical-smbb-TREFHTMX.zarr',
#          'osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-ssp370-cmip6-TREFHTMX.zarr',
#          'osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-ssp370-smbb-TREFHTMX.zarr'],
#         dtype=object)

# Direct open of an osdf:// store; this line was already commented out in the
# original notebook.
# test = xr.open_zarr('osdf:///ncar-gdex/d010092/atm/monthly/cesm2LE-historical-cmip6-TREFHTMX.zarr')

# Load the OSDF-backed assets. In the exported text this assignment was fused
# onto the comment above and therefore never executed.
dsets_osdf = cat_temp_osdf.to_dataset_dict()
# Output from the original run:
#   --> The keys in the returned dictionary of datasets are constructed as follows:
#   'component.experiment.frequency.forcing_variant'
#   Loading...
#   Loading...

# Plot from the OSDF-backed dictionary. The original indexed `dsets` (the HTTPS
# result) here, so this section re-plotted the HTTPS data instead of
# exercising OSDF.
dset = dsets_osdf['atm.historical.monthly.smbb']
dset.TREFHTMX.isel(member_id=0, time=0).plot()

# Shut the PBS cluster down now that the notebook is finished.
cluster.close()