# Imports
import intake
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import intake_esm
import glob
import pandas as pd
import os
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

# Set up your scratch folder path.
# The user name is read from the USER environment variable, so this raises
# KeyError if USER is not set in the current environment.
username = os.environ["USER"]
glade_scratch = "/glade/derecho/scratch/" + username
print(glade_scratch)
# Example output from the original notebook run:
#   /glade/derecho/scratch/harshah
# Root of the GDEX data tree on the local filesystem.
gdex_data = '/gdex/data/'
#
# Catalog JSON files for dataset d633000, one per access method
# (POSIX paths, HTTPS URLs, OSDF URLs). Each is opened later with
# intake.open_esm_datastore().
era5_posix = 'https://osdf-director.osg-htc.org/ncar/gdex/d633000/catalogs/d633000-posix.json'
era5_https_url = 'https://osdf-director.osg-htc.org/ncar/gdex/d633000/catalogs/d633000-http.json'
era5_osdf_url = 'https://osdf-director.osg-htc.org/ncar/gdex/d633000/catalogs/d633000-osdf.json'

# Create PBS cluster and scale
# Create a PBS cluster object.
# Each job requests 1 core and 4 GB of memory and runs a single worker
# process; worker scratch files and job logs both go under
# <glade_scratch>/dask/.
cluster = PBSCluster(
    job_name='dask-wk24-hpc',
    cores=1,
    memory='4GiB',
    processes=1,
    local_directory=glade_scratch + '/dask/',
    resource_spec='select=1:ncpus=1:mem=4GB',
    queue='casper',
    walltime='5:00:00',
    log_directory=glade_scratch + '/dask/',
    # interface='ib0',
    interface='ext',
)
# Output captured in the original notebook run:
#   /glade/u/home/harshah/venvs/osdf/lib/python3.10/site-packages/distributed/node.py:187:
#   UserWarning: Port 8787 is already in use.
#   Perhaps you already have a cluster running?
#   Hosting the HTTP server on port 42683 instead
#     warnings.warn(

# Scale the cluster to 3 workers.
cluster.scale(3)
# NOTE(review): `Client` is imported but no `Client(cluster)` is created in
# the visible code, so later dask work may not be sent to these workers --
# confirm whether a client cell is missing.

# Bare expression: displays the cluster widget when run in a notebook.
cluster
# Open catalog and load data
# Direct POSIX access for NCAR's HPC users

# Open the catalog whose entries point at POSIX paths.
cat = intake.open_esm_datastore(era5_posix)
cat      # notebook display of the catalog summary
cat.df   # notebook display of the underlying DataFrame

# Restrict the catalog to the VAR_2T variable and inspect the matching path.
cat_temp = cat.search(variable='VAR_2T')
cat_temp.df['path'].values
# Output captured in the original notebook run:
#   array(['/glade/campaign/collections/gdex/data/d633000/e5.oper.an.sfc.zarr/e5.oper.an.sfc.2t.zarr'],
#         dtype=object)

# Load the matching entries into a dict of datasets.
dsets = cat_temp.to_dataset_dict()
# Output captured in the original notebook run:
#   --> The keys in the returned dictionary of datasets are constructed as follows:
#       'variable.short_name'

dsets.keys()
# Output captured in the original notebook run:
#   dict_keys(['VAR_2T.2t'])

temp_posix = dsets['VAR_2T.2t']
temp_posix  # notebook display of the dataset

# Plot the first time step of VAR_2T.
temp_posix.VAR_2T.isel(time=0).plot()
# Using HTTPS

# Open the catalog whose entries point at HTTPS URLs.
catalog_https = intake.open_esm_datastore(era5_https_url)

# Restrict the catalog to the VAR_2T variable and inspect the matching path.
cat_temp_https = catalog_https.search(variable='VAR_2T')
cat_temp_https.df['path'].values
# Output captured in the original notebook run:
#   array(['https://data.gdex.ucar.edu/d633000/e5.oper.an.sfc.zarr/e5.oper.an.sfc.2t.zarr'],
#         dtype=object)

# Load from the HTTPS search result. The original called
# cat_temp.to_dataset_dict(), i.e. the POSIX search result, so this section
# never actually read the data over HTTPS.
dsets_https = cat_temp_https.to_dataset_dict()
# Output captured in the original notebook run:
#   --> The keys in the returned dictionary of datasets are constructed as follows:
#       'variable.short_name'

temp_https = dsets_https['VAR_2T.2t']

# Plot the first time step of VAR_2T.
temp_https.VAR_2T.isel(time=0).plot()
# Using OSDF

# Open the catalog whose entries point at osdf:// URLs.
catalog_osdf = intake.open_esm_datastore(era5_osdf_url)

# Restrict the catalog to the VAR_2T variable and inspect the matching path.
cat_temp_osdf = catalog_osdf.search(variable='VAR_2T')
cat_temp_osdf.df['path'].values
# Output captured in the original notebook run:
#   array(['osdf:///ncar/gdex/d633000/e5.oper.an.sfc.zarr/e5.oper.an.sfc.2t.zarr'],
#         dtype=object)

# Load the matching entries into a dict of datasets.
dsets_osdf = cat_temp_osdf.to_dataset_dict()
# Output captured in the original notebook run:
#   --> The keys in the returned dictionary of datasets are constructed as follows:
#       'variable.short_name'

temp_osdf = dsets_osdf['VAR_2T.2t']

# Plot the first time step of VAR_2T.
temp_osdf.VAR_2T.isel(time=0).plot()
# Close the PBS cluster now that all three access examples have run.
cluster.close()