Skip to article frontmatter | Skip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Load data

import glob
import re
# `plt` must refer to the pyplot interface: the top-level `matplotlib`
# package does not expose plotting functions such as `plot` or `subplots`.
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import xarray as xr
import intake
import intake_esm
import pandas as pd
from dask_jobqueue import PBSCluster
from distributed import Client
# ---------------------------------------------------------------------------
# File paths
# ---------------------------------------------------------------------------
# Scratch space; the Dask cluster setup places its spill and log
# directories underneath it.
lustre_scratch = "/lustre/desc1/scratch/harshah"

# Root of the data tree (note the trailing slash: sub-paths are appended
# to it directly).
gdex_data = "/gdex/data/"
# gdex_url = 'https://data.gdex.ucar.edu/'

# Directory holding the prof449Mhz 30-minute wind files (trailing slash kept
# so that file names can be appended directly).
eol_data = (
    f"{gdex_data}special_projects/pythia_2025/eol-cookbook/"
    "m2hats_iss2_data/prof449Mhz_30min_winds/"
)

# JSON description of the ERA5 intake catalog.
era5_catalog = (
    f"{gdex_data}special_projects/pythia_2024/"
    "pythia_intake_catalogs/era5_catalog.json"
)
print(era5_catalog)
/gdex/data/special_projects/pythia_2024/pythia_intake_catalogs/era5_catalog.json
# Start a Dask cluster whose workers are submitted as PBS jobs.
cluster = PBSCluster(
    job_name="dask-eol-25",
    queue="casper",
    walltime="3:00:00",
    # Each job hosts a single one-core worker process with 4 GiB of memory;
    # the PBS resource request below asks for the matching allocation.
    cores=1,
    processes=1,
    memory="4GiB",
    resource_spec="select=1:ncpus=1:mem=4GB",
    # Worker spill and log directories live under the scratch space.
    local_directory=f"{lustre_scratch}/dask/spill",
    log_directory=f"{lustre_scratch}/dask/logs/",
    # interface="ib0",
    interface="ext",
)
client = Client(cluster)

# Request the workers, block until every one of them has connected, then
# display the cluster (including its dashboard link) as the cell output.
n_workers = 5
cluster.scale(n_workers)
client.wait_for_workers(n_workers=n_workers)
cluster
Loading...

Load data

# %%time
# Multi-file alternative (currently disabled):
# prof449_wind = xr.open_mfdataset(eol_data + '*.nc',concat_dim = 'time',combine='nested')

# Open a single netCDF file from the profiler wind directory as a test case
# and display it.
prof449Mhz_wind_test = xr.open_dataset(f"{eol_data}prof449.20230926.winds.30.nc")
prof449Mhz_wind_test
Loading...
# %%time
# One-off conversion of the test dataset to Zarr (currently disabled);
# presumably this is how the store opened below was created.
# prof449Mhz_wind_test.to_zarr(eol_data + 'prof449.20230926.winds.30.zarr')

# Open the Zarr copy of the same test file and display it.
prof449Mhz_wind_test_zarr = xr.open_zarr(f"{eol_data}prof449.20230926.winds.30.zarr")
prof449Mhz_wind_test_zarr
Loading...

Load ERA5 data

%%time
# Open the ERA5 intake-esm catalog from its JSON description.
era5_cat = intake.open_esm_datastore(era5_catalog)
# Display the catalog's underlying pandas DataFrame.
era5_cat.df
/glade/u/home/harshah/venvs/osdf/lib/python3.10/site-packages/intake_esm/cat.py:250: DtypeWarning: Columns (2) have mixed types. Specify dtype option on import or set low_memory=False.
  df = pd.read_csv(
CPU times: user 1.96 s, sys: 197 ms, total: 2.16 s
Wall time: 2.23 s
Loading...
# List the distinct variable names available in the catalog.
era5_cat.df['variable'].unique()
array(['PV', 'CRWC', 'CSWC', 'Z', 'T', 'U', 'V', 'Q', 'W', 'VO', 'D', 'R', 'O3', 'CLWC', 'CIWC', 'CC', 'ALUVP', 'ALUVD', 'ALNIP', 'ALNID', 'CI', 'ASN', 'RSN', 'SSTK', 'ISTL1', 'ISTL2', 'ISTL3', 'ISTL4', 'SWVL1', 'SWVL2', 'SWVL3', 'SWVL4', 'CAPE', 'LAILV', 'LAIHV', 'TCLW', 'TCIW', 'SP', 'TCW', 'TCWV', 'STL1', 'SD', 'CHNK', 'MSL', 'BLH', 'TCC', 'VAR_10U', 'VAR_10V', 'VAR_2T', 'VAR_2D', 'STL2', 'STL3', 'LCC', 'MCC', 'HCC', 'SRC', 'TCO3', 'IEWS', 'INSS', 'ISHF', 'IE', 'SKT', 'STL4', 'TSN', 'FAL', 'FSR', 'FLSR', 'LBLT', 'LTLT', 'LSHF', 'LICT', 'LICD', 'TCRW', 'TCSW', 'U10N', 'V10N', 'VAR_100U', 'VAR_100V', 'LMLT', 'LMLD', 'VIMA', 'VIT', 'VIKE', 'VITHE', 'VIPIE', 'VIPILE', 'VITOE', 'VIEC', 'VIMAE', 'VIMAN', 'VIKEE', 'VIKEN', 'VITHEE', 'VITHEN', 'VIWVE', 'VIWVN', 'VIGE', 'VIGN', 'VITOEE', 'VITOEN', 'VIOZE', 'VIOZN', 'VILWD', 'VIIWD', 'VIMAD', 'VIKED', 'VITHED', 'VIWVD', 'VIGD', 'VITOED', 'VIOZD', 'VILWE', 'VILWN', 'VIIWE', 'VIIWN', 'VIMAT', 'SRO', 'SSRO', 'ES', 'SMLT', 'LSPF', 'UVB', 'LSP', 'CP', 'SF', 'BLD', 'SSHF', 'SLHF', 'SSRD', 'STRD', 'SSR', 'STR', 'TSR', 'TTR', 'EWSS', 'NSSS', 'E', 'LGWS', 'MGWS', 'GWD', 'RO', 'TSRC', 'TTRC', 'SSRC', 'STRC', 'TISR', 'VIMD', 'CSF', 'LSF', 'FDIR', 'CDIR', 'SSRDC', 'STRDC', 'PEV', 'ZUST', 'DNDZN', 'DNDZA', 'DCTB', 'TPLB', 'TPLT', 'CBH', 'DEG01', 'I10FG', 'ILSPF', 'CRR', 'LSRR', 'CSFR', 'LSSFR', 'MSROR', 'MSSROR', 'MSER', 'MSMR', 'MLSPF', 'MSDWUVRF', 'MLSPR', 'MCPR', 'MSR', 'MBLD', 'MSSHF', 'MSLHF', 'MSDWSWRF', 'MSDWLWRF', 'MSNSWRF', 'MSNLWRF', 'MTNSWRF', 'MTNLWRF', 'METSS', 'MNTSS', 'MER', 'MEGWSS', 'MNGWSS', 'MGWD', 'MROR', 'MTNSWRFCS', 'MTNLWRFCS', 'MSNSWRFCS', 'MSNLWRFCS', 'MTDWSWRF', 'MVIMD', 'MTPR', 'MCSR', 'MLSSR', 'MSDRSWRF', 'MSDRSWRFCS', 'MSDWSWRFCS', 'MSDWLWRFCS', 'MPER', 'VAR_10FG', 'MX2T', 'MN2T', 'MXTPR', 'MNTPR'], dtype=object)
# Narrow the catalog to the hourly 'T' entries for July-September 2023,
# then display the resulting sub-catalog.
cat_subset = era5_cat.search(
    variable=["T"],
    frequency="hourly",
    year=2023,
    month=[7, 8, 9],
)
cat_subset
Loading...
# Keyword arguments that are handed to xarray when the catalog assets are opened.
xarray_open_kwargs = dict(
    # Read the files with the h5netcdf backend.
    engine="h5netcdf",
    # An empty mapping sets no explicit chunk sizes; per the xarray docs this
    # still yields dask-backed arrays using the backend's preferred chunking.
    chunks={},
    # No additional backend-specific options.
    backend_kwargs={},
)
%%time
# Open every asset matched by the search and collect the results in a
# dictionary of datasets, passing the xarray options defined earlier.
dsets = cat_subset.to_dataset_dict(xarray_open_kwargs=xarray_open_kwargs)

--> The keys in the returned dictionary of datasets are constructed as follows:
	'datatype.step_type'
Loading...
Loading...
CPU times: user 2.17 s, sys: 97.6 ms, total: 2.27 s
Wall time: 32.2 s
# Pick the dataset stored under the 'an.pl' key (keys follow the
# 'datatype.step_type' pattern reported by to_dataset_dict) and display it.
dset = dsets['an.pl']
dset 
Loading...
# Pull the 'T' variable out of the dataset and display it. Item access is
# used so the lookup cannot be mistaken for the `.T` transpose shorthand
# that array objects provide.
temps = dset["T"]
temps
Loading...