import glob
import re
# NOTE(review): this binds the top-level matplotlib package to the name "plt";
# the conventional target of that alias is matplotlib.pyplot — confirm intent.
import matplotlib as plt
import numpy as np
import scipy as sp
import xarray as xr
import intake
import intake_esm
import pandas as pd
from dask_jobqueue import PBSCluster
from distributed import Client

######## File paths ################
# Scratch space; used below as the root for Dask spill and log directories.
lustre_scratch = "/lustre/desc1/scratch/harshah"
# Root of the GDEX data tree; the ERA5 catalog path is built from it.
gdex_data = "/gdex/data/"
# gdex_url = 'https://data.gdex.ucar.edu/'
# Directory holding the 449 MHz profiler 30-minute wind files.
eol_data = "/gdex/data/special_projects/pythia_2025/eol-cookbook/m2hats_iss2_data/prof449Mhz_30min_winds/"
#########
# Intake-ESM catalog (JSON) describing the ERA5 assets.
era5_catalog = gdex_data + 'special_projects/pythia_2024/pythia_intake_catalogs/era5_catalog.json'
print(era5_catalog)
# Output:
# /gdex/data/special_projects/pythia_2024/pythia_intake_catalogs/era5_catalog.json
# Configure a PBS-backed Dask cluster: each submitted job runs one
# single-core worker process with a 4GiB memory setting.
cluster = PBSCluster(
    job_name='dask-eol-25',
    cores=1,
    memory='4GiB',
    processes=1,
    local_directory=lustre_scratch + '/dask/spill',
    log_directory=lustre_scratch + '/dask/logs/',
    resource_spec='select=1:ncpus=1:mem=4GB',
    queue='casper',
    walltime='3:00:00',
    # interface = 'ib0'
    interface='ext',
)
# Attach a client so subsequent Dask work is sent to this cluster.
client = Client(cluster)

# Scale the cluster and display cluster dashboard URL
n_workers = 5
cluster.scale(n_workers)
# Block until all requested workers have joined before continuing.
client.wait_for_workers(n_workers=n_workers)
cluster
# Load data
# %%time
# prof449_wind = xr.open_mfdataset(eol_data + '*.nc',concat_dim = 'time',combine='nested')
# Open a single day's profiler wind file (NetCDF) as a test dataset.
prof449Mhz_wind_test = xr.open_dataset(eol_data + 'prof449.20230926.winds.30.nc')
prof449Mhz_wind_test

# %%time
# prof449Mhz_wind_test.to_zarr(eol_data + 'prof449.20230926.winds.30.zarr')
# Open the Zarr store at the path targeted by the (commented-out) to_zarr call above.
prof449Mhz_wind_test_zarr = xr.open_zarr(eol_data + 'prof449.20230926.winds.30.zarr')
prof449Mhz_wind_test_zarr
# Load ERA5 data
# %%time
# Open the ERA5 intake-ESM catalog and show its asset table.
era5_cat = intake.open_esm_datastore(era5_catalog)
era5_cat.df
# List every distinct variable name present in the catalog.
era5_cat.df['variable'].unique()
# Output:
# array(['PV', 'CRWC', 'CSWC', 'Z', 'T', 'U', 'V', 'Q', 'W', 'VO', 'D', 'R',
#        'O3', 'CLWC', 'CIWC', 'CC', 'ALUVP', 'ALUVD', 'ALNIP', 'ALNID',
#        'CI', 'ASN', 'RSN', 'SSTK', 'ISTL1', 'ISTL2', 'ISTL3', 'ISTL4',
#        'SWVL1', 'SWVL2', 'SWVL3', 'SWVL4', 'CAPE', 'LAILV', 'LAIHV',
#        'TCLW', 'TCIW', 'SP', 'TCW', 'TCWV', 'STL1', 'SD', 'CHNK', 'MSL',
#        'BLH', 'TCC', 'VAR_10U', 'VAR_10V', 'VAR_2T', 'VAR_2D', 'STL2',
#        'STL3', 'LCC', 'MCC', 'HCC', 'SRC', 'TCO3', 'IEWS', 'INSS', 'ISHF',
#        'IE', 'SKT', 'STL4', 'TSN', 'FAL', 'FSR', 'FLSR', 'LBLT', 'LTLT',
#        'LSHF', 'LICT', 'LICD', 'TCRW', 'TCSW', 'U10N', 'V10N', 'VAR_100U',
#        'VAR_100V', 'LMLT', 'LMLD', 'VIMA', 'VIT', 'VIKE', 'VITHE',
#        'VIPIE', 'VIPILE', 'VITOE', 'VIEC', 'VIMAE', 'VIMAN', 'VIKEE',
#        'VIKEN', 'VITHEE', 'VITHEN', 'VIWVE', 'VIWVN', 'VIGE', 'VIGN',
#        'VITOEE', 'VITOEN', 'VIOZE', 'VIOZN', 'VILWD', 'VIIWD', 'VIMAD',
#        'VIKED', 'VITHED', 'VIWVD', 'VIGD', 'VITOED', 'VIOZD', 'VILWE',
#        'VILWN', 'VIIWE', 'VIIWN', 'VIMAT', 'SRO', 'SSRO', 'ES', 'SMLT',
#        'LSPF', 'UVB', 'LSP', 'CP', 'SF', 'BLD', 'SSHF', 'SLHF', 'SSRD',
#        'STRD', 'SSR', 'STR', 'TSR', 'TTR', 'EWSS', 'NSSS', 'E', 'LGWS',
#        'MGWS', 'GWD', 'RO', 'TSRC', 'TTRC', 'SSRC', 'STRC', 'TISR',
#        'VIMD', 'CSF', 'LSF', 'FDIR', 'CDIR', 'SSRDC', 'STRDC', 'PEV',
#        'ZUST', 'DNDZN', 'DNDZA', 'DCTB', 'TPLB', 'TPLT', 'CBH', 'DEG01',
#        'I10FG', 'ILSPF', 'CRR', 'LSRR', 'CSFR', 'LSSFR', 'MSROR',
#        'MSSROR', 'MSER', 'MSMR', 'MLSPF', 'MSDWUVRF', 'MLSPR', 'MCPR',
#        'MSR', 'MBLD', 'MSSHF', 'MSLHF', 'MSDWSWRF', 'MSDWLWRF', 'MSNSWRF',
#        'MSNLWRF', 'MTNSWRF', 'MTNLWRF', 'METSS', 'MNTSS', 'MER', 'MEGWSS',
#        'MNGWSS', 'MGWD', 'MROR', 'MTNSWRFCS', 'MTNLWRFCS', 'MSNSWRFCS',
#        'MSNLWRFCS', 'MTDWSWRF', 'MVIMD', 'MTPR', 'MCSR', 'MLSSR',
#        'MSDRSWRF', 'MSDRSWRFCS', 'MSDWSWRFCS', 'MSDWLWRFCS', 'MPER',
#        'VAR_10FG', 'MX2T', 'MN2T', 'MXTPR', 'MNTPR'], dtype=object)

# Restrict the catalog to hourly 'T' entries for July-September 2023.
cat_subset = era5_cat.search(variable=['T'], frequency='hourly', year=2023, month=[7, 8, 9])
cat_subset
# Define the xarray_open_kwargs with a compatible engine, for example, 'h5netcdf'
xarray_open_kwargs = {
    'engine': 'h5netcdf',
    'chunks': {},  # Specify any chunking if needed
    'backend_kwargs': {},  # Any additional backend arguments if required
}

# %%time
# Build a dictionary of datasets from the catalog subset, passing the open
# options defined above.
dsets = cat_subset.to_dataset_dict(xarray_open_kwargs=xarray_open_kwargs)
dset = dsets['an.pl']
dset
# Select the 'T' variable by key; bracket lookup cannot be confused with a
# same-named attribute on the dataset object.
temps = dset['T']
temps