Skip to article frontmatter | Skip to article content

Imports, file paths, and Dask cluster setup

import glob
import re
import matplotlib as plt
import numpy as np
import scipy as sp
import xarray as xr
import intake
import intake_esm
import pandas as pd
from dask_jobqueue import PBSCluster
from distributed import Client
# ---------------------------------------------------------------------------
# File paths
# ---------------------------------------------------------------------------
# Scratch area; the Dask spill and log directories are created beneath it.
lustre_scratch = "/lustre/desc1/scratch/harshah"

# Base directory for GDEX paths. The trailing slash is intentional so that
# relative paths can be appended directly.
gdex_data = "/gdex/data/"
# gdex_url = 'https://data.gdex.ucar.edu/'

# Directory containing the prof449 *.winds.30 files opened later on.
eol_data = (
    "/gdex/data/special_projects/pythia_2025/eol-cookbook/"
    "m2hats_iss2_data/prof449Mhz_30min_winds/"
)

# ---------------------------------------------------------------------------
# ERA5 intake catalog (JSON file) located under the GDEX base directory
# ---------------------------------------------------------------------------
era5_catalog = f"{gdex_data}special_projects/pythia_2024/pythia_intake_catalogs/era5_catalog.json"
print(era5_catalog)
/gdex/data/special_projects/pythia_2024/pythia_intake_catalogs/era5_catalog.json
# Configure a PBS-backed Dask cluster. Each PBS job runs a single worker
# process with one core; spill files and logs go under the scratch directory.
cluster = PBSCluster(
    # --- PBS job settings ---
    job_name='dask-eol-25',
    queue='casper',
    walltime='3:00:00',
    resource_spec='select=1:ncpus=1:mem=4GB',
    # --- Dask worker settings ---
    cores=1,
    processes=1,
    memory='4GiB',
    local_directory=f'{lustre_scratch}/dask/spill',
    log_directory=f'{lustre_scratch}/dask/logs/',
    # --- Network interface ---
    # interface='ib0'
    interface='ext',
)
client = Client(cluster)

# Scale the cluster, block until every requested worker has connected,
# then display the cluster (including its dashboard URL).
n_workers = 5
cluster.scale(n_workers)
client.wait_for_workers(n_workers=n_workers)
cluster
Loading...

Load data

# %%time
# Alternative: open every NetCDF file in the directory as one dataset
# prof449_wind = xr.open_mfdataset(eol_data + '*.nc',concat_dim = 'time',combine='nested')

# Open a single NetCDF file as a test case, then display it.
test_nc_path = f"{eol_data}prof449.20230926.winds.30.nc"
prof449Mhz_wind_test = xr.open_dataset(test_nc_path)
prof449Mhz_wind_test
Loading...
# %%time
# One-time conversion of the test dataset to a Zarr store (left disabled):
# prof449Mhz_wind_test.to_zarr(eol_data + 'prof449.20230926.winds.30.zarr')

# Open the Zarr store with the matching file name, then display it.
test_zarr_path = f"{eol_data}prof449.20230926.winds.30.zarr"
prof449Mhz_wind_test_zarr = xr.open_zarr(test_zarr_path)
prof449Mhz_wind_test_zarr
Loading...

Load ERA5 data

%%time
# Open the ERA5 catalog from the JSON path built earlier (era5_catalog) and
# display its `df` attribute for inspection.
era5_cat = intake.open_esm_datastore(era5_catalog)
era5_cat.df
Loading...
# List the distinct values of the catalog's 'variable' column.
era5_cat.df['variable'].unique()
array(['PV', 'CRWC', 'CSWC', 'Z', 'T', 'U', 'V', 'Q', 'W', 'VO', 'D', 'R', 'O3', 'CLWC', 'CIWC', 'CC', 'ALUVP', 'ALUVD', 'ALNIP', 'ALNID', 'CI', 'ASN', 'RSN', 'SSTK', 'ISTL1', 'ISTL2', 'ISTL3', 'ISTL4', 'SWVL1', 'SWVL2', 'SWVL3', 'SWVL4', 'CAPE', 'LAILV', 'LAIHV', 'TCLW', 'TCIW', 'SP', 'TCW', 'TCWV', 'STL1', 'SD', 'CHNK', 'MSL', 'BLH', 'TCC', 'VAR_10U', 'VAR_10V', 'VAR_2T', 'VAR_2D', 'STL2', 'STL3', 'LCC', 'MCC', 'HCC', 'SRC', 'TCO3', 'IEWS', 'INSS', 'ISHF', 'IE', 'SKT', 'STL4', 'TSN', 'FAL', 'FSR', 'FLSR', 'LBLT', 'LTLT', 'LSHF', 'LICT', 'LICD', 'TCRW', 'TCSW', 'U10N', 'V10N', 'VAR_100U', 'VAR_100V', 'LMLT', 'LMLD', 'VIMA', 'VIT', 'VIKE', 'VITHE', 'VIPIE', 'VIPILE', 'VITOE', 'VIEC', 'VIMAE', 'VIMAN', 'VIKEE', 'VIKEN', 'VITHEE', 'VITHEN', 'VIWVE', 'VIWVN', 'VIGE', 'VIGN', 'VITOEE', 'VITOEN', 'VIOZE', 'VIOZN', 'VILWD', 'VIIWD', 'VIMAD', 'VIKED', 'VITHED', 'VIWVD', 'VIGD', 'VITOED', 'VIOZD', 'VILWE', 'VILWN', 'VIIWE', 'VIIWN', 'VIMAT', 'SRO', 'SSRO', 'ES', 'SMLT', 'LSPF', 'UVB', 'LSP', 'CP', 'SF', 'BLD', 'SSHF', 'SLHF', 'SSRD', 'STRD', 'SSR', 'STR', 'TSR', 'TTR', 'EWSS', 'NSSS', 'E', 'LGWS', 'MGWS', 'GWD', 'RO', 'TSRC', 'TTRC', 'SSRC', 'STRC', 'TISR', 'VIMD', 'CSF', 'LSF', 'FDIR', 'CDIR', 'SSRDC', 'STRDC', 'PEV', 'ZUST', 'DNDZN', 'DNDZA', 'DCTB', 'TPLB', 'TPLT', 'CBH', 'DEG01', 'I10FG', 'ILSPF', 'CRR', 'LSRR', 'CSFR', 'LSSFR', 'MSROR', 'MSSROR', 'MSER', 'MSMR', 'MLSPF', 'MSDWUVRF', 'MLSPR', 'MCPR', 'MSR', 'MBLD', 'MSSHF', 'MSLHF', 'MSDWSWRF', 'MSDWLWRF', 'MSNSWRF', 'MSNLWRF', 'MTNSWRF', 'MTNLWRF', 'METSS', 'MNTSS', 'MER', 'MEGWSS', 'MNGWSS', 'MGWD', 'MROR', 'MTNSWRFCS', 'MTNLWRFCS', 'MSNSWRFCS', 'MSNLWRFCS', 'MTDWSWRF', 'MVIMD', 'MTPR', 'MCSR', 'MLSSR', 'MSDRSWRF', 'MSDRSWRFCS', 'MSDWSWRFCS', 'MSDWLWRFCS', 'MPER', 'VAR_10FG', 'MX2T', 'MN2T', 'MXTPR', 'MNTPR'], dtype=object)
# Narrow the catalog to hourly 'T' entries for July-September 2023,
# then display the resulting subset.
cat_subset = era5_cat.search(
    variable=['T'],
    frequency='hourly',
    year=2023,
    month=[7, 8, 9],
)
cat_subset
Loading...
# Keyword arguments passed as `xarray_open_kwargs` when the catalog subset is
# turned into datasets. The engine has to be one that can read the underlying
# files; 'h5netcdf' is the one used here.
xarray_open_kwargs = dict(
    engine='h5netcdf',
    chunks={},          # no explicit chunk sizes; specify chunking here if needed
    backend_kwargs={},  # additional backend arguments, if required
)
%%time
# Convert the catalog subset into a collection of datasets, passing along the
# open options defined in xarray_open_kwargs. The result is indexed by string
# key further down (e.g. 'an.pl').
dsets = cat_subset.to_dataset_dict(xarray_open_kwargs=xarray_open_kwargs)
Loading...
# Pick the dataset stored under the 'an.pl' key and display it.
dset = dsets['an.pl']
dset 
Loading...
# Select the 'T' variable (the one requested in the catalog search) by key.
# Key access is used instead of `dset.T` because `.T` is the transpose
# shorthand on xarray DataArray/Variable objects; attribute-style lookup of a
# variable with that name is easy to misread and relies on no attribute named
# `T` existing on the dataset object.
temps = dset['T']
temps
Loading...