Skip to article frontmatterSkip to article content

Access CESM2 LENS data on AWS through the AWS Open Data OSDF origin

#Imports
import intake
import numpy as np
import pandas as pd
import xarray as xr
import re
import aiohttp
import fsspec.implementations.http as fshttp
from pelicanfs.core import PelicanFileSystem, PelicanMap, OSDFFileSystem 
import dask 
from dask_jobqueue import SLURMCluster
from dask.distributed import Client
from dask.distributed import performance_report
# HTTPS flavour of the catalog: use this one if you are working on NCAR's Casper.
# catalog_url = 'https://data.gdex.ucar.edu/d010092/catalogs/d010092-https-zarr.json'
# OSDF flavour of the catalog (active).
catalog_url = (
    'https://stratus.gdex.ucar.edu/d010092/catalogs/d010092-osdf-zarr.json'
)
# Spin up a SLURM-backed Dask cluster and attach a client to it.
# NOTE: the local/log directories below are user-specific; point them at a
# location you can write to before running this cell.
# Optional extra sbatch directives (currently unused):
# job_extra = ['--qos=icx', '--account=ac_cumulus']
cluster = SLURMCluster(
    queue="icx",
    cores=5,
    memory="192GB",
    walltime='6:00:00',
    local_directory='/home1/10234/hhampapura/dask_space/',
    log_directory='/home1/10234/hhampapura/dask_space/',
    # job_extra_directives=job_extra, interface='eth0',
)
client = Client(cluster)
# Request one SLURM job through the documented `scale` API instead of the
# legacy `scale_up` alias. Scaling submits the job with `sbatch`, so this cell
# must be run from a host that is allowed to submit jobs.
cluster.scale(jobs=1)
2024-10-09 22:18:30,894 - tornado.application - ERROR - Exception in callback functools.partial(<bound method IOLoop._discard_future_result of <tornado.platform.asyncio.AsyncIOMainLoop object at 0x14c6f0d2e370>>, <Task finished name='Task-32' coro=<SpecCluster._correct_state_internal() done, defined at /home1/10234/hhampapura/.local/lib/python3.9/site-packages/distributed/deploy/spec.py:346> exception=RuntimeError('Command exited with non-zero exit code.\nExit code: 1\nCommand:\nsbatch /tmp/tmp90uvwwy7.sh\nstdout:\n\n[TACC]: Job submission is not allowed from this host. Please submit\n[TACC]: through one of the available login resources.\n\n\nstderr:\n\n')>)
Traceback (most recent call last):
  File "/home1/10234/hhampapura/.local/lib/python3.9/site-packages/tornado/ioloop.py", line 750, in _run_callback
    ret = callback()
  File "/home1/10234/hhampapura/.local/lib/python3.9/site-packages/tornado/ioloop.py", line 774, in _discard_future_result
    future.result()
  File "/home1/10234/hhampapura/.local/lib/python3.9/site-packages/distributed/deploy/spec.py", line 390, in _correct_state_internal
    await asyncio.gather(*worker_futs)
  File "/scratch/projects/compilers/intel24.0/oneapi/intelpython/python3.9/lib/python3.9/asyncio/tasks.py", line 688, in _wrap_awaitable
    return (yield from awaitable.__await__())
  File "/home1/10234/hhampapura/.local/lib/python3.9/site-packages/distributed/deploy/spec.py", line 74, in _
    await self.start()
  File "/home1/10234/hhampapura/.local/lib/python3.9/site-packages/dask_jobqueue/core.py", line 426, in start
    out = await self._submit_job(fn)
  File "/home1/10234/hhampapura/.local/lib/python3.9/site-packages/dask_jobqueue/core.py", line 409, in _submit_job
    return await self._call(shlex.split(self.submit_command) + [script_filename])
  File "/home1/10234/hhampapura/.local/lib/python3.9/site-packages/dask_jobqueue/core.py", line 505, in _call
    raise RuntimeError(
RuntimeError: Command exited with non-zero exit code.
Exit code: 1
Command:
sbatch /tmp/tmp90uvwwy7.sh
stdout:

[TACC]: Job submission is not allowed from this host. Please submit
[TACC]: through one of the available login resources.


stderr:


Access the data from the AWS bucket using intake

# Open the intake-esm catalog. The catalog address lives in `catalog_url`
# (set alongside the imports); no `intake_url` variable exists in this
# notebook, so referring to it raised a NameError.
osdf_catalog = intake.open_esm_datastore(catalog_url)
osdf_catalog
Loading...
# Peek at the 'path' column for the first few rows of the catalog table.
osdf_catalog.df.head()['path'].values
array(['osdf:///aws-opendata/us-west-2/ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FLNS.zarr', 'osdf:///aws-opendata/us-west-2/ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FLNSC.zarr', 'osdf:///aws-opendata/us-west-2/ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FLUT.zarr', 'osdf:///aws-opendata/us-west-2/ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FSNS.zarr', 'osdf:///aws-opendata/us-west-2/ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FSNSC.zarr'], dtype=object)
# Narrow the catalog down to the daily TREFHTMX entries, then display the result.
osdf_catalog_temp = osdf_catalog.search(
    variable='TREFHTMX',
    frequency='daily',
)
osdf_catalog_temp
Loading...
%%time
# Load every catalog entry matched by the search into a dictionary of datasets.
# Alternative call that passes anonymous-access storage options (disabled):
#dsets = osdf_catalog_temp.to_dataset_dict(storage_options={'anon':True})
dsets = osdf_catalog_temp.to_dataset_dict()
Loading...
# Show the keys under which the loaded datasets are stored.
dsets.keys()
dict_keys(['atm.ssp370.daily.smbb', 'atm.historical.daily.smbb', 'atm.historical.daily.cmip6', 'atm.ssp370.daily.cmip6'])
# Pick the dataset stored under the 'atm.historical.daily.smbb' key and display it.
historical_smbb = dsets['atm.historical.daily.smbb']
historical_smbb
Loading...
%%time
historical_smbb.TREFHTMX.isel(member_id=0,time=0)
Loading...
%%time
historical_smbb.TREFHTMX.isel(member_id=0,time=0).plot()
CPU times: user 396 ms, sys: 173 ms, total: 570 ms
Wall time: 1.29 s
<Figure size 640x480 with 2 Axes>