import fsspec.implementations.http as fshttp
from pelicanfs.core import PelicanFileSystem, PelicanMap, OSDFFileSystem # import dask
# from dask_jobqueue import PBSCluster
# from dask.distributed import Client
# from dask.distributed import performance_report

# Year bounds, kept as strings. They are not referenced elsewhere in the
# visible part of this file.
init_year0 = '1991'
init_year1 = '2020'
final_year0 = '2071'
final_year1 = '2100'

# File paths
rda_url = 'https://data.rda.ucar.edu/'
database_num = 'd345001'
cam6_dart_url = rda_url + database_num
# Catalog JSON locations: the same dataset, described once with https paths
# and once with osdf paths.
https_catalog = cam6_dart_url + '/catalogs/https/' + database_num + '-https-zarr.json'
osdf_catalog = cam6_dart_url + '/catalogs/osdf/' + database_num + '-osdf-zarr.json'

# Open intake catalog
# Open the HTTPS flavour of the catalog and list the Zarr store paths it holds.
df_https_test = intake.open_esm_datastore(https_catalog)
df_https_test.df['path'].values
# Recorded output:
# array(['https://data.rda.ucar.edu/d345001/hourly6/HR.zarr',
#        'https://data.rda.ucar.edu/d345001/hourly6/TSA.zarr',
#        'https://data.rda.ucar.edu/d345001/hourly6/EFLX_LH_TOT.zarr',
#        'https://data.rda.ucar.edu/d345001/hourly6/ER.zarr',
#        'https://data.rda.ucar.edu/d345001/weekly/VS.zarr',
#        'https://data.rda.ucar.edu/d345001/weekly/PS.zarr',
#        'https://data.rda.ucar.edu/d345001/weekly/Q.zarr',
#        'https://data.rda.ucar.edu/d345001/weekly/US.zarr',
#        'https://data.rda.ucar.edu/d345001/weekly/CLDICE.zarr',
#        'https://data.rda.ucar.edu/d345001/weekly/T.zarr',
#        'https://data.rda.ucar.edu/d345001/weekly/CLDLIQ.zarr'],
#       dtype=object)

# Same for the OSDF flavour of the catalog.
df_osdf_test = intake.open_esm_datastore(osdf_catalog)
df_osdf_test.df['path'].values
# Recorded output:
# array(['osdf:///ncar/rda/d345001/hourly6/HR.zarr',
#        'osdf:///ncar/rda/d345001/hourly6/TSA.zarr',
#        'osdf:///ncar/rda/d345001/hourly6/EFLX_LH_TOT.zarr',
#        'osdf:///ncar/rda/d345001/hourly6/ER.zarr',
#        'osdf:///ncar/rda/d345001/weekly/VS.zarr',
#        'osdf:///ncar/rda/d345001/weekly/PS.zarr',
#        'osdf:///ncar/rda/d345001/weekly/Q.zarr',
#        'osdf:///ncar/rda/d345001/weekly/US.zarr',
#        'osdf:///ncar/rda/d345001/weekly/CLDICE.zarr',
#        'osdf:///ncar/rda/d345001/weekly/T.zarr',
#        'osdf:///ncar/rda/d345001/weekly/CLDLIQ.zarr'], dtype=object)

# Restrict both catalogs to the single variable used for the benchmark.
data_var = 'PS'
col_subset_https = df_https_test.search(variable=data_var)
col_subset_osdf = df_osdf_test.search(variable=data_var)

dsets_https = col_subset_https.to_dataset_dict(zarr_kwargs={"consolidated": True})
#
print(f"\nDataset dictionary keys:\n {dsets_https.keys()}")
# Load the first dataset and display a summary.
dataset_key = list(dsets_https.keys())[0]
#
ds_https = dsets_https[dataset_key]
--> The keys in the returned dictionary of datasets are constructed as follows:
'variable.frequency.component.vertical_levels'
Loading...
Loading...
Dataset dictionary keys:
dict_keys(['PS.weekly.atm.1'])
# Build the dataset dictionary for the OSDF-backed catalog subset.
# NOTE(review): the HTTPS call passes zarr_kwargs={"consolidated": True} but
# this one passes nothing — confirm the asymmetry is intended.
dsets_osdf = col_subset_osdf.to_dataset_dict()
#
# Index both dictionaries with the key taken from the HTTPS dictionary, so the
# two protocols are compared on the same dataset.
ds_osdf0 = dsets_osdf[dataset_key]
ds_https0 = dsets_https[dataset_key]
--> The keys in the returned dictionary of datasets are constructed as follows:
'variable.frequency.component.vertical_levels'
Loading...
Loading...
# Pull the PS variable out of each dataset; these two arrays are what the
# benchmark subsets below are sliced from.
ds_osdf = ds_osdf0.PS
ds_https = ds_https0.PS
ds_https  # notebook display; the export rendered its output as "Loading..."

# %%time  <- IPython cell magic from the original notebook cell. It is only
# valid as the first line of a notebook cell, so it is kept here as a comment.
ds_osdf.isel(member_id=0,time=0).plot()
# Recorded output of the timed cell:
# CPU times: user 3.81 s, sys: 5.02 s, total: 8.83 s
# Wall time: 6.56 s

# Data Access Speed tests
# We will now test how long it takes to access data (via the OSDF and HTTPS-only protocols) for various sizes, using the above array.
# Prepare data subsets
# Each pair below applies the same selection to the OSDF-backed and the
# HTTPS-backed array. The size in each name is a nominal label; the measured
# size is computed from `nbytes` when the benchmark runs.

# Single grid point, single member, first 130 time steps.
ds_osdf_1Kb = ds_osdf.isel(lat=0,lon=0,member_id=0).isel(time=np.arange(130))
ds_https_1Kb = ds_https.isel(lat=0,lon=0,member_id=0).isel(time=np.arange(130))
# ds_https_1Kb

# First time step of members 1-3.
ds_osdf_1Mb = ds_osdf.isel(time=0).isel(member_id =1+ np.arange(3))
ds_https_1Mb = ds_https.isel(time=0).isel(member_id =1+ np.arange(3))
# ds_osdf_1Mb

# Member 4, first 24 time steps.
ds_osdf_10Mb = ds_osdf.isel(member_id =4).isel(time=np.arange(24))
ds_https_10Mb = ds_https.isel(member_id =4).isel(time=np.arange(24))
# ds_osdf_10Mb

# Member 5, first 238 time steps.
ds_osdf_100Mb = ds_osdf.isel(member_id =5).isel(time=np.arange(238))
ds_https_100Mb = ds_https.isel(member_id =5).isel(time=np.arange(238))
# ds_osdf_100Mb

# All time steps of member 6.
ds_osdf_200Mb = ds_osdf.isel(member_id = 6)
ds_https_200Mb = ds_https.isel(member_id =6)
# ds_https_200Mb

# All time steps of members 7-8.
ds_osdf_400Mb = ds_osdf.isel(member_id = 7 +np.arange(2))
ds_https_400Mb = ds_https.isel(member_id =7 + np.arange(2))
# ds_https_400Mb

# All time steps of members 10-12.
ds_osdf_600Mb = ds_osdf.isel(member_id = 10 +np.arange(3))
ds_https_600Mb = ds_https.isel(member_id = 10 + np.arange(3))
# ds_https_600Mb

# All time steps of members 14-17.
ds_osdf_800Mb = ds_osdf.isel(member_id = 14 +np.arange(4))
ds_https_800Mb = ds_https.isel(member_id = 14 + np.arange(4))
# ds_https_800Mb

# Members 19-24, first 410 time steps.
ds_osdf_1Gb = ds_osdf.isel(member_id = 19 + np.arange(6)).isel(time = np.arange(410))
ds_https_1Gb = ds_https.isel(member_id = 19 + np.arange(6)).isel(time = np.arange(410))
# ds_osdf_1Gb

# ds_osdf_10Gb = ds_osdf.isel(member_id = 12 + np.arange(52))
# ds_https_10Gb = ds_https.isel(member_id = 12 + np.arange(52))
# ds_osdf_10Gb

# Access data and plot
# Define file path for CSV
csv_file_path = "ncar_benchmark_ap40_nodask.csv"

# Datasets to benchmark, one list per protocol, in the same order.
# The 1Kb subsets are defined above but are not part of the benchmark.
ds_osdf_list = [
    ds_osdf_1Mb, ds_osdf_10Mb, ds_osdf_100Mb, ds_osdf_200Mb, ds_osdf_400Mb,
    ds_osdf_600Mb, ds_osdf_800Mb, ds_osdf_1Gb,
]
ds_https_list = [
    ds_https_1Mb, ds_https_10Mb, ds_https_100Mb, ds_https_200Mb, ds_https_400Mb,
    ds_https_600Mb, ds_https_800Mb, ds_https_1Gb,
]

# Number of data access calls
num_calls = 8  # Modify this as needed
n_workers = 4  # Set this to your preferred number of workers

# DiagnosticTimer class to keep track of runtimes
class DiagnosticTimer:
    """Collect wall-clock runtimes of code blocks together with free-form labels.

    Each use of :meth:`time` as a context manager appends one dict to
    ``self.diagnostics``: the keyword arguments passed in, plus a
    ``"runtime"`` entry holding the elapsed seconds.
    """

    def __init__(self):
        # One dict per completed timed block, in execution order.
        self.diagnostics = []

    @contextmanager
    def time(self, **kwargs):
        """Time the body of a ``with`` block and record it with ``kwargs``.

        If the body raises, nothing is recorded: the code after ``yield``
        does not run and the exception propagates to the caller.
        """
        # Inside the method, ``time`` resolves to the ``time`` module, not to
        # this method (class attributes are not in a method's scope).
        # perf_counter is monotonic, so the interval cannot be distorted by
        # system clock adjustments the way time.time() can.
        start = time.perf_counter()
        yield
        elapsed = time.perf_counter() - start
        # Labels first, runtime last: this fixes the column order of dataframe().
        self.diagnostics.append({**kwargs, "runtime": elapsed})

    def dataframe(self):
        """Return all recorded entries as a DataFrame (empty if none yet)."""
        return pd.DataFrame(self.diagnostics)
# Initialize the DiagnosticTimer
diag_timer = DiagnosticTimer()# Function to check existing CSV file and determine missing runs
def load_existing_results(path=None):
    """Load previously saved benchmark results, used as a checkpoint.

    Args:
        path: CSV file to read. Defaults to the module-level ``csv_file_path``.

    Returns:
        The CSV contents as a DataFrame. If the file does not exist, or exists
        but is completely empty, an empty DataFrame with the columns
        ``dataset_size``, ``protocol``, ``call_number``, ``runtime`` and
        ``MBps`` is returned instead.
    """
    if path is None:
        path = csv_file_path
    if os.path.exists(path):
        try:
            return pd.read_csv(path)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header to parse; treat it the same as
            # "no results yet" instead of aborting the whole benchmark.
            pass
    # No usable file: return an empty frame that still has the expected
    # columns, so callers can filter on them without a KeyError.
    return pd.DataFrame(columns=["dataset_size", "protocol", "call_number", "runtime", "MBps"])
def filter_missing_runs(datasets, protocol_name, existing_df, n_calls=None):
    """List the (dataset, size, call number) runs that are not recorded yet.

    Args:
        datasets: Objects exposing ``nbytes``; their size in MB is
            ``nbytes / 1024**2``.
        protocol_name: Protocol label to match against the ``protocol`` column.
        existing_df: Already recorded results, with at least the columns
            ``dataset_size``, ``protocol`` and ``call_number``.
        n_calls: Number of calls expected per dataset. Defaults to the
            module-level ``num_calls``.

    Returns:
        A list of ``(dataset, dataset_size_mb, call_num)`` tuples, in dataset
        order and then ascending call number (1-based), for every combination
        that has no row in ``existing_df``.
    """
    if n_calls is None:
        n_calls = num_calls

    # Collect the (size, call) pairs already done for this protocol once, so
    # each candidate run is a set lookup instead of a scan of the whole frame.
    # Sizes are matched by exact float equality, as in a direct `==` on the column.
    recorded = existing_df[existing_df["protocol"] == protocol_name]
    completed = set(zip(recorded["dataset_size"], recorded["call_number"]))

    filtered_datasets = []
    for dataset in datasets:
        dataset_size_mb = dataset.nbytes / (1024 ** 2)
        for call_num in range(1, n_calls + 1):
            if (dataset_size_mb, call_num) not in completed:
                filtered_datasets.append((dataset, dataset_size_mb, call_num))
    return filtered_datasets


def benchmark_protocol(datasets, protocol_name, cluster=None):
    """Time loading each dataset into memory and append the results to the CSV.

    Runs already present in the CSV (same size, protocol and call number) are
    skipped, so an interrupted benchmark can be resumed. Every completed run
    is appended to ``csv_file_path`` immediately and is also recorded in the
    module-level ``diag_timer``.

    Args:
        datasets: Datasets to load; each must provide ``nbytes``, ``copy()``
            and ``load()``.
        protocol_name: Label stored in the ``protocol`` column.
        cluster: Optional cluster object. When given, it is scaled to 0 and
            back to ``n_workers`` before every run.

    Returns:
        None.
    """
    existing_df = load_existing_results()  # Existing results act as a checkpoint
    missing_runs = filter_missing_runs(datasets, protocol_name, existing_df)

    # No local DiagnosticTimer is created here. A local one would shadow the
    # module-level `diag_timer`, leaving it empty, so a later
    # `diag_timer.dataframe()` would have no 'dataset_size' column.
    for dataset, dataset_size_mb, call_num in missing_runs:
        if cluster is not None:
            cluster.scale(0)  # Scale down to release worker memory
            cluster.scale(n_workers)  # Scale up to required number of workers
            # NOTE(review): `client` is not defined in the visible source (the
            # dask imports are commented out). This line raises NameError
            # unless a module-level `client` exists — confirm before passing
            # a cluster.
            client.wait_for_workers(n_workers)  # Wait for workers to be ready

        print(f"Starting processing of dataset for protocol '{protocol_name}' (Size: {dataset_size_mb} MB) in call {call_num}")

        # Copy outside the timed region so that only the load itself is measured.
        dataset_copy = dataset.copy()
        with diag_timer.time(dataset_size=dataset_size_mb, protocol=protocol_name, call_number=call_num):
            dataset_copy.load()  # Load the dataset into memory

        # Build a one-row frame from the entry just recorded and add throughput.
        call_result_df = pd.DataFrame([diag_timer.diagnostics[-1]])
        call_result_df["MBps"] = call_result_df["dataset_size"] / call_result_df["runtime"]

        # Write the header when the file is new or still empty, so the CSV
        # always starts with column names.
        write_header = (not os.path.exists(csv_file_path)) or os.path.getsize(csv_file_path) == 0
        call_result_df.to_csv(csv_file_path, mode='a', header=write_header, index=False)
        print(f"Appended results for protocol '{protocol_name}', call {call_num} to '{csv_file_path}'")

        print(f"Finished processing dataset for protocol '{protocol_name}' in call {call_num}")
# Run benchmark for each protocol
# cluster=None means the cluster-restart step inside benchmark_protocol is skipped.
benchmark_protocol(ds_https_list, "HTTPS-only",cluster=None)
benchmark_protocol(ds_osdf_list, "OSDF-director",cluster=None)
# Convert diagnostics to a DataFrame for analysis
# NOTE(review): the CSV at csv_file_path also holds results from earlier,
# checkpointed sessions — confirm whether the analysis should read from the
# CSV rather than from this in-memory timer.
df_diagnostics = diag_timer.dataframe()
Starting processing of dataset for protocol 'HTTPS-only' (Size: 596.109375 MB) in call 7
Appended results for protocol 'HTTPS-only', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 7
Starting processing of dataset for protocol 'HTTPS-only' (Size: 596.109375 MB) in call 8
Appended results for protocol 'HTTPS-only', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 8
Starting processing of dataset for protocol 'HTTPS-only' (Size: 794.8125 MB) in call 1
Appended results for protocol 'HTTPS-only', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 1
Starting processing of dataset for protocol 'HTTPS-only' (Size: 794.8125 MB) in call 2
Appended results for protocol 'HTTPS-only', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 2
Starting processing of dataset for protocol 'HTTPS-only' (Size: 794.8125 MB) in call 3
Appended results for protocol 'HTTPS-only', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 3
Starting processing of dataset for protocol 'HTTPS-only' (Size: 794.8125 MB) in call 4
Appended results for protocol 'HTTPS-only', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 4
Starting processing of dataset for protocol 'HTTPS-only' (Size: 794.8125 MB) in call 5
Appended results for protocol 'HTTPS-only', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 5
Starting processing of dataset for protocol 'HTTPS-only' (Size: 794.8125 MB) in call 6
Appended results for protocol 'HTTPS-only', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 6
Starting processing of dataset for protocol 'HTTPS-only' (Size: 794.8125 MB) in call 7
Appended results for protocol 'HTTPS-only', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 7
Starting processing of dataset for protocol 'HTTPS-only' (Size: 794.8125 MB) in call 8
Appended results for protocol 'HTTPS-only', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 8
Starting processing of dataset for protocol 'HTTPS-only' (Size: 1037.8125 MB) in call 1
Appended results for protocol 'HTTPS-only', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 1
Starting processing of dataset for protocol 'HTTPS-only' (Size: 1037.8125 MB) in call 2
Appended results for protocol 'HTTPS-only', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 2
Starting processing of dataset for protocol 'HTTPS-only' (Size: 1037.8125 MB) in call 3
Appended results for protocol 'HTTPS-only', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 3
Starting processing of dataset for protocol 'HTTPS-only' (Size: 1037.8125 MB) in call 4
Appended results for protocol 'HTTPS-only', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 4
Starting processing of dataset for protocol 'HTTPS-only' (Size: 1037.8125 MB) in call 5
Appended results for protocol 'HTTPS-only', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 5
Starting processing of dataset for protocol 'HTTPS-only' (Size: 1037.8125 MB) in call 6
Appended results for protocol 'HTTPS-only', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 6
Starting processing of dataset for protocol 'HTTPS-only' (Size: 1037.8125 MB) in call 7
Appended results for protocol 'HTTPS-only', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 7
Starting processing of dataset for protocol 'HTTPS-only' (Size: 1037.8125 MB) in call 8
Appended results for protocol 'HTTPS-only', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'HTTPS-only' in call 8
Starting processing of dataset for protocol 'OSDF-director' (Size: 1.265625 MB) in call 1
Appended results for protocol 'OSDF-director', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 1
Starting processing of dataset for protocol 'OSDF-director' (Size: 1.265625 MB) in call 2
Appended results for protocol 'OSDF-director', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 2
Starting processing of dataset for protocol 'OSDF-director' (Size: 1.265625 MB) in call 3
Appended results for protocol 'OSDF-director', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 3
Starting processing of dataset for protocol 'OSDF-director' (Size: 1.265625 MB) in call 4
Appended results for protocol 'OSDF-director', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 4
Starting processing of dataset for protocol 'OSDF-director' (Size: 1.265625 MB) in call 5
Appended results for protocol 'OSDF-director', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 5
Starting processing of dataset for protocol 'OSDF-director' (Size: 1.265625 MB) in call 6
Appended results for protocol 'OSDF-director', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 6
Starting processing of dataset for protocol 'OSDF-director' (Size: 1.265625 MB) in call 7
Appended results for protocol 'OSDF-director', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 7
Starting processing of dataset for protocol 'OSDF-director' (Size: 1.265625 MB) in call 8
Appended results for protocol 'OSDF-director', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 8
Starting processing of dataset for protocol 'OSDF-director' (Size: 10.125 MB) in call 1
Appended results for protocol 'OSDF-director', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 1
Starting processing of dataset for protocol 'OSDF-director' (Size: 10.125 MB) in call 2
Appended results for protocol 'OSDF-director', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 2
Starting processing of dataset for protocol 'OSDF-director' (Size: 10.125 MB) in call 3
Appended results for protocol 'OSDF-director', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 3
Starting processing of dataset for protocol 'OSDF-director' (Size: 10.125 MB) in call 4
Appended results for protocol 'OSDF-director', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 4
Starting processing of dataset for protocol 'OSDF-director' (Size: 10.125 MB) in call 5
Appended results for protocol 'OSDF-director', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 5
Starting processing of dataset for protocol 'OSDF-director' (Size: 10.125 MB) in call 6
Appended results for protocol 'OSDF-director', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 6
Starting processing of dataset for protocol 'OSDF-director' (Size: 10.125 MB) in call 7
Appended results for protocol 'OSDF-director', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 7
Starting processing of dataset for protocol 'OSDF-director' (Size: 10.125 MB) in call 8
Appended results for protocol 'OSDF-director', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 8
Starting processing of dataset for protocol 'OSDF-director' (Size: 100.40625 MB) in call 1
Appended results for protocol 'OSDF-director', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 1
Starting processing of dataset for protocol 'OSDF-director' (Size: 100.40625 MB) in call 2
Appended results for protocol 'OSDF-director', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 2
Starting processing of dataset for protocol 'OSDF-director' (Size: 100.40625 MB) in call 3
Appended results for protocol 'OSDF-director', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 3
Starting processing of dataset for protocol 'OSDF-director' (Size: 100.40625 MB) in call 4
Appended results for protocol 'OSDF-director', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 4
Starting processing of dataset for protocol 'OSDF-director' (Size: 100.40625 MB) in call 5
Appended results for protocol 'OSDF-director', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 5
Starting processing of dataset for protocol 'OSDF-director' (Size: 100.40625 MB) in call 6
Appended results for protocol 'OSDF-director', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 6
Starting processing of dataset for protocol 'OSDF-director' (Size: 100.40625 MB) in call 7
Appended results for protocol 'OSDF-director', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 7
Starting processing of dataset for protocol 'OSDF-director' (Size: 100.40625 MB) in call 8
Appended results for protocol 'OSDF-director', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 8
Starting processing of dataset for protocol 'OSDF-director' (Size: 198.703125 MB) in call 1
Appended results for protocol 'OSDF-director', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 1
Starting processing of dataset for protocol 'OSDF-director' (Size: 198.703125 MB) in call 2
Appended results for protocol 'OSDF-director', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 2
Starting processing of dataset for protocol 'OSDF-director' (Size: 198.703125 MB) in call 3
Appended results for protocol 'OSDF-director', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 3
Starting processing of dataset for protocol 'OSDF-director' (Size: 198.703125 MB) in call 4
Appended results for protocol 'OSDF-director', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 4
Starting processing of dataset for protocol 'OSDF-director' (Size: 198.703125 MB) in call 5
Appended results for protocol 'OSDF-director', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 5
Starting processing of dataset for protocol 'OSDF-director' (Size: 198.703125 MB) in call 6
Appended results for protocol 'OSDF-director', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 6
Starting processing of dataset for protocol 'OSDF-director' (Size: 198.703125 MB) in call 7
Appended results for protocol 'OSDF-director', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 7
Starting processing of dataset for protocol 'OSDF-director' (Size: 198.703125 MB) in call 8
Appended results for protocol 'OSDF-director', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 8
Starting processing of dataset for protocol 'OSDF-director' (Size: 397.40625 MB) in call 1
Appended results for protocol 'OSDF-director', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 1
Starting processing of dataset for protocol 'OSDF-director' (Size: 397.40625 MB) in call 2
Appended results for protocol 'OSDF-director', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 2
Starting processing of dataset for protocol 'OSDF-director' (Size: 397.40625 MB) in call 3
Appended results for protocol 'OSDF-director', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 3
Starting processing of dataset for protocol 'OSDF-director' (Size: 397.40625 MB) in call 4
Appended results for protocol 'OSDF-director', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 4
Starting processing of dataset for protocol 'OSDF-director' (Size: 397.40625 MB) in call 5
Appended results for protocol 'OSDF-director', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 5
Starting processing of dataset for protocol 'OSDF-director' (Size: 397.40625 MB) in call 6
Appended results for protocol 'OSDF-director', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 6
Starting processing of dataset for protocol 'OSDF-director' (Size: 397.40625 MB) in call 7
Appended results for protocol 'OSDF-director', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 7
Starting processing of dataset for protocol 'OSDF-director' (Size: 397.40625 MB) in call 8
Appended results for protocol 'OSDF-director', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 8
Starting processing of dataset for protocol 'OSDF-director' (Size: 596.109375 MB) in call 1
Appended results for protocol 'OSDF-director', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 1
Starting processing of dataset for protocol 'OSDF-director' (Size: 596.109375 MB) in call 2
Appended results for protocol 'OSDF-director', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 2
Starting processing of dataset for protocol 'OSDF-director' (Size: 596.109375 MB) in call 3
Appended results for protocol 'OSDF-director', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 3
Starting processing of dataset for protocol 'OSDF-director' (Size: 596.109375 MB) in call 4
Appended results for protocol 'OSDF-director', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 4
Starting processing of dataset for protocol 'OSDF-director' (Size: 596.109375 MB) in call 5
Appended results for protocol 'OSDF-director', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 5
Starting processing of dataset for protocol 'OSDF-director' (Size: 596.109375 MB) in call 6
Appended results for protocol 'OSDF-director', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 6
Starting processing of dataset for protocol 'OSDF-director' (Size: 596.109375 MB) in call 7
Appended results for protocol 'OSDF-director', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 7
Starting processing of dataset for protocol 'OSDF-director' (Size: 596.109375 MB) in call 8
Appended results for protocol 'OSDF-director', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 8
Starting processing of dataset for protocol 'OSDF-director' (Size: 794.8125 MB) in call 1
Appended results for protocol 'OSDF-director', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 1
Starting processing of dataset for protocol 'OSDF-director' (Size: 794.8125 MB) in call 2
Appended results for protocol 'OSDF-director', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 2
Starting processing of dataset for protocol 'OSDF-director' (Size: 794.8125 MB) in call 3
Appended results for protocol 'OSDF-director', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 3
Starting processing of dataset for protocol 'OSDF-director' (Size: 794.8125 MB) in call 4
Appended results for protocol 'OSDF-director', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 4
Starting processing of dataset for protocol 'OSDF-director' (Size: 794.8125 MB) in call 5
Appended results for protocol 'OSDF-director', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 5
Starting processing of dataset for protocol 'OSDF-director' (Size: 794.8125 MB) in call 6
Appended results for protocol 'OSDF-director', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 6
Starting processing of dataset for protocol 'OSDF-director' (Size: 794.8125 MB) in call 7
Appended results for protocol 'OSDF-director', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 7
Starting processing of dataset for protocol 'OSDF-director' (Size: 794.8125 MB) in call 8
Appended results for protocol 'OSDF-director', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 8
Starting processing of dataset for protocol 'OSDF-director' (Size: 1037.8125 MB) in call 1
Appended results for protocol 'OSDF-director', call 1 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 1
Starting processing of dataset for protocol 'OSDF-director' (Size: 1037.8125 MB) in call 2
Appended results for protocol 'OSDF-director', call 2 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 2
Starting processing of dataset for protocol 'OSDF-director' (Size: 1037.8125 MB) in call 3
Appended results for protocol 'OSDF-director', call 3 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 3
Starting processing of dataset for protocol 'OSDF-director' (Size: 1037.8125 MB) in call 4
Appended results for protocol 'OSDF-director', call 4 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 4
Starting processing of dataset for protocol 'OSDF-director' (Size: 1037.8125 MB) in call 5
Appended results for protocol 'OSDF-director', call 5 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 5
Starting processing of dataset for protocol 'OSDF-director' (Size: 1037.8125 MB) in call 6
Appended results for protocol 'OSDF-director', call 6 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 6
Starting processing of dataset for protocol 'OSDF-director' (Size: 1037.8125 MB) in call 7
Appended results for protocol 'OSDF-director', call 7 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 7
Starting processing of dataset for protocol 'OSDF-director' (Size: 1037.8125 MB) in call 8
Appended results for protocol 'OSDF-director', call 8 to 'ncar_benchmark_ap40_nodask.csv'
Finished processing dataset for protocol 'OSDF-director' in call 8
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[29], line 9
6 df_diagnostics = diag_timer.dataframe()
8 # Calculate MB/s for each run
----> 9 df_diagnostics['MBps'] = df_diagnostics['dataset_size'] / df_diagnostics['runtime']
10 df_diagnostics
File ~/miniconda3/lib/python3.12/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
4100 if self.columns.nlevels > 1:
4101 return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
4103 if is_integer(indexer):
4104 indexer = [indexer]
File ~/miniconda3/lib/python3.12/site-packages/pandas/core/indexes/range.py:417, in RangeIndex.get_loc(self, key)
415 raise KeyError(key) from err
416 if isinstance(key, Hashable):
--> 417 raise KeyError(key)
418 self._check_indexing_error(key)
419 raise KeyError(key)
KeyError: 'dataset_size'# Calculate MB/s for each run
# df_diagnostics['MBps'] = df_diagnostics['dataset_size'] / df_diagnostics['runtime']
# Read the results back from the CSV, which already contains the MBps column
# written by benchmark_protocol.
df_diagnostics = pd.read_csv(csv_file_path)
df_diagnostics  # notebook display; the export rendered its output as "Loading..."
# df_diagnostics = diag_timer.dataframe()
# df_diagnostics

# Plotting MBps vs data size for each protocol and call type
# Define different alpha values for each protocol
# NOTE(review): `alpha_values` and `marker_style` are not used by the plot
# below (it uses `protocol_markers` and the two alpha constants). They are
# kept in case another cell reads them.
alpha_values = {"HTTPS-only": 0.8, "OSDF-director": 0.6}  # Adjust transparency as needed
marker_style = {"HTTPS-only": "o", "OSDF-director": "x"}  # Define different markers for each protocol
edgecolors = {"HTTPS-only": "black", "OSDF-director": None}
#
# Define markers for each protocol
protocol_markers = {
    "HTTPS-only": 'o',     # Circle marker
    "OSDF-director": 'x',  # Cross marker
}
# Define alpha values for different access types
alpha_first_access = 0.8
alpha_subsequent_access = 0.5

fig, ax = plt.subplots(figsize=(10, 6))
for protocol in ["HTTPS-only", "OSDF-director"]:
    is_protocol = df_diagnostics['protocol'] == protocol

    # First access (call_number == 1)
    first_access = df_diagnostics[is_protocol & (df_diagnostics['call_number'] == 1)]
    ax.scatter(first_access['dataset_size'], first_access['MBps'],
               label=f"{protocol} - First Access",
               marker=protocol_markers[protocol],
               alpha=alpha_first_access,
               edgecolor=edgecolors[protocol])

    # Subsequent access (call_number > 1), averaged per dataset size
    subsequent_access = df_diagnostics[is_protocol & (df_diagnostics['call_number'] > 1)]
    subsequent_access_avg = subsequent_access.groupby('dataset_size')['MBps'].mean()

    # Add a slight jitter to dataset size to avoid overlapping
    jitter = 0.1 * subsequent_access_avg.index.to_series().std()  # Adjust jitter dynamically
    # With fewer than two distinct sizes the standard deviation is NaN, which
    # would turn every x position into NaN and hide the points; use no jitter.
    if not np.isfinite(jitter):
        jitter = 0.0
    jittered_index = subsequent_access_avg.index + np.random.uniform(-jitter, jitter, size=len(subsequent_access_avg.index))
    ax.scatter(jittered_index, subsequent_access_avg.values,
               label=f"{protocol} - Subsequent Access (Avg)",
               marker=protocol_markers[protocol],
               alpha=alpha_subsequent_access,
               edgecolor=edgecolors[protocol])

# Customize plot appearance
ax.set_xlabel("Data Size (MB)")
ax.set_ylabel("Data Access Speed (MBps)")
ax.set_title("NCAR --> AP40, Dask: No, 8 requests")
ax.legend()
plt.show()
# Round the measured sizes to whole MB and store them as a categorical column.
# This overwrites df_diagnostics['dataset_size'] in place.
#df_diagnostics['dataset_size'] = df_diagnostics['dataset_size'].astype("category")
df_diagnostics['dataset_size'] = (
    df_diagnostics['dataset_size']
    .round(0)
    .astype(int)
    .astype("category")
)
# Ascending list of the distinct sizes, used as the x-axis order.
size_order = list(df_diagnostics['dataset_size'].unique())
size_order.sort()

# Box plot of throughput per size, one box per protocol.
plt.figure(figsize=(12, 6))
sns.boxplot(x="dataset_size", y="MBps", hue="protocol", order=size_order, data=df_diagnostics)

# Title, axis labels and legend.
plt.title("NCAR --> AP40, Dask: No, 8 requests")
plt.xlabel("Data Size (MB)")
plt.ylabel("Data Access Speed (MBps)")
# plt.yscale('log')
plt.legend(title="Protocol")
plt.show()
# Round the measured sizes to whole MB and store them as a categorical column.
# This overwrites df_diagnostics['dataset_size'] in place.
#df_diagnostics['dataset_size'] = df_diagnostics['dataset_size'].astype("category")
df_diagnostics['dataset_size'] = (
    df_diagnostics['dataset_size']
    .round(0)
    .astype(int)
    .astype("category")
)
# Ascending list of the distinct sizes, used as the x-axis order.
size_order = list(df_diagnostics['dataset_size'].unique())
size_order.sort()

# Same box plot as before, but with the whiskers placed at the 5th and 95th
# percentiles (whis=[5, 95]).
plt.figure(figsize=(12, 6))
sns.boxplot(
    x="dataset_size",
    y="MBps",
    hue="protocol",
    order=size_order,
    whis=[5, 95],
    data=df_diagnostics,
)

# Title, axis labels and legend.
plt.title("NCAR --> AP40, Dask: No, 8 requests")
plt.xlabel("Data Size (MB)")
plt.ylabel("Data Access Speed (MBps)")
# plt.yscale('log')
plt.legend(title="Protocol")
plt.show()