Analytical uncertainty at CO2 measurement stations

%load_ext autoreload
%autoreload 2
from collections import OrderedDict
import numpy as np
import pandas as pd
import xarray as xr
xr.set_options(display_style='text')

import matplotlib.pyplot as plt

import figure_panels
import obs_surface
import util

Load the monthly data

Specify the records at each station to examine.

stn_records = dict(
    SPO=[
        'SPO_NOAA_insitu_CO2',
        'SPO_NOAA_flask_CO2',
        'SPO_SIO_O2_flask_CO2',
        'SPO_CSIRO_flask_CO2',
        'SPO_SIO_CDK_flask_CO2',    
    ],
    CGO=[
        'CGO_CSIRO_insitu_CO2',
        'CGO_NOAA_flask_CO2',
        'CGO_CSIRO_flask_CO2',
        'CGO_SIO_O2_flask_CO2',
    ],
)
record_list = [ri for r in stn_records.values() for ri in r]
  1. Read txt file containing the station data.

  2. Make all records into a dataset for later plotting.

  3. Compute mean across records at each station and generate “anomaly” columns.

# generate column names for the "anomaly" columns (minus station mean)
stn_records_a = dict()
for stn, records in stn_records.items():
    stn_records_a[stn] = [f'{rec}_mmedian' for rec in records]
    
# read monthly data
file = obs_surface.data_files('CO2', 'obs')
df = obs_surface.read_stndata(file)

# get dataset
stninfo = obs_surface.get_stn_info('CO2')
ds = obs_surface.to_dataset(
    stninfo, df, 'CO2', 
    plot_coverage=False, 
    dropna=False, 
    unique_stn=False, 
    gap_fill=False).to_dataset()

# keep only columns from stations specified above
df = df[filter(lambda s: '_CO2' in s and 
               any(s in records for records in stn_records.values()) 
               or '_CO2' not in s, df.columns)]

# compute station median and add as new columns
for (stn, arecords), records in zip(stn_records_a.items(), stn_records.values()):
    df[stn] = df[records].median(axis=1)
    df[arecords] = df[records].sub(df[stn], axis=0)
df
year month day year_frac polar_year SPO_SIO_CDK_flask_CO2 SPO_NOAA_insitu_CO2 CGO_NOAA_flask_CO2 CGO_CSIRO_insitu_CO2 CGO_CSIRO_flask_CO2 ... SPO_NOAA_insitu_CO2_mmedian SPO_NOAA_flask_CO2_mmedian SPO_SIO_O2_flask_CO2_mmedian SPO_CSIRO_flask_CO2_mmedian SPO_SIO_CDK_flask_CO2_mmedian CGO CGO_CSIRO_insitu_CO2_mmedian CGO_NOAA_flask_CO2_mmedian CGO_CSIRO_flask_CO2_mmedian CGO_SIO_O2_flask_CO2_mmedian
date
1998-12-15 1998 12 15.5 1998.956164 1999 365.0013 365.0870 364.9250 NaN 364.8809 ... -0.00205 0.04095 0.00205 NaN -0.08775 364.88090 NaN 0.04410 0.00000 -0.12380
1999-01-15 1999 1 15.5 1999.041096 1999 364.9615 365.0233 364.9575 NaN 364.9474 ... -0.06450 0.09930 0.00000 0.1202 -0.12630 364.95750 NaN 0.00000 -0.01010 0.02390
1999-02-14 1999 2 14.0 1999.123288 1999 364.7820 364.9477 365.0900 NaN 365.1991 ... -0.01380 0.08050 0.00000 0.1495 -0.17950 365.09000 NaN 0.00000 0.10910 -0.05870
1999-03-15 1999 3 15.5 1999.202740 1999 364.8418 364.8559 364.8880 NaN 364.8469 ... -0.01580 0.20000 0.00000 0.1283 -0.02990 364.84690 NaN 0.04110 0.00000 -0.15130
1999-04-15 1999 4 15.0 1999.287671 1999 365.0213 365.0247 364.9900 NaN 365.0387 ... 0.00000 0.11780 0.03150 -0.0907 -0.00340 364.99000 NaN 0.00000 0.04870 -0.08340
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2019-10-15 2019 10 15.5 2019.789041 2020 408.8537 408.8633 408.6825 408.8186 408.6942 ... 0.00960 0.00460 -0.13460 -0.1977 0.00000 408.68835 0.13025 -0.00585 0.00585 -0.20365
2019-11-15 2019 11 15.0 2019.873973 2020 408.8636 408.9070 408.8037 408.7658 408.7523 ... 0.04340 0.01640 -0.10470 -0.0246 0.00000 408.75905 0.00675 0.04465 -0.00675 -0.14465
2019-12-15 2019 12 15.5 2019.956164 2020 409.0132 408.8532 408.5017 408.5183 408.6873 ... -0.08780 0.05650 -0.08230 0.0000 0.07220 408.51000 0.00830 -0.00830 0.17730 -0.08510
2020-01-15 2020 1 15.5 2020.040984 2020 408.8138 NaN NaN NaN NaN ... NaN NaN -0.04985 NaN 0.04985 408.02610 NaN NaN NaN 0.00000
2020-02-14 2020 2 14.5 2020.122951 2020 408.7241 NaN NaN NaN NaN ... NaN NaN -0.18450 NaN 0.18450 408.19060 NaN NaN NaN 0.00000

255 rows × 25 columns

Compute the long-term standard deviation of monthly records

print('Long-term std dev of monthly records:')
for stn, records in stn_records_a.items():
    print(f'{stn}: {np.nanstd(df[records].values):0.4f}')
Long-term std dev of monthly records:
SPO: 0.1269
CGO: 0.1077

Compute seasonal means

Loop over the seasons and generate seasonal averages.

Require at least 2 months to define a season.

seasons = OrderedDict([
    ('djf', [12, 1, 2]),
    ('mam', [3, 4, 5]),
    ('jja', [6, 7, 8]),
    ('son', [9, 10, 11]),
])
dfs_seasons = {}
for season, months in seasons.items():
    if season == 'djf':
        groupby_col = 'polar_year'
        drop_cols = ['month', 'day', 'year']        
    else:
        groupby_col = 'year'
        drop_cols = ['month', 'day', 'polar_year']

    grouped = df.loc[df.month.isin(months)].groupby(groupby_col)
    
    dfs_seasons[season] = grouped.mean().where(grouped.count()>=2)
    dfs_seasons[season] = dfs_seasons[season].set_index('year_frac')
    dfs_seasons[season] = dfs_seasons[season].drop(drop_cols, axis=1)
    
dfs_seasons['son']
SPO_SIO_CDK_flask_CO2 SPO_NOAA_insitu_CO2 CGO_NOAA_flask_CO2 CGO_CSIRO_insitu_CO2 CGO_CSIRO_flask_CO2 SPO_CSIRO_flask_CO2 CGO_SIO_O2_flask_CO2 SPO_SIO_O2_flask_CO2 SPO_NOAA_flask_CO2 SPO SPO_NOAA_insitu_CO2_mmedian SPO_NOAA_flask_CO2_mmedian SPO_SIO_O2_flask_CO2_mmedian SPO_CSIRO_flask_CO2_mmedian SPO_SIO_CDK_flask_CO2_mmedian CGO CGO_CSIRO_insitu_CO2_mmedian CGO_NOAA_flask_CO2_mmedian CGO_CSIRO_flask_CO2_mmedian CGO_SIO_O2_flask_CO2_mmedian
year_frac
1999.789954 366.546833 366.652033 366.485433 NaN 366.551133 366.622667 366.157433 366.636567 366.635833 366.622467 0.029567 0.013367 0.014100 0.000200 -0.075633 366.485433 NaN 0.000000 0.065700 -0.328000
2000.790528 367.783267 367.886567 367.676267 NaN 367.733767 367.693000 367.617033 367.929467 367.738800 367.781900 0.104667 -0.043100 0.147567 -0.057400 0.001367 367.676267 NaN 0.000000 0.057500 -0.059233
2001.789954 369.528167 369.798500 369.552500 NaN 369.554000 369.600500 369.451733 369.716500 369.689367 369.693767 0.104733 -0.004400 0.022733 -0.093267 -0.165600 369.536233 NaN 0.016267 0.017767 -0.084500
2002.789954 371.808200 371.996267 371.783767 NaN 371.837167 371.861000 371.803200 371.917867 371.904767 371.876567 0.119700 0.028200 0.041300 -0.015567 -0.068367 371.800600 NaN -0.016833 0.036567 0.002600
2003.789954 373.815700 374.085800 373.887233 NaN 373.939467 373.976000 373.765833 374.045000 374.006367 374.013700 0.072100 -0.007333 0.031300 -0.037700 -0.198000 373.891167 NaN -0.003933 0.048300 -0.125333
2004.790528 375.593833 375.713767 375.600400 375.657567 375.692033 375.832500 375.218267 375.909600 375.782367 375.782883 -0.069117 -0.000517 0.126717 0.029450 -0.189050 375.616933 0.040633 -0.016533 0.075100 -0.398667
2005.789954 377.834000 377.916767 377.713633 377.874400 377.910333 377.801500 377.662800 377.955333 377.895200 377.896867 0.019900 -0.001667 0.058467 -0.094300 -0.062867 377.800133 0.074267 -0.086500 0.110200 -0.137333
2006.789954 379.322967 379.542000 379.468500 379.442533 379.405733 379.590500 379.266467 379.558933 379.597367 379.546483 -0.004483 0.050883 0.012450 0.000000 -0.223517 379.406117 0.036417 0.062383 -0.000383 -0.139650
2007.789954 381.619633 381.748333 381.553600 381.721733 381.596667 NaN 381.519933 381.835667 381.868867 381.775933 -0.027600 0.092933 0.059733 NaN -0.156300 381.586500 0.135233 -0.032900 0.010167 -0.066567
2008.790528 383.354567 383.635400 383.582100 383.584767 383.613233 NaN 383.522433 383.575567 383.688867 383.617683 0.017717 0.071183 -0.042117 NaN -0.263117 383.579400 0.005367 0.002700 0.033833 -0.056967
2009.789954 384.973200 385.131367 385.073333 385.161800 385.063833 NaN 385.195867 385.059600 385.171400 385.071583 0.059783 0.099817 -0.011983 NaN -0.098383 385.086333 0.075467 -0.013000 -0.022500 0.109533
2010.789954 387.313067 387.412467 387.400000 387.370700 387.334433 387.382000 387.246600 387.349067 387.444133 387.388483 0.023983 0.055650 -0.039417 -0.019850 -0.075417 387.343083 0.027617 0.041525 -0.008650 -0.068775
2011.789954 388.991500 389.038333 388.994467 388.987867 388.984233 388.916333 388.850267 388.988167 389.018600 389.004767 0.033567 0.013833 -0.016600 -0.088433 -0.013267 388.958333 0.029533 0.036133 0.025900 -0.108067
2012.790528 391.231667 391.335700 391.220400 391.239667 391.290533 NaN 391.218333 391.239967 391.303600 391.278783 0.056917 0.024817 -0.038817 NaN -0.047117 391.238433 0.001233 -0.018033 0.052100 -0.020100
2013.789954 393.986967 393.999967 393.814733 393.928567 393.901033 394.091000 393.837400 393.902167 394.021100 394.005300 -0.005333 0.015800 -0.103133 0.085700 -0.018333 393.865400 0.063167 -0.050667 0.035633 -0.028000
2014.789954 395.954567 395.908433 395.725000 395.804000 395.841600 396.073667 395.490933 395.829900 395.935800 395.933733 -0.025300 0.002067 -0.103833 0.139933 0.020833 395.754033 0.049967 -0.029033 0.087567 -0.263100
2015.789954 398.563600 398.430967 398.469833 398.406500 398.446767 398.569667 398.309333 398.560333 398.534433 398.537433 -0.106467 -0.003000 0.022900 0.032233 0.026167 398.413400 -0.006900 0.056433 0.033367 -0.104067
2016.790528 401.810833 401.629667 401.493333 401.538233 401.521000 401.779333 401.320600 401.651300 401.664167 401.696833 -0.067167 -0.032667 -0.045533 0.082500 0.114000 401.500933 0.037300 -0.007600 0.020067 -0.180333
2017.789954 403.589000 403.638500 403.447533 403.673933 403.625500 403.517000 403.538600 403.545800 403.665267 403.617300 0.021200 0.047967 -0.071500 -0.100300 0.000000 403.594267 0.079667 -0.146733 0.031233 -0.055667
2018.789954 406.291133 406.126200 406.021667 405.938500 405.995100 406.003000 405.752733 405.997000 406.063500 406.046200 0.080000 0.017300 -0.049200 -0.043200 0.244933 405.933317 0.005183 0.088350 0.061783 -0.180583
2019.789954 408.820433 408.828767 408.737400 408.761267 408.726200 408.712333 408.527933 408.685833 408.813867 408.806867 0.021900 0.007000 -0.121033 -0.094533 0.013567 408.720033 0.041233 0.017367 0.006167 -0.192100

Long-term seasonal means

# make list of *all* records
records = [record for stn, records in stn_records_a.items() for record in records]

# dimension dictionary with lists
df_mean = {'season': list(seasons.keys())}
df_mean.update({r: [] for r in records})

# loop over seasons, compute long-term mean
for season in seasons:
    for r in records:
        df_mean[r].append(dfs_seasons[season][r].mean(axis=0))
df_mean = pd.DataFrame(df_mean).set_index('season')
df_mean
SPO_NOAA_insitu_CO2_mmedian SPO_NOAA_flask_CO2_mmedian SPO_SIO_O2_flask_CO2_mmedian SPO_CSIRO_flask_CO2_mmedian SPO_SIO_CDK_flask_CO2_mmedian CGO_CSIRO_insitu_CO2_mmedian CGO_NOAA_flask_CO2_mmedian CGO_CSIRO_flask_CO2_mmedian CGO_SIO_O2_flask_CO2_mmedian
season
djf -0.017290 0.024567 -0.004355 0.106728 -0.056947 0.004599 0.027571 0.007436 -0.068187
mam 0.039309 0.006748 -0.035228 -0.040449 -0.001835 0.061413 -0.005066 -0.009310 -0.039366
jja 0.067344 -0.003321 -0.036373 -0.078673 0.015745 0.032955 0.004226 -0.000957 -0.056112
son 0.021917 0.021340 -0.005043 -0.016149 -0.058767 0.043461 -0.004791 0.037020 -0.118329

SD of the long-term mean at each station

error = {'season': list(seasons.keys())}
error.update({stn: [] for stn in stn_records.keys()})
for stn, arecords in stn_records_a.items():
    for season in seasons:
        error[stn].append(df_mean.loc[season][arecords].std(ddof=1))

df_error = pd.DataFrame(error).set_index('season')
df_error
SPO CGO
season
djf 0.061237 0.041960
mam 0.032685 0.042500
jja 0.054904 0.037213
son 0.033189 0.074899

Median across records at SPO and CGO

ds_mSPO_med = (ds.sel(record=stn_records['SPO']) - ds.sel(record=stn_records['SPO']).median('record', skipna=True))
ds_mCGO_med = (ds.sel(record=stn_records['CGO']) - ds.sel(record=stn_records['CGO']).median('record', skipna=True))
ds_m_med = xr.concat((ds_mSPO_med, ds_mCGO_med), 'record')
ds_m_med.record
<xarray.DataArray 'record' (record: 9)>
array(['SPO_NOAA_insitu_CO2', 'SPO_NOAA_flask_CO2', 'SPO_SIO_O2_flask_CO2',
       'SPO_CSIRO_flask_CO2', 'SPO_SIO_CDK_flask_CO2', 'CGO_CSIRO_insitu_CO2',
       'CGO_NOAA_flask_CO2', 'CGO_CSIRO_flask_CO2', 'CGO_SIO_O2_flask_CO2'],
      dtype=object)
Coordinates:
  * record       (record) object 'SPO_NOAA_insitu_CO2' ... 'CGO_SIO_O2_flask_...
    institution  (record) object 'NOAA' 'NOAA' 'SIO_O2' ... 'CSIRO' 'SIO_O2'
    lat          (record) float64 -89.98 -89.98 -89.98 ... -40.68 -40.68 -40.68
    lon          (record) float64 -24.8 -24.8 -24.8 -24.8 ... 144.7 144.7 144.7
    stncode      (record) object 'SPO' 'SPO' 'SPO' 'SPO' ... 'CGO' 'CGO' 'CGO'

Seasonal means of the medians

ds_djf = util.ann_mean(ds_m_med, season='DJF', time_bnds_varname=None, n_req=2,)
ds_jja = util.ann_mean(ds_m_med, season='JJA', time_bnds_varname=None, n_req=2,)

ds_djf.time
<xarray.DataArray 'time' (time: 22)>
array([1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])
Coordinates:
  * time     (time) int64 1999 2000 2001 2002 2003 ... 2016 2017 2018 2019 2020

Time series of SPO and CGO records

fig = plt.figure(figsize=(6, 8))

ncol = 2
nrow = 2
fig, axs = plt.subplots(nrow, ncol, figsize=(6.5*ncol, 4*nrow))   

marker_spec = figure_panels.marker_spec_co2_inst()

labels = dict(
    SPO_NOAA_insitu_CO2='NOAA in situ',
    SPO_NOAA_flask_CO2='NOAA flasks',
    SPO_SIO_O2_flask_CO2='SIO O$_2$ Program flasks',
    SPO_SIO_CDK_flask_CO2='SIO CO$_2$ Program flasks',
    SPO_CSIRO_flask_CO2='CSIRO flasks',
    CGO_NOAA_flask_CO2='NOAA flasks',
    CGO_SIO_O2_flask_CO2='SIO O$_2$ Program flasks',
    CGO_CSIRO_flask_CO2='CSIRO flasks',
    CGO_CSIRO_insitu_CO2='CSIRO in situ',
)

def ammendments(ax):
    ax.axhline(0, color='k', lw=1);
    ax.set_ylabel('$\Delta$CO$_2$ [ppm]')    
    ax.set_xticks(np.arange(1998, 2022, 2));
    ax.set_xlim([1998, 2022])
    ax.set_ylim([-0.73, 0.63]);

plotted_elements = []
legend_elements = []
    
    
dset = ds_djf.CO2.sel(record=record_list).copy()
# for stn in ['SPO', 'CGO']:
#     idx = np.where(dset.stncode == stn)[0]
#     dset[:, idx] = dset[:, idx]


x = dset.time + util.season_yearfrac['DJF']
for i, record in enumerate(dset.record.values):    
    ax = axs[0, 0] if 'SPO' in record else axs[1, 0]
    y = dset.sel(record=record)
    ls = '--' if 'insitu' in record else '-'
        
    inst = str(dset.sel(record=record).institution.values)
    p = ax.plot(x, y, linestyle=ls, label=labels[record], **marker_spec[inst])
    if labels[record] not in plotted_elements:
        legend_elements.append(p[0])
        plotted_elements.append(labels[record])


dset = ds_jja.CO2.sel(record=record_list).copy()
# for stn in ['SPO', 'CGO']:
#     idx = np.where(dset.stncode == stn)[0]
#     dset[:, idx] = dset[:, idx]


x = dset.time + util.season_yearfrac['JJA']
for i, record in enumerate(dset.record.values):
    ax = axs[0, 1] if 'SPO' in record else axs[1, 1]
    y = dset.sel(record=record)
    ls = '--' if 'insitu' in record else '-'
    inst = str(dset.sel(record=record).institution.values)    
    p = ax.plot(x, y, linestyle=ls, label=labels[record], **marker_spec[inst])
    if labels[record] not in plotted_elements:
        legend_elements.append(p[0])
        plotted_elements.append(labels[record])

for ax in axs.ravel():
    ammendments(ax)

    
xoff = 1
yoff = 0.05
str_text = f'$\sigma$ = {df_error.loc["djf"].SPO:0.2f} ppm'
axs[0, 0].text(ax.get_xlim()[0]+xoff, ax.get_ylim()[0]+yoff, 
        str_text, fontsize=12, fontweight='bold',
       )

str_text = f'$\sigma$ = {df_error.loc["djf"].CGO:0.2f} ppm'
axs[1, 0].text(ax.get_xlim()[0]+xoff, ax.get_ylim()[0]+yoff, 
        str_text, fontsize=12, fontweight='bold',
       )

str_text = f'$\sigma$ = {df_error.loc["jja"].SPO:0.2f} ppm'
axs[0, 1].text(ax.get_xlim()[0]+xoff, ax.get_ylim()[0]+yoff, 
        str_text, fontsize=12, fontweight='bold',
       )

str_text = f'$\sigma$ = {df_error.loc["jja"].CGO:0.2f} ppm'
axs[1, 1].text(ax.get_xlim()[0]+xoff, ax.get_ylim()[0]+yoff, 
        str_text, fontsize=12, fontweight='bold',
       )    
          
axs[0, 0].set_title('DJF SPO records, SPO median subtracted')
axs[1, 0].set_title('DJF CGO records, CGO median subtracted')


axs[0, 1].set_title('JJA SPO records, SPO median subtracted')
axs[1, 1].set_title('JJA CGO records, CGO median subtracted')

util.label_plots(fig, [ax for ax in axs.ravel()])

axs[0, 0].legend(handles=legend_elements, ncol=2, loc=(0.02, 0.78), frameon=False, labelspacing=0.1)

util.savefig('SPO-CGO-record-discrepancies')
<Figure size 432x576 with 0 Axes>
_images/obs-surface-error_20_1.png