Analytical uncertainty at CO₂ measurement stations¶

%load_ext autoreload
%autoreload 2

from collections import OrderedDict
import numpy as np
import pandas as pd
import xarray as xr
xr.set_options(display_style='text')

import matplotlib.pyplot as plt

import figure_panels
import obs_surface
import util

Load the monthly data¶

Specify the records at each station to examine.

stn_records = dict(
    SPO=[
        'SPO_NOAA_insitu_CO2',
        'SPO_NOAA_flask_CO2',
        'SPO_SIO_O2_flask_CO2',
        'SPO_CSIRO_flask_CO2',
        'SPO_SIO_CDK_flask_CO2',    
    ],
    CGO=[
        'CGO_CSIRO_insitu_CO2',
        'CGO_NOAA_flask_CO2',
        'CGO_CSIRO_flask_CO2',
        'CGO_SIO_O2_flask_CO2',
    ],
)
record_list = [ri for r in stn_records.values() for ri in r]

Read txt file containing the station data.
Make all records into a dataset for later plotting.
Compute mean across records at each station and generate “anomaly” columns.

# generate column names for the "anomaly" columns (minus station mean)
stn_records_a = dict()
for stn, records in stn_records.items():
    stn_records_a[stn] = [f'{rec}_mmedian' for rec in records]
    
# read monthly data
file = obs_surface.data_files('CO2', 'obs')
df = obs_surface.read_stndata(file)

# get dataset
stninfo = obs_surface.get_stn_info('CO2')
ds = obs_surface.to_dataset(
    stninfo, df, 'CO2', 
    plot_coverage=False, 
    dropna=False, 
    unique_stn=False, 
    gap_fill=False).to_dataset()

# keep only columns from stations specified above
df = df[filter(lambda s: '_CO2' in s and 
               any(s in records for records in stn_records.values()) 
               or '_CO2' not in s, df.columns)]

# compute station median and add as new columns
for (stn, arecords), records in zip(stn_records_a.items(), stn_records.values()):
    df[stn] = df[records].median(axis=1)
    df[arecords] = df[records].sub(df[stn], axis=0)
df

	year	month	day	year_frac	polar_year	SPO_SIO_CDK_flask_CO2	SPO_NOAA_insitu_CO2	CGO_NOAA_flask_CO2	CGO_CSIRO_insitu_CO2	CGO_CSIRO_flask_CO2	...	SPO_NOAA_insitu_CO2_mmedian	SPO_NOAA_flask_CO2_mmedian	SPO_SIO_O2_flask_CO2_mmedian	SPO_CSIRO_flask_CO2_mmedian	SPO_SIO_CDK_flask_CO2_mmedian	CGO	CGO_CSIRO_insitu_CO2_mmedian	CGO_NOAA_flask_CO2_mmedian	CGO_CSIRO_flask_CO2_mmedian	CGO_SIO_O2_flask_CO2_mmedian
date
1998-12-15	1998	12	15.5	1998.956164	1999	365.0013	365.0870	364.9250	NaN	364.8809	...	-0.00205	0.04095	0.00205	NaN	-0.08775	364.88090	NaN	0.04410	0.00000	-0.12380
1999-01-15	1999	1	15.5	1999.041096	1999	364.9615	365.0233	364.9575	NaN	364.9474	...	-0.06450	0.09930	0.00000	0.1202	-0.12630	364.95750	NaN	0.00000	-0.01010	0.02390
1999-02-14	1999	2	14.0	1999.123288	1999	364.7820	364.9477	365.0900	NaN	365.1991	...	-0.01380	0.08050	0.00000	0.1495	-0.17950	365.09000	NaN	0.00000	0.10910	-0.05870
1999-03-15	1999	3	15.5	1999.202740	1999	364.8418	364.8559	364.8880	NaN	364.8469	...	-0.01580	0.20000	0.00000	0.1283	-0.02990	364.84690	NaN	0.04110	0.00000	-0.15130
1999-04-15	1999	4	15.0	1999.287671	1999	365.0213	365.0247	364.9900	NaN	365.0387	...	0.00000	0.11780	0.03150	-0.0907	-0.00340	364.99000	NaN	0.00000	0.04870	-0.08340
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2019-10-15	2019	10	15.5	2019.789041	2020	408.8537	408.8633	408.6825	408.8186	408.6942	...	0.00960	0.00460	-0.13460	-0.1977	0.00000	408.68835	0.13025	-0.00585	0.00585	-0.20365
2019-11-15	2019	11	15.0	2019.873973	2020	408.8636	408.9070	408.8037	408.7658	408.7523	...	0.04340	0.01640	-0.10470	-0.0246	0.00000	408.75905	0.00675	0.04465	-0.00675	-0.14465
2019-12-15	2019	12	15.5	2019.956164	2020	409.0132	408.8532	408.5017	408.5183	408.6873	...	-0.08780	0.05650	-0.08230	0.0000	0.07220	408.51000	0.00830	-0.00830	0.17730	-0.08510
2020-01-15	2020	1	15.5	2020.040984	2020	408.8138	NaN	NaN	NaN	NaN	...	NaN	NaN	-0.04985	NaN	0.04985	408.02610	NaN	NaN	NaN	0.00000
2020-02-14	2020	2	14.5	2020.122951	2020	408.7241	NaN	NaN	NaN	NaN	...	NaN	NaN	-0.18450	NaN	0.18450	408.19060	NaN	NaN	NaN	0.00000

255 rows × 25 columns

Compute the long-term standard deviation of monthly records¶

print('Long-term std dev of monthly records:')
for stn, records in stn_records_a.items():
    print(f'{stn}: {np.nanstd(df[records].values):0.4f}')

Long-term std dev of monthly records:
SPO: 0.1269
CGO: 0.1077

Compute seasonal means¶

Loop over the seasons and generate seasonal averages.

Require at least 2 months to define a season.

seasons = OrderedDict([
    ('djf', [12, 1, 2]),
    ('mam', [3, 4, 5]),
    ('jja', [6, 7, 8]),
    ('son', [9, 10, 11]),
])
dfs_seasons = {}
for season, months in seasons.items():
    if season == 'djf':
        groupby_col = 'polar_year'
        drop_cols = ['month', 'day', 'year']        
    else:
        groupby_col = 'year'
        drop_cols = ['month', 'day', 'polar_year']

    grouped = df.loc[df.month.isin(months)].groupby(groupby_col)
    
    dfs_seasons[season] = grouped.mean().where(grouped.count()>=2)
    dfs_seasons[season] = dfs_seasons[season].set_index('year_frac')
    dfs_seasons[season] = dfs_seasons[season].drop(drop_cols, axis=1)
    
dfs_seasons['son']

	SPO_SIO_CDK_flask_CO2	SPO_NOAA_insitu_CO2	CGO_NOAA_flask_CO2	CGO_CSIRO_insitu_CO2	CGO_CSIRO_flask_CO2	SPO_CSIRO_flask_CO2	CGO_SIO_O2_flask_CO2	SPO_SIO_O2_flask_CO2	SPO_NOAA_flask_CO2	SPO	SPO_NOAA_insitu_CO2_mmedian	SPO_NOAA_flask_CO2_mmedian	SPO_SIO_O2_flask_CO2_mmedian	SPO_CSIRO_flask_CO2_mmedian	SPO_SIO_CDK_flask_CO2_mmedian	CGO	CGO_CSIRO_insitu_CO2_mmedian	CGO_NOAA_flask_CO2_mmedian	CGO_CSIRO_flask_CO2_mmedian	CGO_SIO_O2_flask_CO2_mmedian
year_frac
1999.789954	366.546833	366.652033	366.485433	NaN	366.551133	366.622667	366.157433	366.636567	366.635833	366.622467	0.029567	0.013367	0.014100	0.000200	-0.075633	366.485433	NaN	0.000000	0.065700	-0.328000
2000.790528	367.783267	367.886567	367.676267	NaN	367.733767	367.693000	367.617033	367.929467	367.738800	367.781900	0.104667	-0.043100	0.147567	-0.057400	0.001367	367.676267	NaN	0.000000	0.057500	-0.059233
2001.789954	369.528167	369.798500	369.552500	NaN	369.554000	369.600500	369.451733	369.716500	369.689367	369.693767	0.104733	-0.004400	0.022733	-0.093267	-0.165600	369.536233	NaN	0.016267	0.017767	-0.084500
2002.789954	371.808200	371.996267	371.783767	NaN	371.837167	371.861000	371.803200	371.917867	371.904767	371.876567	0.119700	0.028200	0.041300	-0.015567	-0.068367	371.800600	NaN	-0.016833	0.036567	0.002600
2003.789954	373.815700	374.085800	373.887233	NaN	373.939467	373.976000	373.765833	374.045000	374.006367	374.013700	0.072100	-0.007333	0.031300	-0.037700	-0.198000	373.891167	NaN	-0.003933	0.048300	-0.125333
2004.790528	375.593833	375.713767	375.600400	375.657567	375.692033	375.832500	375.218267	375.909600	375.782367	375.782883	-0.069117	-0.000517	0.126717	0.029450	-0.189050	375.616933	0.040633	-0.016533	0.075100	-0.398667
2005.789954	377.834000	377.916767	377.713633	377.874400	377.910333	377.801500	377.662800	377.955333	377.895200	377.896867	0.019900	-0.001667	0.058467	-0.094300	-0.062867	377.800133	0.074267	-0.086500	0.110200	-0.137333
2006.789954	379.322967	379.542000	379.468500	379.442533	379.405733	379.590500	379.266467	379.558933	379.597367	379.546483	-0.004483	0.050883	0.012450	0.000000	-0.223517	379.406117	0.036417	0.062383	-0.000383	-0.139650
2007.789954	381.619633	381.748333	381.553600	381.721733	381.596667	NaN	381.519933	381.835667	381.868867	381.775933	-0.027600	0.092933	0.059733	NaN	-0.156300	381.586500	0.135233	-0.032900	0.010167	-0.066567
2008.790528	383.354567	383.635400	383.582100	383.584767	383.613233	NaN	383.522433	383.575567	383.688867	383.617683	0.017717	0.071183	-0.042117	NaN	-0.263117	383.579400	0.005367	0.002700	0.033833	-0.056967
2009.789954	384.973200	385.131367	385.073333	385.161800	385.063833	NaN	385.195867	385.059600	385.171400	385.071583	0.059783	0.099817	-0.011983	NaN	-0.098383	385.086333	0.075467	-0.013000	-0.022500	0.109533
2010.789954	387.313067	387.412467	387.400000	387.370700	387.334433	387.382000	387.246600	387.349067	387.444133	387.388483	0.023983	0.055650	-0.039417	-0.019850	-0.075417	387.343083	0.027617	0.041525	-0.008650	-0.068775
2011.789954	388.991500	389.038333	388.994467	388.987867	388.984233	388.916333	388.850267	388.988167	389.018600	389.004767	0.033567	0.013833	-0.016600	-0.088433	-0.013267	388.958333	0.029533	0.036133	0.025900	-0.108067
2012.790528	391.231667	391.335700	391.220400	391.239667	391.290533	NaN	391.218333	391.239967	391.303600	391.278783	0.056917	0.024817	-0.038817	NaN	-0.047117	391.238433	0.001233	-0.018033	0.052100	-0.020100
2013.789954	393.986967	393.999967	393.814733	393.928567	393.901033	394.091000	393.837400	393.902167	394.021100	394.005300	-0.005333	0.015800	-0.103133	0.085700	-0.018333	393.865400	0.063167	-0.050667	0.035633	-0.028000
2014.789954	395.954567	395.908433	395.725000	395.804000	395.841600	396.073667	395.490933	395.829900	395.935800	395.933733	-0.025300	0.002067	-0.103833	0.139933	0.020833	395.754033	0.049967	-0.029033	0.087567	-0.263100
2015.789954	398.563600	398.430967	398.469833	398.406500	398.446767	398.569667	398.309333	398.560333	398.534433	398.537433	-0.106467	-0.003000	0.022900	0.032233	0.026167	398.413400	-0.006900	0.056433	0.033367	-0.104067
2016.790528	401.810833	401.629667	401.493333	401.538233	401.521000	401.779333	401.320600	401.651300	401.664167	401.696833	-0.067167	-0.032667	-0.045533	0.082500	0.114000	401.500933	0.037300	-0.007600	0.020067	-0.180333
2017.789954	403.589000	403.638500	403.447533	403.673933	403.625500	403.517000	403.538600	403.545800	403.665267	403.617300	0.021200	0.047967	-0.071500	-0.100300	0.000000	403.594267	0.079667	-0.146733	0.031233	-0.055667
2018.789954	406.291133	406.126200	406.021667	405.938500	405.995100	406.003000	405.752733	405.997000	406.063500	406.046200	0.080000	0.017300	-0.049200	-0.043200	0.244933	405.933317	0.005183	0.088350	0.061783	-0.180583
2019.789954	408.820433	408.828767	408.737400	408.761267	408.726200	408.712333	408.527933	408.685833	408.813867	408.806867	0.021900	0.007000	-0.121033	-0.094533	0.013567	408.720033	0.041233	0.017367	0.006167	-0.192100

Long-term seasonal means¶

# make list of *all* records
records = [record for stn, records in stn_records_a.items() for record in records]

# dimension dictionary with lists
df_mean = {'season': list(seasons.keys())}
df_mean.update({r: [] for r in records})

# loop over seasons, compute long-term mean
for season in seasons:
    for r in records:
        df_mean[r].append(dfs_seasons[season][r].mean(axis=0))
df_mean = pd.DataFrame(df_mean).set_index('season')
df_mean

	SPO_NOAA_insitu_CO2_mmedian	SPO_NOAA_flask_CO2_mmedian	SPO_SIO_O2_flask_CO2_mmedian	SPO_CSIRO_flask_CO2_mmedian	SPO_SIO_CDK_flask_CO2_mmedian	CGO_CSIRO_insitu_CO2_mmedian	CGO_NOAA_flask_CO2_mmedian	CGO_CSIRO_flask_CO2_mmedian	CGO_SIO_O2_flask_CO2_mmedian
season
djf	-0.017290	0.024567	-0.004355	0.106728	-0.056947	0.004599	0.027571	0.007436	-0.068187
mam	0.039309	0.006748	-0.035228	-0.040449	-0.001835	0.061413	-0.005066	-0.009310	-0.039366
jja	0.067344	-0.003321	-0.036373	-0.078673	0.015745	0.032955	0.004226	-0.000957	-0.056112
son	0.021917	0.021340	-0.005043	-0.016149	-0.058767	0.043461	-0.004791	0.037020	-0.118329

SD of the long-term mean at each station¶

error = {'season': list(seasons.keys())}
error.update({stn: [] for stn in stn_records.keys()})
for stn, arecords in stn_records_a.items():
    for season in seasons:
        error[stn].append(df_mean.loc[season][arecords].std(ddof=1))

df_error = pd.DataFrame(error).set_index('season')
df_error

	SPO	CGO
season
djf	0.061237	0.041960
mam	0.032685	0.042500
jja	0.054904	0.037213
son	0.033189	0.074899

Median across records at SPO and CGO¶

ds_mSPO_med = (ds.sel(record=stn_records['SPO']) - ds.sel(record=stn_records['SPO']).median('record', skipna=True))
ds_mCGO_med = (ds.sel(record=stn_records['CGO']) - ds.sel(record=stn_records['CGO']).median('record', skipna=True))
ds_m_med = xr.concat((ds_mSPO_med, ds_mCGO_med), 'record')
ds_m_med.record

<xarray.DataArray 'record' (record: 9)>
array(['SPO_NOAA_insitu_CO2', 'SPO_NOAA_flask_CO2', 'SPO_SIO_O2_flask_CO2',
       'SPO_CSIRO_flask_CO2', 'SPO_SIO_CDK_flask_CO2', 'CGO_CSIRO_insitu_CO2',
       'CGO_NOAA_flask_CO2', 'CGO_CSIRO_flask_CO2', 'CGO_SIO_O2_flask_CO2'],
      dtype=object)
Coordinates:
  * record       (record) object 'SPO_NOAA_insitu_CO2' ... 'CGO_SIO_O2_flask_...
    institution  (record) object 'NOAA' 'NOAA' 'SIO_O2' ... 'CSIRO' 'SIO_O2'
    lat          (record) float64 -89.98 -89.98 -89.98 ... -40.68 -40.68 -40.68
    lon          (record) float64 -24.8 -24.8 -24.8 -24.8 ... 144.7 144.7 144.7
    stncode      (record) object 'SPO' 'SPO' 'SPO' 'SPO' ... 'CGO' 'CGO' 'CGO'

Seasonal means of the medians¶

ds_djf = util.ann_mean(ds_m_med, season='DJF', time_bnds_varname=None, n_req=2,)
ds_jja = util.ann_mean(ds_m_med, season='JJA', time_bnds_varname=None, n_req=2,)

ds_djf.time

<xarray.DataArray 'time' (time: 22)>
array([1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])
Coordinates:
  * time     (time) int64 1999 2000 2001 2002 2003 ... 2016 2017 2018 2019 2020

Time series of SPO and CGO records¶

fig = plt.figure(figsize=(6, 8))

ncol = 2
nrow = 2
fig, axs = plt.subplots(nrow, ncol, figsize=(6.5*ncol, 4*nrow))   

marker_spec = figure_panels.marker_spec_co2_inst()

labels = dict(
    SPO_NOAA_insitu_CO2='NOAA in situ',
    SPO_NOAA_flask_CO2='NOAA flasks',
    SPO_SIO_O2_flask_CO2='SIO O$_2$ Program flasks',
    SPO_SIO_CDK_flask_CO2='SIO CO$_2$ Program flasks',
    SPO_CSIRO_flask_CO2='CSIRO flasks',
    CGO_NOAA_flask_CO2='NOAA flasks',
    CGO_SIO_O2_flask_CO2='SIO O$_2$ Program flasks',
    CGO_CSIRO_flask_CO2='CSIRO flasks',
    CGO_CSIRO_insitu_CO2='CSIRO in situ',
)

def ammendments(ax):
    ax.axhline(0, color='k', lw=1);
    ax.set_ylabel('$\Delta$CO$_2$ [ppm]')    
    ax.set_xticks(np.arange(1998, 2022, 2));
    ax.set_xlim([1998, 2022])
    ax.set_ylim([-0.73, 0.63]);

plotted_elements = []
legend_elements = []
    
    
dset = ds_djf.CO2.sel(record=record_list).copy()
# for stn in ['SPO', 'CGO']:
#     idx = np.where(dset.stncode == stn)[0]
#     dset[:, idx] = dset[:, idx]


x = dset.time + util.season_yearfrac['DJF']
for i, record in enumerate(dset.record.values):    
    ax = axs[0, 0] if 'SPO' in record else axs[1, 0]
    y = dset.sel(record=record)
    ls = '--' if 'insitu' in record else '-'
        
    inst = str(dset.sel(record=record).institution.values)
    p = ax.plot(x, y, linestyle=ls, label=labels[record], **marker_spec[inst])
    if labels[record] not in plotted_elements:
        legend_elements.append(p[0])
        plotted_elements.append(labels[record])


dset = ds_jja.CO2.sel(record=record_list).copy()
# for stn in ['SPO', 'CGO']:
#     idx = np.where(dset.stncode == stn)[0]
#     dset[:, idx] = dset[:, idx]


x = dset.time + util.season_yearfrac['JJA']
for i, record in enumerate(dset.record.values):
    ax = axs[0, 1] if 'SPO' in record else axs[1, 1]
    y = dset.sel(record=record)
    ls = '--' if 'insitu' in record else '-'
    inst = str(dset.sel(record=record).institution.values)    
    p = ax.plot(x, y, linestyle=ls, label=labels[record], **marker_spec[inst])
    if labels[record] not in plotted_elements:
        legend_elements.append(p[0])
        plotted_elements.append(labels[record])

for ax in axs.ravel():
    ammendments(ax)

    
xoff = 1
yoff = 0.05
str_text = f'$\sigma$ = {df_error.loc["djf"].SPO:0.2f} ppm'
axs[0, 0].text(ax.get_xlim()[0]+xoff, ax.get_ylim()[0]+yoff, 
        str_text, fontsize=12, fontweight='bold',
       )

str_text = f'$\sigma$ = {df_error.loc["djf"].CGO:0.2f} ppm'
axs[1, 0].text(ax.get_xlim()[0]+xoff, ax.get_ylim()[0]+yoff, 
        str_text, fontsize=12, fontweight='bold',
       )

str_text = f'$\sigma$ = {df_error.loc["jja"].SPO:0.2f} ppm'
axs[0, 1].text(ax.get_xlim()[0]+xoff, ax.get_ylim()[0]+yoff, 
        str_text, fontsize=12, fontweight='bold',
       )

str_text = f'$\sigma$ = {df_error.loc["jja"].CGO:0.2f} ppm'
axs[1, 1].text(ax.get_xlim()[0]+xoff, ax.get_ylim()[0]+yoff, 
        str_text, fontsize=12, fontweight='bold',
       )    
          
axs[0, 0].set_title('DJF SPO records, SPO median subtracted')
axs[1, 0].set_title('DJF CGO records, CGO median subtracted')


axs[0, 1].set_title('JJA SPO records, SPO median subtracted')
axs[1, 1].set_title('JJA CGO records, CGO median subtracted')

util.label_plots(fig, [ax for ax in axs.ravel()])

axs[0, 0].legend(handles=legend_elements, ncol=2, loc=(0.02, 0.78), frameon=False, labelspacing=0.1)

util.savefig('SPO-CGO-record-discrepancies')

<Figure size 432x576 with 0 Axes>

Strong Southern Ocean Carbon Uptake Evident in Airborne Observations

Analytical uncertainty at CO₂ measurement stations

Contents

Analytical uncertainty at CO₂ measurement stations¶

Load the monthly data¶

Compute the long-term standard deviation of monthly records¶

Compute seasonal means¶

Long-term seasonal means¶

SD of the long-term mean at each station¶

Median across records at SPO and CGO¶

Seasonal means of the medians¶

Time series of SPO and CGO records¶

Strong Southern Ocean Carbon Uptake Evident in Airborne Observations

Analytical uncertainty at CO2 measurement stations

Contents

Analytical uncertainty at CO2 measurement stations¶

Load the monthly data¶

Compute the long-term standard deviation of monthly records¶

Compute seasonal means¶

Long-term seasonal means¶

SD of the long-term mean at each station¶

Median across records at SPO and CGO¶

Seasonal means of the medians¶

Time series of SPO and CGO records¶

Analytical uncertainty at CO₂ measurement stations

Analytical uncertainty at CO₂ measurement stations¶