Sensitivity analysis of emergent constraint

%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
import os
from itertools import product

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import numpy as np
import pandas as pd
import xarray as xr
import dask

import datasets
import emergent_constraint as ec
import figure_panels
import obs_aircraft
import obs_surface
import models
import util
use_dask = True

if use_dask:
    from ncar_jobqueue import NCARCluster
    from dask.distributed import Client
    
    cluster = NCARCluster()
    cluster.scale(4)
    client = Client(cluster) # Connect this local process to remote workers
else:
    cluster = None
    client = None
cluster

Compute constraint varying region boundary

max_lat = -30.
min_lat = -70.
flux_lat_ranges = [(-90., x) for x in np.arange(min_lat, max_lat+1, 1)]
flux_memory_dist = [30, 90]
ubin_dist = np.arange(290., 315, 1.)
gradient_lat_ranges = [(-90., x) for x in np.arange(-45., -30.+1., 5.)]

def df_constructor():
    cols = ['flux_lat_range', 'gradient_lat_range', 'flux_memory', 'ubin',]
    index = list(set(
        [ndx for ndx in product(flux_lat_ranges, gradient_lat_ranges, flux_memory_dist, [300.],)] +
        [ndx for ndx in product([(-90., -45.)], [(-90., -45.)], [90], ubin_dist,)]
    ))
    df = pd.DataFrame(index, columns=cols).set_index(cols)
    return df
df_constructor()
flux_lat_range gradient_lat_range flux_memory ubin
(-90.0, -35.0) (-90.0, -35.0) 90 300.0
(-90.0, -36.0) (-90.0, -45.0) 90 300.0
(-90.0, -45.0) (-90.0, -45.0) 90 304.0
(-90.0, -69.0) (-90.0, -40.0) 90 300.0
(-90.0, -38.0) (-90.0, -35.0) 90 300.0
... ... ... ...
(-90.0, -42.0) (-90.0, -30.0) 90 300.0
(-90.0, -45.0) (-90.0, -45.0) 90 296.0
290.0
(-90.0, -38.0) (-90.0, -45.0) 90 300.0
(-90.0, -45.0) (-90.0, -45.0) 90 292.0

352 rows × 0 columns

def signal2noise_flux(sc_or_ac, key):
    """compute signal to noise metric from ratio of estimated flux and associated error"""
    if isinstance(sc_or_ac,  ec.surface_constraint):
        df = sc_or_ac.surface_flux
        signal = df.loc[('2009-2020', key)].flux
        noise = df.loc[('2009-2020', key)].flux_error
        return np.abs(signal) / noise
    
    elif isinstance(sc_or_ac,  ec.aircraft_constraint):    
        df = sc_or_ac.campaign_flux
        signal = df.loc[sc_or_ac.campaign_groups[key]].flux.mean()
        noise = np.sqrt(
        np.sum(df.loc[sc_or_ac.campaign_groups[key]].flux_error**2)
            ) / len(sc_or_ac.campaign_groups[key])
        return np.abs(signal) / noise

def signal2noise(sc_or_ac, key):
    """compute signal to noise metric from constraint properties"""    
    N = int(1e6)
    if isinstance(sc_or_ac,  ec.surface_constraint):
        ndx = ('2009-2020', key)
    else:
        ndx = key
        
    beta = np.random.multivariate_normal(
        mean=sc_or_ac.df_fits.loc[ndx].fit_beta,
        cov=sc_or_ac.df_fits.loc[ndx].fit_cov_beta,
        size=N,
    )
    signal = sc_or_ac.df_fits.loc[ndx].fit_beta[0]
    noise = beta.std(axis=0)[0]
    return np.abs(signal) / noise
    
%%time
clobber = False
clobber_deep = False
air_parms = ec.get_parameters('default')

obj_srf = {}; obj_air = {};
acs_lat = {}; 
acs_ubin = {}; 
scs = {};
dfs = {}
for constraint_type in ['ocean_constraint',]:
    model_input_lists = ec.get_model_tracer_lists(constraint_type)
    obj_srf[constraint_type] = ec.whole_enchilada_srf(**model_input_lists)       
    scs[constraint_type] = obj_srf[constraint_type].sensitivity(
        'flux_lat_range', flux_lat_ranges, clobber=clobber, use_dask=use_dask,
    )
    
for constraint_type in ['ocean_constraint',]:    
    model_input_lists = ec.get_model_tracer_lists(constraint_type)
    obj_air[constraint_type] = ec.whole_enchilada(clobber=clobber_deep, **model_input_lists)
    
    df = df_constructor()
    
    results = []; acs = []
    for ndx in df.index:
        parameters = dict(**ec.get_parameters())
        for n, name in enumerate(df.index.names):
            parameters[name] = ndx[n]
        results.append(
            dask.delayed(obj_air[constraint_type].get_ac)(
                **parameters,
                clobber=clobber,
            )
        )
        if len(results) == 12:
            with util.timer(name=f'compute {len(results)}', normalize=len(results)):
                acs.extend(dask.compute(*results)) 
            print(f'completed {len(acs)}')                
            results = []
    
    if results:
        acs.extend(dask.compute(*results)) 
    
    for key in acs[0].fit_groups:
        df[f's2n_{key}'] = [ac.df_fits.loc[key].fit_s2n for ac in acs]
    df['flux'] = [ac.estimate_ann_mean_flux[0] for ac in acs]
    df['flux_err'] = [ac.estimate_ann_mean_flux[1] for ac in acs]    
    df['object'] = [ac for ac in acs]
    
    dfs[constraint_type] = df
[compute 12]: 1.92090s/process
completed 12
[compute 12]: 1.76340s/process
completed 24
[compute 12]: 2.00661s/process
completed 36
[compute 12]: 2.17283s/process
completed 48
[compute 12]: 1.79268s/process
completed 60
[compute 12]: 2.00677s/process
completed 72
[compute 12]: 1.82041s/process
completed 84
[compute 12]: 2.08134s/process
completed 96
[compute 12]: 2.15109s/process
completed 108
[compute 12]: 1.77523s/process
completed 120
[compute 12]: 1.77058s/process
completed 132
[compute 12]: 1.84384s/process
completed 144
[compute 12]: 2.00792s/process
completed 156
[compute 12]: 2.15302s/process
completed 168
[compute 12]: 1.78228s/process
completed 180
[compute 12]: 1.75691s/process
completed 192
[compute 12]: 1.79337s/process
completed 204
[compute 12]: 1.73266s/process
completed 216
[compute 12]: 2.36044s/process
completed 228
[compute 12]: 1.67692s/process
completed 240
[compute 12]: 1.65035s/process
completed 252
[compute 12]: 1.79696s/process
completed 264
[compute 12]: 1.80964s/process
completed 276
[compute 12]: 1.88151s/process
completed 288
[compute 12]: 2.09539s/process
completed 300
[compute 12]: 1.67178s/process
completed 312
[compute 12]: 10.53219s/process
completed 324
[compute 12]: 7.72337s/process
completed 336
[compute 12]: 7.69750s/process
completed 348
CPU times: user 4min 40s, sys: 3min 54s, total: 8min 34s
Wall time: 16min 37s
if cluster is not None:
    cluster.close()
    client.close()
ubin = 300.
flux_memory = 90
constraint_type = 'ocean_constraint'
df = dfs[constraint_type]

fig, axs = util.canvas(1, 2)

for n, group in enumerate(['DJF', 'MAMJJASON']):
    ax = axs[0, n]
    s2n = np.empty((len(flux_lat_ranges), len(gradient_lat_ranges)))
    for i, flux_lat_range in enumerate(flux_lat_ranges):
        for j, gradient_lat_range in enumerate(gradient_lat_ranges):
            s2n[i, j] = df.loc[(flux_lat_range, gradient_lat_range, flux_memory, ubin)][f's2n_{group}']

    for j in range(len(gradient_lat_ranges)):
        ax.plot([l[-1] for l in flux_lat_ranges], s2n[:, j], 
                 label=f'Gradient lat range = {gradient_lat_ranges[j]}')

    ax.set_xlabel('Latitude [°N]')
    ax.set_ylabel('Signal:Noise')
    ax.legend();
    ax.set_title(group)
_images/emergent-constraint-s2n_10_0.png

Sensitivity to θ-bin parameters

df_obs = obs_aircraft.open_aircraft_data('obs')    
sel_loc = obs_aircraft.groups_select_profiles(df_obs, -90., -45., profiles_only=True)
df_obs =  df_obs.loc[sel_loc][['campaign_id', 'theta', 'co2', 'strat']].where(df_obs.co2.notnull())
df_obs = df_obs.where(df_obs.campaign_id.notnull()).dropna()
df_obs
loading obs
campaign_id theta co2 strat
12900 HIPPO-1 294.2498 383.3111 0.0
12901 HIPPO-1 294.1424 383.2453 0.0
12902 HIPPO-1 294.0021 383.2316 0.0
12903 HIPPO-1 293.8232 383.4580 0.0
12904 HIPPO-1 293.6586 383.6040 0.0
... ... ... ... ...
338779 ORCAS-F 312.0988 400.0530 0.0
338780 ORCAS-F 311.8446 400.0580 0.0
338781 ORCAS-F 311.5282 400.0750 0.0
338782 ORCAS-F 311.2981 400.0690 0.0
338783 ORCAS-F 310.9262 400.0480 0.0

44516 rows × 4 columns

udθ = air_parms['udθ']
strat_influence = xr.DataArray(
    np.zeros((len(df_obs.campaign_id.unique()), len(ubin_dist))), 
    dims=('campaign', 'theta'),
    coords=dict(
        campaign=xr.DataArray(df_obs.campaign_id.unique(), dims=('campaign'), name='campaign'),
        theta=xr.DataArray(ubin_dist, dims=('theta'), name='theta')
    ),
)

for i, campaign in enumerate(df_obs.campaign_id.unique()):
    for k, ubin in enumerate(ubin_dist):        
        theta_bin = (ubin - udθ / 2, ubin + udθ / 2)
        sel = (df_obs.campaign_id == campaign) 
        sel = sel & (theta_bin[0] <= df_obs.theta) & (df_obs.theta <= theta_bin[1]) 
        strat_influence.data[i, k] = df_obs.loc[sel].strat.mean()
constraint_type = 'ocean_constraint'
df = dfs[constraint_type]

relationship = ('Flux vs. $\Delta_{\\theta}$CO$_2^{ocn}$' 
                if constraint_type == 'ocean_constraint' else
                'Flux vs. $\Delta_{\\theta}$CO$_2$' 
               )
ndx_vline = np.where(ubin_dist == 300.)[0][0]

fig, axs = util.canvas(1, 3, figsize=(4, 6), use_gridspec=True, wspace=0.06)

ax = axs[0, 0]
colors = list(figure_panels.palette_colors)
for n, group in enumerate(['DJF', 'MAMJJASON']):
    label = 'Dec-Jan' if group == 'DJF' else 'Mar-Nov' if group == 'MAMJJASON' else group
    slope = [df.loc[((-90., -45.), (-90., -45.), 90, ubin)].object.df_fits.loc[group].fit_beta[0] 
             for ubin in ubin_dist]
    slope_err = [df.loc[((-90., -45.), (-90., -45.), 90, ubin)].object.df_fits.loc[group].fit_stderr_beta[0] 
                 for ubin in ubin_dist]
    
    ref = slope[ndx_vline]
    ref_err = slope_err[ndx_vline]
    c = colors.pop(0)
    ax.errorbar(slope-ref, ubin_dist, 
                xerr=slope_err, marker='.', label=label,
                color=c,
               )
    ax.text(0.05, 303-n*1.5, f'{ref:0.2f}±{ref_err:0.2f}', c=c, fontweight='bold')

ax.axvline(0., lw=0.5, c='k', linestyle='--')    

    
ax.set_title(f'Slope sensitivity\n to upper $\\theta$ bin')
ax.set_xlabel(f'{relationship} slope [Pg C yr$^{{-1}}$ ppm$^{{-1}}$]')    
ax.set_ylabel('$\\theta$ bin center [K]')
ax.legend(frameon=False);

ax = axs[0, 1]
colors = list(figure_panels.palette_colors)

s2n_ubin_djf = [df.loc[((-90., -45.), (-90., -45.), 90, ubin)].s2n_DJF for ubin in ubin_dist]
s2n_ubin_mamjjason = [df.loc[((-90., -45.), (-90., -45.), 90, ubin)].s2n_MAMJJASON for ubin in ubin_dist]
ax.plot(s2n_ubin_djf, ubin_dist, '.-', label='Dec-Jan', color=colors.pop(0))
ax.plot(s2n_ubin_mamjjason, ubin_dist, '.-', label='Mar-Nov', color=colors.pop(0))

ax.set_title(f'Signal:Noise sensitivity\n to upper $\\theta$ bin')
ax.set_xlabel('Signal:Noise')
ax.set_yticklabels([])
ax.legend(frameon=False);

ax = axs[0, 2]

marker_spec = figure_panels.marker_spec_campaigns()

for c in strat_influence.campaign.values:
    ax.plot(strat_influence.sel(campaign=c), 
            strat_influence.theta,
            label=c, **marker_spec[c], markersize=4)
ax.set_title('Fraction of samples\nwith stratosphere influence')

ax.set_yticks(axs[0, 0].get_yticks())
ax.set_ylim(axs[0, 0].get_ylim())
ax.set_yticklabels([])

xlm = ax.get_xlim()
ax.set_xlim((xlm[0], 0.55))
ax.set_xlabel('Fraction of samples')
ax.legend(frameon=False, loc='lower right', ncol=1);

util.label_plots(fig, [ax for ax in axs.ravel()], xoff=-0.01, yoff=0.03)


util.savefig(f'theta-bin-S2N-{constraint_type}')
_images/emergent-constraint-s2n_14_0.png

Examine dependence on bounding latitude

constraint_type = 'ocean_constraint'
df = dfs[constraint_type]

ubin = 300.
gradient_lat_range = (-90., -45.)

acs = {
    flux_memory: [df.loc[(flux_lat_range, gradient_lat_range, flux_memory, ubin)].object 
                  for flux_lat_range in flux_lat_ranges]
    for flux_memory in flux_memory_dist
}

s2n_air = {}
for flux_memory in [30, 90]:
    assert acs[flux_memory][0].flux_memory == flux_memory
    for group in acs[flux_memory][0].campaign_groups.keys():
        period = 'Mar-Nov' if group == 'MAMJJASON' else 'Dec-Feb' if group == 'DJF' else group        
        key = f'{period} ({flux_memory} d)'        
        s2n_air[key] = [ac.df_fits.loc[group].fit_s2n for ac in acs[flux_memory]]

ec_flux_estimate = {}        
for flux_memory in [30, 90]:
    key = f'Emergent constraint ({flux_memory}d)'            
    ec_flux_estimate[key] = [ac.estimate_ann_mean_flux for ac in acs[flux_memory]]

s2n_srf = {}        
for group in ['DJF', 'JJA']:
    period = 'Dec-Feb' if group == 'DJF' else 'Jun-Aug' if group == 'JJA' else group        
    key = f'{period}'
    s2n_srf[key] = [sc.df_fits.loc[('2009-2020', group)].fit_s2n for sc in scs[constraint_type]]    
lat = [l[-1] for l in flux_lat_ranges]
for key, values in s2n_air.items():
    plt.plot(lat, values, '.-', label=key,)

for key, values in s2n_srf.items():
    plt.plot(lat, values, '.-', label=key,)
    
#plt.axhline(1., c='k', lw=0.5)
plt.axvline(-45, c='k', lw=0.5, linestyle='--')
plt.legend();
plt.xlabel('Latitude [°N]')
plt.ylabel('Signal:Noise');
_images/emergent-constraint-s2n_17_0.png

Compute inversion and pCO2-based zonal-mean, seasonal fluxes

sfco2_ocn_inversions = []
for m, t in model_input_lists['model_tracer_list']:
    if not any(s in m for s in ['TM5', 'CESM', 'prior']) and m not in sfco2_ocn_inversions:
        sfco2_ocn_inversions.append(m)
sfco2_ocn_inversions
['CT2017',
 'CT2019B',
 'CTE2018',
 'CTE2020',
 'MIROC',
 'CAMSv20r1',
 's99oc_v2020',
 's99oc_ADJocI40S_v2020',
 's99oc_SOCCOM_v2020']
sfco2_ocn_soccom = [
    'TM5-Flux-m0f', 'TM5-Flux-mwf', 'TM5-Flux-mmf',
]
sfco2_ocn_pco2_clim = ['SOM-FFN',]
%%capture
dsets_za_djf = {}
dsets_za_jja = {}
for model in sfco2_ocn_inversions+sfco2_ocn_pco2_clim:
    mobj = models.Model(model)
    ds = mobj.open_derived_dataset(
                'fluxes_za_ts_monthly', 
            ).sel(time=slice('1998', '2020'), lat=slice(-80., -30.))
    with xr.set_options(keep_attrs=True):        
        ds = ds.groupby('time.season').mean('time')
    dsets_za_djf[model] =  ds.sel(season='DJF').compute()
    dsets_za_jja[model] = ds.sel(season='JJA').compute()

Compute cumulative-sum of fluxes as a function of latitude

%%capture
ds_cum_cache_file = f'{datasets.cache_dir}/sfco2-inversions-cumsum-lat.nc'

if os.path.exists(ds_cum_cache_file) and not clobber:
    ds_cum = xr.open_dataset(ds_cum_cache_file)

else:
    dsets_cum = {}
    for model in sfco2_ocn_inversions:
        mobj = models.Model(model)
        ds_list = []
        for lat_range in flux_lat_ranges:
            ds = mobj.open_derived_dataset(
                    'flux_ts_monthly', lat_range=lat_range
                ).sel(time=slice('1998', '2020'))

            with xr.set_options(keep_attrs=True):                
                dss = ds.groupby('time.season').mean('time').compute()
            ds_list.append(dss)

        dsets_cum[model] = xr.concat(ds_list, 
                                    dim=xr.DataArray(
                                        [rng[-1] for rng in flux_lat_ranges], 
                                        dims=('lat'), 
                                        name='lat')
                                   )
    model = xr.DataArray(sfco2_ocn_inversions, dims=('model'), name='model')
    ds_cum = xr.concat([dsets_cum[m].SFCO2_OCN for m in sfco2_ocn_inversions], dim=model).to_dataset()
    ds_cum.to_netcdf(ds_cum_cache_file)

ds_cum = ds_cum.sel(lat=slice(min_lat, max_lat))
flux_inv_lat = ds_cum.lat
flux_inv_mean = ds_cum.SFCO2_OCN.mean(['model', 'season'])
flux_inv_std = ds_cum.SFCO2_OCN.mean(['season']).std('model')
plt.plot(flux_inv_lat, flux_inv_mean, color='k', lw=2)
plt.fill_between(flux_inv_lat, flux_inv_mean - flux_inv_std, 
                 flux_inv_mean + flux_inv_std, alpha=0.2)
<matplotlib.collections.PolyCollection at 0x2b29f4561350>
_images/emergent-constraint-s2n_24_1.png
fig, axs = util.canvas(2, 2, wspace=0.25)

marker_spec = figure_panels.marker_spec_models()

#--- DJF flux
ax = axs[0, 0]
for model, ds in dsets_za_djf.items():
    ax.plot(ds.lat, ds.SFCO2_OCN, **marker_spec[model]['CO2_OCN'])
    
ax.set_ylabel(f'Air-sea CO$_2$ flux [{ds.SFCO2_OCN.units}]')    
ax.set_title('DJF air-sea CO$_2$ flux')
ax.axhline(0, lw=0.5, color='k')
ylm_djf = ax.get_ylim()

#--- JJA flux 
ax = axs[0, 1]
for model, ds in dsets_za_jja.items():
    ax.plot(ds.lat, ds.SFCO2_OCN, **marker_spec[model]['CO2_OCN'])
    
ax.set_title('JJA air-sea CO$_2$ flux')
ax.axhline(0, lw=0.5, color='k')
ylm_jja = ax.get_ylim()

h, l = ax.get_legend_handles_labels()
leg1 = ax.legend(h[:], l[:], frameon=False,
                 loc=(1.02, 0.));

# leg2 = ax.legend(h[2:], l[2:], frameon=False,
#                  loc=(0.02, 0.01));

#ax.add_artist(leg1)

ylm = [np.min((ylm_djf[0], ylm_jja[0])), np.max((ylm_djf[1], ylm_jja[1])),]
axs[0, 0].set_ylim(ylm)
axs[0, 1].set_ylim(ylm)

#--- cumulatice flux with latitude
ax = axs[1, 0]

colors = list(figure_panels.palette_colors)
p = ax.plot(
    flux_inv_lat, flux_inv_mean, lw=2, 
    zorder=10, label='Multi-inversion mean', 
    c=colors.pop(0),
)
ax.fill_between(flux_inv_lat, 
                flux_inv_mean - flux_inv_std, 
                flux_inv_mean + flux_inv_std, 
                alpha=0.25, zorder=-10,
                color=p[0].get_color(),
                label='Multi-inversion ±1$\sigma$'
               )

lat = [l[-1] for l in flux_lat_ranges]
for key, values in ec_flux_estimate.items():
    ax.errorbar(lat, [v[0] for v in values], 
                yerr=[v[1] for v in values],
                color=colors.pop(0),
                marker='.', label=key,)

ax.set_ylabel(f'Air-sea CO$_2$ flux [Pg C yr$^{-1}$]')
ax.set_title('Annual-mean air-sea CO$_2$ flux')
ax.axhline(0, lw=0.5, color='k')
ax.set_xlabel('Latitude [°N]')     

ax.legend(frameon=False);

#--- signal to noise ratio
ax = axs[1, 1]
#p = ax.plot(df.lat_ubnd, df.s2n_djf, '.-', label=f'Dec-Feb')# ({flux_memory}d)')
#ax.plot(df.lat_ubnd, df.s2n_mamjjason, '.-', label=f'Mar-Nov')# ({flux_memory}d)')   

colors = list(figure_panels.palette_colors)
lat = [l[-1] for l in flux_lat_ranges]
for key, values in s2n_air.items():
    ax.plot(lat, values, '.-', label=key, c=colors.pop(0))

for key, values in s2n_srf.items():
    ax.plot(lat, values, '.-', label=key, c=colors.pop(0))
    
ax.legend();

ax.axhline(0, lw=0.5, color='k')
ax.set_title('Signal to noise ratio')
ax.set_ylabel(f'Signal:Noise')
ax.set_xlabel('Latitude [°N]')     

ylm = np.array(ax.get_ylim())
ax.set_ylim(ylm)
h, l = ax.get_legend_handles_labels()

order_ndx = [3, 1, 2, 0] #[1, 3, 0, 2,]
lg1 = ax.legend([h[i] for i in order_ndx], [l[i] for i in order_ndx],
          frameon=False,
          loc=(1.02, 0.5), #'upper right',
          handletextpad=0.15,  
          handlelength=1,                
          title='$\\bf{Aircraft}$'                
         )
order_ndx = [-2, -1]
lg2 = ax.legend([h[i] for i in order_ndx], [l[i] for i in order_ndx],
          frameon=False, 
          loc=(1.02, 0.2), #'upper right',
          handletextpad=0.15,
          handlelength=1,
          title='$\\bf{Surface}$'
         )
ax.add_artist(lg1)
    
#axs[0, 0].set_xlim(ax.get_xlim())
#axs[0, 1].set_xlim(ax.get_xlim())

util.label_plots(fig, axs.ravel(), xoff=-0.02)

util.savefig(f'fluxes-region-S2N-{constraint_type}.pdf')
_images/emergent-constraint-s2n_26_0.png