Process airborne Medusa merge files

  • R Program to read in HIPPO, ORCAS, and ATom MED merge files, massage time variables, add a strat flag, and subset,

  • then filter aircraft data for strong local continental influences, subtract off NOAA in situ SPO, and write out flat text files

  • have to run process_aircraft_10s.ipynb first

library('ncdf4')
library('yaml')
project_tmpdir_obs = read_yaml('../_config_calc.yml')$project_tmpdir_obs
username = Sys.info()['user']
project_tmpdir_obs = gsub('\\{\\{env\\[\'USER\'\\]\\}\\}', username, project_tmpdir_obs)
# specify aircraft data file names
hippomergefile=paste(project_tmpdir_obs,'/aircraft-merge-products/HIPPO_medusa_flasks_merge_insitu_20121129.tbl',sep='') # this is the official HIPPO merge product
atommergedir=paste(project_tmpdir_obs,'/aircraft-merge-products',sep='') # these are version 2.0 (21-08-26)
atommergefiles=c('MER-MED_DC8_ATom-1.nc','MER-MED_DC8_ATom-2.nc','MER-MED_DC8_ATom-3.nc','MER-MED_DC8_ATom-4.nc')  # these files have problems (see below)
orcasmergefile=paste(project_tmpdir_obs,'/aircraft-merge-products/ORCASall.mergeMED.tbl',sep='') # original release version
# set strat flag cutoffs for use below based upon Jin et al., 2021 (https://doi.org/10.5194/acp-21-217-2021)
stratcoh2o=50
stratcoo3=150
stratcon2o=319
# read in global N2O for detrending aircraft N2O
glbn2ofile=url('ftp://aftp.cmdl.noaa.gov/products/trends/n2o/n2o_annmean_gl.txt')
hlines=61
glbn2o=read.table(glbn2ofile,skip=hlines,header=F,stringsAsFactors=F)
colnames(glbn2o)=c('year','n2o','unc')
# for HIPPO, need to interpolate prof variable from 10-sec merge - load from process_aircraft_10s.ipynb output before reading MED data below
load('HIPPO_10s.RData')
hippomerge10s=hippomerge # 'hippomerge' reused below
hippo10sdt=ISOdatetime(hippomerge$year,hippomerge$mon,hippomerge$day,hippomerge$hour,hippomerge$min,hippomerge$sec,tz='UTC')

read in HIPPO file, calc strat flag, and subset

# read and add time variables
hippomerge=read.table(hippomergefile,header=T)
hippodt=strptime(paste(hippomerge[,"Year"],hippomerge[,"DOY"]),format='%Y %j',tz='UTC')+hippomerge[,"UTC"] # DOY is day of year of takeoff; UTC is seconds since midnight on day of takeoff
hippomerge$Month=as.POSIXlt(hippodt)$mon+1
hippomerge$Day=as.POSIXlt(hippodt)$mday
hippomerge$Hour=as.POSIXlt(hippodt)$hour
hippomerge$Min=as.POSIXlt(hippodt)$min
hippomerge$Sec=as.POSIXlt(hippodt)$sec
# filter in situ records for overlap
hippomerge$CO2_QCLS[hippomerge$wt.qcls<0.5]=NA
hippomerge$CO2_AO2[hippomerge$wt.ao2<0.5]=NA
hippomerge$CO2_OMS[hippomerge$wt.oms<0.5]=NA
# interpolate prof from 10 sec file
hippomerge$prof=approx(as.POSIXct(hippo10sdt),hippomerge10s$prof,as.POSIXct(hippodt),method='constant',f=0)$y
# add strat flag
hippomerge$strat=rep(0,nrow(hippomerge)) # 0 means trop
h2oref=hippomerge$H2Oppmv_vxl; h2oref[is.na(h2oref)]=hippomerge$H2O_UWV[is.na(h2oref)]
hippomerge$h2oref=h2oref # for output
h2oref[is.na(h2oref)]=0 # if H2O missing treat as if potentially strat
n2oref=hippomerge$N2O_QCLS 
n2oref=n2oref-(approx(glbn2o$year+0.5,glbn2o$n2o,hippomerge$Year+hippomerge$DOY/365)$y-glbn2o$n2o[glbn2o$year==2009])
hippomerge$n2oref=n2oref # for output
n2oref[is.na(n2oref)]=400 # if N2O missing do not use for filter
o3ref=hippomerge$O3_ppb; o3ref[is.na(o3ref)]=hippomerge$O3_UO3[is.na(o3ref)]
hippomerge$o3ref=o3ref # for output
o3ref[is.na(o3ref)]=0 # if O3 missing do not use for filter
hippomerge$strat[h2oref<stratcoh2o&(o3ref>stratcoo3|n2oref<stratcon2o|(o3ref==0&n2oref==400))]=1 # if either o3 or n2o criteria are met, or if both are missing, consider strat
hippomerge$strat[h2oref==0&o3ref==0&n2oref==400&hippomerge$GGALT<8000]=0 # if all 3 missing assume < 8 km is trop
# select columns
colsel=c('Year','Month','Day','Hour','Min','Sec','H.no','flt','prof','GGLAT','GGLON','GGALT','PSXC','THETA','CO2_MED','CO2_QCLS','CO2_OMS','CO2_AO2','strat','h2oref','n2oref','o3ref')
hippomerge=hippomerge[,is.element(colnames(hippomerge),colsel)]
hippomerge=hippomerge[,match(colsel,names(hippomerge))] # reorder
names(hippomerge)=c('year','mon','day','hour','min','sec','camp','flt','prof','lat','lon','alt','pressure','theta','co2','co2qcls','co2oms','co2ao2','strat','h2oref','n2oref','o3ref') ## 'co2' = CO2_MED

read in ORCAS file, calc strat flag, and subset

# read and add time variables
orcasmerge=read.table(orcasmergefile,header=T)
orcasdt=strptime(paste(orcasmerge[,"Year"],orcasmerge[,"DOY"]),format='%Y %j',tz='UTC')+orcasmerge[,"UTC"] # DOY is day of year of takeoff; UTC is seconds since midnight on day of takeoff
orcasmerge$Month=as.POSIXlt(orcasdt)$mon+1
orcasmerge$Day=as.POSIXlt(orcasdt)$mday
orcasmerge$Hour=as.POSIXlt(orcasdt)$hour
orcasmerge$Min=as.POSIXlt(orcasdt)$min
orcasmerge$Sec=as.POSIXlt(orcasdt)$sec
# filter in situ records for overlap ### add for HIPPO and ATom?
orcasmerge$CO2_NOAA[orcasmerge$wt.CO2_NOAA<0.5]=NA
orcasmerge$CO2_QCLS[orcasmerge$wt.CO2_QCLS<0.5]=NA
orcasmerge$CO2_AO2[orcasmerge$wt.CO2_AO2<0.5]=NA
orcasmerge$CO2.X[orcasmerge$wt.CO2.X<0.5]=NA
# add strat flag
orcasmerge$strat=rep(0,nrow(orcasmerge)) # 0 means trop
h2oref=orcasmerge$VMR_VXL; h2oref[is.na(h2oref)]=orcasmerge$H2O_NOAA[is.na(h2oref)]
orcasmerge$h2oref=h2oref # for output
h2oref[is.na(h2oref)]=0 # if H2O missing treat as if potentially strat
n2oref=orcasmerge$N2O_QCLS
n2oref=n2oref-(approx(glbn2o$year+0.5,glbn2o$n2o,orcasmerge$Year+orcasmerge$DOY/365)$y-glbn2o$n2o[glbn2o$year==2009])
orcasmerge$n2oref=n2oref # for output
n2oref[is.na(n2oref)]=400 # if N2O missing do not use for filter
orcasmerge$strat[h2oref<stratcoh2o&(n2oref<stratcon2o|n2oref==400)]=1 # no O3, if n2o criteria is met or missing, consider strat
orcasmerge$strat[h2oref==0&n2oref==400&orcasmerge$G_ALT<8000]=0 # if both missing assume < 8 km is trop
# select columns
colsel=c('Year','Month','Day','Hour','Min','Sec','flt','n.prof','GGLAT','GGLON','GGALT','PSXC','THETA','CO2_MED','CO2.X','CO2_QCLS','CO2_NOAA','CO2_AO2','strat','h2oref','n2oref')
orcasmerge=orcasmerge[,is.element(colnames(orcasmerge),colsel)]
orcasmerge=orcasmerge[,match(colsel,names(orcasmerge))] # reorder
names(orcasmerge)=c('year','mon','day','hour','min','sec','flt','prof','lat','lon','alt','pressure','theta','co2','co2x','co2qcls','co2noaa','co2ao2','strat','h2oref','n2oref') ## 'co2' = CO2_MED

read in ATom files, calc strat flag, and subset

# read and add time variables
atomvar=c('time','Flight_Date','DLH-H2O/H2O_ppmv','UCATS-H2O/H2O_UWV','QCLS-CH4-CO-N2O/N2O_QCLS','NOyO3-O3/O3_CL','UCATS-O3/O3_UO3','MMS/G_ALT','RF','prof.no','MMS/P','MMS/POT','MMS/G_LAT','MMS/G_LONG',
          'MMS/G_ALT','MEDUSA/CO2_MED','NOAA-Picarro/CO2_NOAA','QCLS-CO2/CO2_QCLS','AO2/CO2_AO2','NOAA-Picarro/CH4_NOAA','QCLS-CH4-CO-N2O/CH4_QCLS')
atommerge=NULL
for(i in c(1:4)){
	atomnc=nc_open(paste(atommergedir,'/',atommergefiles[i],sep=''))
       	count=length(ncvar_get(atomnc,'time'))
	campdata=NULL
	for(var in atomvar){
		if(i==1&var=='UCATS-H2O/H2O_UWV'){ # no UCATS H2O on ATom-1
			campdata=cbind(campdata,rep(NA,count))
		} else {
			campdata=cbind(campdata,ncvar_get(atomnc,var))
		}
	}
	campdata=cbind(campdata,rep(i,count)) # A.no
    nc_close(atomnc)
	atommerge=rbind(atommerge,campdata)
}
atommerge=data.frame(atommerge,stringsAsFactors=F)
names(atommerge)=c(gsub('.*/','',atomvar),'A.no')
atommerge$YYYYMMDD=atommerge$Flight_Date
atomdt=as.POSIXlt(ISOdatetime(2016,1,1,0,0,0,tz='UTC')+atommerge$time,tz='UTC')
atommerge$Year=atomdt$year+1900
atommerge$Month=as.POSIXlt(atomdt)$mon+1
atommerge$Day=as.POSIXlt(atomdt)$mday
atommerge$Hour=as.POSIXlt(atomdt)$hour
atommerge$Min=as.POSIXlt(atomdt)$min
atommerge$Sec=as.POSIXlt(atomdt)$sec
# filter in situ records for overlap
atommerge$CO2_NOAA[atommerge$wt.CO2_NOAA<0.5]=NA
atommerge$CO2_QCLS[atommerge$wt.CO2_QCLS<0.5]=NA
atommerge$CO2_AO2[atommerge$wt.CO2_AO2<0.5]=NA
# add strat flag
atommerge$strat=rep(0,nrow(atommerge)) # 0 means trop
h2oref=atommerge$H2O_ppmv; h2oref[is.na(h2oref)]=atommerge$H2O_UWV[is.na(h2oref)]
atommerge$h2oref=h2oref # for output
h2oref[is.na(h2oref)]=0 # if H2O missing treat as if potentially strat
n2oref=atommerge$N2O_QCLS
n2oref=n2oref-(approx(glbn2o$year+0.5,glbn2o$n2o,atommerge$Year+atomdt$yday/365)$y-glbn2o$n2o[glbn2o$year==2009])
atommerge$n2oref=n2oref # for outptut
n2oref[is.na(n2oref)]=400 # if N2O missing do not use for filter
o3ref=atommerge$O3_CL; o3ref[is.na(o3ref)]=atommerge$O3_UO3[is.na(o3ref)]
atommerge$o3ref=o3ref
o3ref[is.na(o3ref)]=0 # if O3 missing do not use for filter
atommerge$strat[h2oref<stratcoh2o&(o3ref>stratcoo3|n2oref<stratcon2o|(o3ref==0&n2oref==400))]=1 # if either o3 or n2o criteria are met, or if both are missing, consider strat
atommerge$strat[h2oref==0&o3ref==0&n2oref==400&atommerge$G_ALT<8000]=0 # if all 3 missing assume < 8 km is trop
# select column
colsel=c('Year','Month','Day','Hour','Min','Sec','A.no','RF','prof.no','G_LAT','G_LONG','G_ALT','P','POT','CO2_MED','CO2_NOAA','CO2_QCLS','CO2_AO2','CH4_NOAA','CH4_QCLS','strat','h2oref','n2oref','o3ref')
atommerge=atommerge[,is.element(colnames(atommerge),colsel)]
atommerge=atommerge[,match(colsel,names(atommerge))] # reorder
names(atommerge)=c('year','mon','day','hour','min','sec','camp','flt','prof','lat','lon','alt','pressure','theta','co2','co2noaa','co2qcls','co2ao2','ch4noaa','ch4qcls','strat','h2oref','n2oref','o3ref') ## 'co2' = CO2_MED

Filter airborne Medusa data and subtract SPO

# read in NOAA in situ record from SPO
sponc=nc_open(paste(project_tmpdir_obs,'/obspack_co2_1_GLOBALVIEWplus_v6.0_2020-09-11/data/nc/co2_spo_surface-insitu_1_allvalid.nc',sep=''))
spoco2=data.frame(cbind(ncvar_get(sponc,'time_decimal'),t(ncvar_get(sponc,'time_components')),ncvar_get(sponc,'value')*1E6)) ; colnames(spoco2)=c('date','year','mon','day','hour','min','sec','co2')
qcflag=ncvar_get(sponc,'qcflag'); spoco2$co2[substr(qcflag,1,1)!='.']=NA; spoco2$co2[substr(qcflag,2,2)!='.']=NA
spodt=ISOdatetime(spoco2$year,spoco2$mon,spoco2$day,spoco2$hour,spoco2$min,spoco2$sec,tz='UTC')

HIPPO

# filter
ints=read.table(paste(project_tmpdir_obs,'/hippo_xsect_filt_datetime.txt',sep=''),header=T) 
startdt=ISOdatetime(ints$startyear,ints$startmon,ints$startday,ints$starthour,ints$startmin,ints$startsec,tz='UTC')
stopdt=ISOdatetime(ints$stopyear,ints$stopmon,ints$stopday,ints$stophour,ints$stopmin,ints$stopsec,tz='UTC')
blfilt=rep(T,nrow(hippomerge))
for(i in c(1:nrow(ints))){
    blfilt[difftime(hippodt,startdt[i])>=0&difftime(hippodt,stopdt[i])<=0]=F
}
hippodt=hippodt[blfilt]
hippomerge=hippomerge[blfilt,]
print(paste('Filtered ',sum(!blfilt),' of ',length(blfilt),' HIPPO obs (',round(sum(!blfilt)/length(blfilt)*100,1),'%)',sep=''))
[1] "Filtered 12 of 1690 HIPPO obs (0.7%)"
# calculate differences
hippomerge$co2mspo=round(hippomerge$co2-approx(as.POSIXct(spodt),spoco2$co2,as.POSIXct(hippodt))$y,3) ## co2 = 'CO2_MED'
hippomerge$co2mqcls=round(hippomerge$co2-hippomerge$co2qcls,3)
hippomerge$co2moms=round(hippomerge$co2-hippomerge$co2oms,3)
hippomerge$co2mao2=round(hippomerge$co2-hippomerge$co2ao2,3)
# write out
write(names(hippomerge),'../data/aircraft-obs/HIPPO_SO_mSPO_medusa.txt',ncol=ncol(hippomerge))
write(t(hippomerge),'../data/aircraft-obs/HIPPO_SO_mSPO_medusa.txt',ncol=ncol(hippomerge),append=T)

print(apply(!is.na(hippomerge),2,sum))
    year      mon      day     hour      min      sec     camp      flt 
    1678     1677     1677     1677     1677     1677     1678     1678 
    prof      lat      lon      alt pressure    theta      co2  co2qcls 
    1677     1677     1677     1677     1677     1677     1558     1129 
  co2oms   co2ao2    strat   h2oref   n2oref    o3ref  co2mspo co2mqcls 
    1167     1432     1678     1672     1542     1676     1558     1050 
 co2moms  co2mao2 
    1090     1334 

ORCAS

# filter
ints=read.table(paste(project_tmpdir_obs,'/orcas_xsect_filt_datetime.txt',sep=''),header=T)
startdt=ISOdatetime(ints$startyear,ints$startmon,ints$startday,ints$starthour,ints$startmin,ints$startsec,tz='UTC')
stopdt=ISOdatetime(ints$stopyear,ints$stopmon,ints$stopday,ints$stophour,ints$stopmin,ints$stopsec,tz='UTC')
blfilt=rep(T,nrow(orcasmerge))
for(i in c(1:nrow(ints))){
    blfilt[difftime(orcasdt,startdt[i])>=0&difftime(orcasdt,stopdt[i])<=0]=F
}
orcasdt=orcasdt[blfilt]
orcasmerge=orcasmerge[blfilt,]
print(paste('Filtered ',sum(!blfilt),' of ',length(blfilt),' ORCAS obs (',round(sum(!blfilt)/length(blfilt)*100,1),'%)',sep=''))
[1] "Filtered 9 of 392 ORCAS obs (2.3%)"
# calculate differences
orcasmerge$co2mspo=round(orcasmerge$co2-approx(as.POSIXct(spodt),spoco2$co2,as.POSIXct(orcasdt))$y,2) ## co2 = 'CO2_MED'
orcasmerge$co2mqcls=round(orcasmerge$co2-orcasmerge$co2qcls,3)
orcasmerge$co2mx=round(orcasmerge$co2-orcasmerge$co2x,3)
orcasmerge$co2mnoaa=round(orcasmerge$co2-orcasmerge$co2noaa,3)
orcasmerge$co2mao2=round(orcasmerge$co2-orcasmerge$co2ao2,3)
# write out
write(names(orcasmerge),'../data/aircraft-obs/ORCAS_SO_mSPO_medusa.txt',ncol=ncol(orcasmerge))
write(t(orcasmerge),'../data/aircraft-obs/ORCAS_SO_mSPO_medusa.txt',ncol=ncol(orcasmerge),append=T)

print(apply(!is.na(orcasmerge),2,sum))
    year      mon      day     hour      min      sec      flt     prof 
     383      383      383      383      383      383      383      383 
     lat      lon      alt pressure    theta      co2     co2x  co2qcls 
     383      383      383      383      383      340      361      162 
 co2noaa   co2ao2    strat   h2oref   n2oref  co2mspo co2mqcls    co2mx 
     361      337      383      383      366      340      138      320 
co2mnoaa  co2mao2 
     320      299 

ATom

# filter
ints=read.table(paste(project_tmpdir_obs,'/atom_xsect_filt_datetime.txt',sep=''),header=T)
startdt=ISOdatetime(ints$startyear,ints$startmon,ints$startday,ints$starthour,ints$startmin,ints$startsec,tz='UTC')
stopdt=ISOdatetime(ints$stopyear,ints$stopmon,ints$stopday,ints$stophour,ints$stopmin,ints$stopsec,tz='UTC')
blfilt=rep(T,nrow(atommerge))
for(i in c(1:nrow(ints))){
    blfilt[difftime(atomdt,startdt[i])>=0&difftime(atomdt,stopdt[i])<=0]=F
}
atomdt=atomdt[blfilt]
atommerge=atommerge[blfilt,]
print(paste('Filtered ',sum(!blfilt),' of ',length(blfilt),' ATom obs (',round(sum(!blfilt)/length(blfilt)*100,1),'%)',sep=''))
[1] "Filtered 29 of 1370 ATom obs (2.1%)"
# calculate differences
atommerge$co2mspo=round(atommerge$co2-approx(as.POSIXct(spodt),spoco2$co2,as.POSIXct(atomdt))$y,2) ## co2 = 'CO2_MED'
atommerge$co2mqcls=round(atommerge$co2-atommerge$co2qcls,3)
atommerge$co2mao2=round(atommerge$co2-atommerge$co2ao2,3)
atommerge$co2mnoaa=round(atommerge$co2-atommerge$co2noaa,3)
# write out
write(names(atommerge),'../data/aircraft-obs/ATOM_SO_mSPO_medusa.txt',ncol=ncol(atommerge))
write(t(atommerge),'../data/aircraft-obs/ATOM_SO_mSPO_medusa.txt',ncol=ncol(atommerge),append=T)

print(apply(!is.na(atommerge),2,sum))
    year      mon      day     hour      min      sec     camp      flt 
    1341     1341     1341     1341     1341     1341     1341     1341 
    prof      lat      lon      alt pressure    theta      co2  co2noaa 
    1341     1341     1341     1341     1341     1341     1241     1279 
 co2qcls   co2ao2  ch4noaa  ch4qcls    strat   h2oref   n2oref    o3ref 
    1279     1314     1336      962     1341     1032      958     1337 
 co2mspo co2mqcls  co2mao2 co2mnoaa 
    1241     1188     1221     1184