Process airborne Medusa merge files
Contents
Process airborne Medusa merge files¶
R Program to read in HIPPO, ORCAS, and ATom MED merge files, massage time variables, add a strat flag, and subset,
then filter aircraft data for strong local continental influences, subtract off NOAA in situ SPO, and write out flat text files
have to run process_aircraft_10s.ipynb first
library('ncdf4')
library('yaml')
project_tmpdir_obs = read_yaml('../_config_calc.yml')$project_tmpdir_obs
username = Sys.info()['user']
project_tmpdir_obs = gsub('\\{\\{env\\[\'USER\'\\]\\}\\}', username, project_tmpdir_obs)
# specify aircraft data file names
hippomergefile=paste(project_tmpdir_obs,'/aircraft-merge-products/HIPPO_medusa_flasks_merge_insitu_20121129.tbl',sep='') # this is the official HIPPO merge product
atommergedir=paste(project_tmpdir_obs,'/aircraft-merge-products',sep='') # these are version 2.0 (21-08-26)
atommergefiles=c('MER-MED_DC8_ATom-1.nc','MER-MED_DC8_ATom-2.nc','MER-MED_DC8_ATom-3.nc','MER-MED_DC8_ATom-4.nc') # these files have problems (see below)
orcasmergefile=paste(project_tmpdir_obs,'/aircraft-merge-products/ORCASall.mergeMED.tbl',sep='') # original release version
# set strat flag cutoffs for use below based upon Jin et al., 2021 (https://doi.org/10.5194/acp-21-217-2021)
stratcoh2o=50
stratcoo3=150
stratcon2o=319
# read in global N2O for detrending aircraft N2O
glbn2ofile=url('ftp://aftp.cmdl.noaa.gov/products/trends/n2o/n2o_annmean_gl.txt')
hlines=61
glbn2o=read.table(glbn2ofile,skip=hlines,header=F,stringsAsFactors=F)
colnames(glbn2o)=c('year','n2o','unc')
# for HIPPO, need to interpolate prof variable from 10-sec merge - load from process_aircraft_10s.ipynb output before reading MED data below
load('HIPPO_10s.RData')
hippomerge10s=hippomerge # 'hippomerge' reused below
hippo10sdt=ISOdatetime(hippomerge$year,hippomerge$mon,hippomerge$day,hippomerge$hour,hippomerge$min,hippomerge$sec,tz='UTC')
read in HIPPO file, calc strat flag, and subset¶
# read and add time variables
hippomerge=read.table(hippomergefile,header=T)
hippodt=strptime(paste(hippomerge[,"Year"],hippomerge[,"DOY"]),format='%Y %j',tz='UTC')+hippomerge[,"UTC"] # DOY is day of year of takeoff; UTC is seconds since midnight on day of takeoff
hippomerge$Month=as.POSIXlt(hippodt)$mon+1
hippomerge$Day=as.POSIXlt(hippodt)$mday
hippomerge$Hour=as.POSIXlt(hippodt)$hour
hippomerge$Min=as.POSIXlt(hippodt)$min
hippomerge$Sec=as.POSIXlt(hippodt)$sec
# filter in situ records for overlap
hippomerge$CO2_QCLS[hippomerge$wt.qcls<0.5]=NA
hippomerge$CO2_AO2[hippomerge$wt.ao2<0.5]=NA
hippomerge$CO2_OMS[hippomerge$wt.oms<0.5]=NA
# interpolate prof from 10 sec file
hippomerge$prof=approx(as.POSIXct(hippo10sdt),hippomerge10s$prof,as.POSIXct(hippodt),method='constant',f=0)$y
# add strat flag
hippomerge$strat=rep(0,nrow(hippomerge)) # 0 means trop
h2oref=hippomerge$H2Oppmv_vxl; h2oref[is.na(h2oref)]=hippomerge$H2O_UWV[is.na(h2oref)]
hippomerge$h2oref=h2oref # for output
h2oref[is.na(h2oref)]=0 # if H2O missing treat as if potentially strat
n2oref=hippomerge$N2O_QCLS
n2oref=n2oref-(approx(glbn2o$year+0.5,glbn2o$n2o,hippomerge$Year+hippomerge$DOY/365)$y-glbn2o$n2o[glbn2o$year==2009])
hippomerge$n2oref=n2oref # for output
n2oref[is.na(n2oref)]=400 # if N2O missing do not use for filter
o3ref=hippomerge$O3_ppb; o3ref[is.na(o3ref)]=hippomerge$O3_UO3[is.na(o3ref)]
hippomerge$o3ref=o3ref # for output
o3ref[is.na(o3ref)]=0 # if O3 missing do not use for filter
hippomerge$strat[h2oref<stratcoh2o&(o3ref>stratcoo3|n2oref<stratcon2o|(o3ref==0&n2oref==400))]=1 # if either o3 or n2o criteria are met, or if both are missing, consider strat
hippomerge$strat[h2oref==0&o3ref==0&n2oref==400&hippomerge$GGALT<8000]=0 # if all 3 missing assume < 8 km is trop
# select columns
colsel=c('Year','Month','Day','Hour','Min','Sec','H.no','flt','prof','GGLAT','GGLON','GGALT','PSXC','THETA','CO2_MED','CO2_QCLS','CO2_OMS','CO2_AO2','strat','h2oref','n2oref','o3ref')
hippomerge=hippomerge[,is.element(colnames(hippomerge),colsel)]
hippomerge=hippomerge[,match(colsel,names(hippomerge))] # reorder
names(hippomerge)=c('year','mon','day','hour','min','sec','camp','flt','prof','lat','lon','alt','pressure','theta','co2','co2qcls','co2oms','co2ao2','strat','h2oref','n2oref','o3ref') ## 'co2' = CO2_MED
read in ORCAS file, calc strat flag, and subset¶
# read and add time variables
orcasmerge=read.table(orcasmergefile,header=T)
orcasdt=strptime(paste(orcasmerge[,"Year"],orcasmerge[,"DOY"]),format='%Y %j',tz='UTC')+orcasmerge[,"UTC"] # DOY is day of year of takeoff; UTC is seconds since midnight on day of takeoff
orcasmerge$Month=as.POSIXlt(orcasdt)$mon+1
orcasmerge$Day=as.POSIXlt(orcasdt)$mday
orcasmerge$Hour=as.POSIXlt(orcasdt)$hour
orcasmerge$Min=as.POSIXlt(orcasdt)$min
orcasmerge$Sec=as.POSIXlt(orcasdt)$sec
# filter in situ records for overlap ### add for HIPPO and ATom?
orcasmerge$CO2_NOAA[orcasmerge$wt.CO2_NOAA<0.5]=NA
orcasmerge$CO2_QCLS[orcasmerge$wt.CO2_QCLS<0.5]=NA
orcasmerge$CO2_AO2[orcasmerge$wt.CO2_AO2<0.5]=NA
orcasmerge$CO2.X[orcasmerge$wt.CO2.X<0.5]=NA
# add strat flag
orcasmerge$strat=rep(0,nrow(orcasmerge)) # 0 means trop
h2oref=orcasmerge$VMR_VXL; h2oref[is.na(h2oref)]=orcasmerge$H2O_NOAA[is.na(h2oref)]
orcasmerge$h2oref=h2oref # for output
h2oref[is.na(h2oref)]=0 # if H2O missing treat as if potentially strat
n2oref=orcasmerge$N2O_QCLS
n2oref=n2oref-(approx(glbn2o$year+0.5,glbn2o$n2o,orcasmerge$Year+orcasmerge$DOY/365)$y-glbn2o$n2o[glbn2o$year==2009])
orcasmerge$n2oref=n2oref # for output
n2oref[is.na(n2oref)]=400 # if N2O missing do not use for filter
orcasmerge$strat[h2oref<stratcoh2o&(n2oref<stratcon2o|n2oref==400)]=1 # no O3, if n2o criteria is met or missing, consider strat
orcasmerge$strat[h2oref==0&n2oref==400&orcasmerge$G_ALT<8000]=0 # if both missing assume < 8 km is trop
# select columns
colsel=c('Year','Month','Day','Hour','Min','Sec','flt','n.prof','GGLAT','GGLON','GGALT','PSXC','THETA','CO2_MED','CO2.X','CO2_QCLS','CO2_NOAA','CO2_AO2','strat','h2oref','n2oref')
orcasmerge=orcasmerge[,is.element(colnames(orcasmerge),colsel)]
orcasmerge=orcasmerge[,match(colsel,names(orcasmerge))] # reorder
names(orcasmerge)=c('year','mon','day','hour','min','sec','flt','prof','lat','lon','alt','pressure','theta','co2','co2x','co2qcls','co2noaa','co2ao2','strat','h2oref','n2oref') ## 'co2' = CO2_MED
read in ATom files, calc strat flag, and subset¶
# read and add time variables
atomvar=c('time','Flight_Date','DLH-H2O/H2O_ppmv','UCATS-H2O/H2O_UWV','QCLS-CH4-CO-N2O/N2O_QCLS','NOyO3-O3/O3_CL','UCATS-O3/O3_UO3','MMS/G_ALT','RF','prof.no','MMS/P','MMS/POT','MMS/G_LAT','MMS/G_LONG',
'MMS/G_ALT','MEDUSA/CO2_MED','NOAA-Picarro/CO2_NOAA','QCLS-CO2/CO2_QCLS','AO2/CO2_AO2','NOAA-Picarro/CH4_NOAA','QCLS-CH4-CO-N2O/CH4_QCLS')
atommerge=NULL
for(i in c(1:4)){
atomnc=nc_open(paste(atommergedir,'/',atommergefiles[i],sep=''))
count=length(ncvar_get(atomnc,'time'))
campdata=NULL
for(var in atomvar){
if(i==1&var=='UCATS-H2O/H2O_UWV'){ # no UCATS H2O on ATom-1
campdata=cbind(campdata,rep(NA,count))
} else {
campdata=cbind(campdata,ncvar_get(atomnc,var))
}
}
campdata=cbind(campdata,rep(i,count)) # A.no
nc_close(atomnc)
atommerge=rbind(atommerge,campdata)
}
atommerge=data.frame(atommerge,stringsAsFactors=F)
names(atommerge)=c(gsub('.*/','',atomvar),'A.no')
atommerge$YYYYMMDD=atommerge$Flight_Date
atomdt=as.POSIXlt(ISOdatetime(2016,1,1,0,0,0,tz='UTC')+atommerge$time,tz='UTC')
atommerge$Year=atomdt$year+1900
atommerge$Month=as.POSIXlt(atomdt)$mon+1
atommerge$Day=as.POSIXlt(atomdt)$mday
atommerge$Hour=as.POSIXlt(atomdt)$hour
atommerge$Min=as.POSIXlt(atomdt)$min
atommerge$Sec=as.POSIXlt(atomdt)$sec
# filter in situ records for overlap
atommerge$CO2_NOAA[atommerge$wt.CO2_NOAA<0.5]=NA
atommerge$CO2_QCLS[atommerge$wt.CO2_QCLS<0.5]=NA
atommerge$CO2_AO2[atommerge$wt.CO2_AO2<0.5]=NA
# add strat flag
atommerge$strat=rep(0,nrow(atommerge)) # 0 means trop
h2oref=atommerge$H2O_ppmv; h2oref[is.na(h2oref)]=atommerge$H2O_UWV[is.na(h2oref)]
atommerge$h2oref=h2oref # for output
h2oref[is.na(h2oref)]=0 # if H2O missing treat as if potentially strat
n2oref=atommerge$N2O_QCLS
n2oref=n2oref-(approx(glbn2o$year+0.5,glbn2o$n2o,atommerge$Year+atomdt$yday/365)$y-glbn2o$n2o[glbn2o$year==2009])
atommerge$n2oref=n2oref # for outptut
n2oref[is.na(n2oref)]=400 # if N2O missing do not use for filter
o3ref=atommerge$O3_CL; o3ref[is.na(o3ref)]=atommerge$O3_UO3[is.na(o3ref)]
atommerge$o3ref=o3ref
o3ref[is.na(o3ref)]=0 # if O3 missing do not use for filter
atommerge$strat[h2oref<stratcoh2o&(o3ref>stratcoo3|n2oref<stratcon2o|(o3ref==0&n2oref==400))]=1 # if either o3 or n2o criteria are met, or if both are missing, consider strat
atommerge$strat[h2oref==0&o3ref==0&n2oref==400&atommerge$G_ALT<8000]=0 # if all 3 missing assume < 8 km is trop
# select column
colsel=c('Year','Month','Day','Hour','Min','Sec','A.no','RF','prof.no','G_LAT','G_LONG','G_ALT','P','POT','CO2_MED','CO2_NOAA','CO2_QCLS','CO2_AO2','CH4_NOAA','CH4_QCLS','strat','h2oref','n2oref','o3ref')
atommerge=atommerge[,is.element(colnames(atommerge),colsel)]
atommerge=atommerge[,match(colsel,names(atommerge))] # reorder
names(atommerge)=c('year','mon','day','hour','min','sec','camp','flt','prof','lat','lon','alt','pressure','theta','co2','co2noaa','co2qcls','co2ao2','ch4noaa','ch4qcls','strat','h2oref','n2oref','o3ref') ## 'co2' = CO2_MED
Filter airborne Medusa data and subtract SPO¶
# read in NOAA in situ record from SPO
sponc=nc_open(paste(project_tmpdir_obs,'/obspack_co2_1_GLOBALVIEWplus_v6.0_2020-09-11/data/nc/co2_spo_surface-insitu_1_allvalid.nc',sep=''))
spoco2=data.frame(cbind(ncvar_get(sponc,'time_decimal'),t(ncvar_get(sponc,'time_components')),ncvar_get(sponc,'value')*1E6)) ; colnames(spoco2)=c('date','year','mon','day','hour','min','sec','co2')
qcflag=ncvar_get(sponc,'qcflag'); spoco2$co2[substr(qcflag,1,1)!='.']=NA; spoco2$co2[substr(qcflag,2,2)!='.']=NA
spodt=ISOdatetime(spoco2$year,spoco2$mon,spoco2$day,spoco2$hour,spoco2$min,spoco2$sec,tz='UTC')
HIPPO¶
# filter
ints=read.table(paste(project_tmpdir_obs,'/hippo_xsect_filt_datetime.txt',sep=''),header=T)
startdt=ISOdatetime(ints$startyear,ints$startmon,ints$startday,ints$starthour,ints$startmin,ints$startsec,tz='UTC')
stopdt=ISOdatetime(ints$stopyear,ints$stopmon,ints$stopday,ints$stophour,ints$stopmin,ints$stopsec,tz='UTC')
blfilt=rep(T,nrow(hippomerge))
for(i in c(1:nrow(ints))){
blfilt[difftime(hippodt,startdt[i])>=0&difftime(hippodt,stopdt[i])<=0]=F
}
hippodt=hippodt[blfilt]
hippomerge=hippomerge[blfilt,]
print(paste('Filtered ',sum(!blfilt),' of ',length(blfilt),' HIPPO obs (',round(sum(!blfilt)/length(blfilt)*100,1),'%)',sep=''))
[1] "Filtered 12 of 1690 HIPPO obs (0.7%)"
# calculate differences
hippomerge$co2mspo=round(hippomerge$co2-approx(as.POSIXct(spodt),spoco2$co2,as.POSIXct(hippodt))$y,3) ## co2 = 'CO2_MED'
hippomerge$co2mqcls=round(hippomerge$co2-hippomerge$co2qcls,3)
hippomerge$co2moms=round(hippomerge$co2-hippomerge$co2oms,3)
hippomerge$co2mao2=round(hippomerge$co2-hippomerge$co2ao2,3)
# write out
write(names(hippomerge),'../data/aircraft-obs/HIPPO_SO_mSPO_medusa.txt',ncol=ncol(hippomerge))
write(t(hippomerge),'../data/aircraft-obs/HIPPO_SO_mSPO_medusa.txt',ncol=ncol(hippomerge),append=T)
print(apply(!is.na(hippomerge),2,sum))
year mon day hour min sec camp flt
1678 1677 1677 1677 1677 1677 1678 1678
prof lat lon alt pressure theta co2 co2qcls
1677 1677 1677 1677 1677 1677 1558 1129
co2oms co2ao2 strat h2oref n2oref o3ref co2mspo co2mqcls
1167 1432 1678 1672 1542 1676 1558 1050
co2moms co2mao2
1090 1334
ORCAS¶
# filter
ints=read.table(paste(project_tmpdir_obs,'/orcas_xsect_filt_datetime.txt',sep=''),header=T)
startdt=ISOdatetime(ints$startyear,ints$startmon,ints$startday,ints$starthour,ints$startmin,ints$startsec,tz='UTC')
stopdt=ISOdatetime(ints$stopyear,ints$stopmon,ints$stopday,ints$stophour,ints$stopmin,ints$stopsec,tz='UTC')
blfilt=rep(T,nrow(orcasmerge))
for(i in c(1:nrow(ints))){
blfilt[difftime(orcasdt,startdt[i])>=0&difftime(orcasdt,stopdt[i])<=0]=F
}
orcasdt=orcasdt[blfilt]
orcasmerge=orcasmerge[blfilt,]
print(paste('Filtered ',sum(!blfilt),' of ',length(blfilt),' ORCAS obs (',round(sum(!blfilt)/length(blfilt)*100,1),'%)',sep=''))
[1] "Filtered 9 of 392 ORCAS obs (2.3%)"
# calculate differences
orcasmerge$co2mspo=round(orcasmerge$co2-approx(as.POSIXct(spodt),spoco2$co2,as.POSIXct(orcasdt))$y,2) ## co2 = 'CO2_MED'
orcasmerge$co2mqcls=round(orcasmerge$co2-orcasmerge$co2qcls,3)
orcasmerge$co2mx=round(orcasmerge$co2-orcasmerge$co2x,3)
orcasmerge$co2mnoaa=round(orcasmerge$co2-orcasmerge$co2noaa,3)
orcasmerge$co2mao2=round(orcasmerge$co2-orcasmerge$co2ao2,3)
# write out
write(names(orcasmerge),'../data/aircraft-obs/ORCAS_SO_mSPO_medusa.txt',ncol=ncol(orcasmerge))
write(t(orcasmerge),'../data/aircraft-obs/ORCAS_SO_mSPO_medusa.txt',ncol=ncol(orcasmerge),append=T)
print(apply(!is.na(orcasmerge),2,sum))
year mon day hour min sec flt prof
383 383 383 383 383 383 383 383
lat lon alt pressure theta co2 co2x co2qcls
383 383 383 383 383 340 361 162
co2noaa co2ao2 strat h2oref n2oref co2mspo co2mqcls co2mx
361 337 383 383 366 340 138 320
co2mnoaa co2mao2
320 299
ATom¶
# filter
ints=read.table(paste(project_tmpdir_obs,'/atom_xsect_filt_datetime.txt',sep=''),header=T)
startdt=ISOdatetime(ints$startyear,ints$startmon,ints$startday,ints$starthour,ints$startmin,ints$startsec,tz='UTC')
stopdt=ISOdatetime(ints$stopyear,ints$stopmon,ints$stopday,ints$stophour,ints$stopmin,ints$stopsec,tz='UTC')
blfilt=rep(T,nrow(atommerge))
for(i in c(1:nrow(ints))){
blfilt[difftime(atomdt,startdt[i])>=0&difftime(atomdt,stopdt[i])<=0]=F
}
atomdt=atomdt[blfilt]
atommerge=atommerge[blfilt,]
print(paste('Filtered ',sum(!blfilt),' of ',length(blfilt),' ATom obs (',round(sum(!blfilt)/length(blfilt)*100,1),'%)',sep=''))
[1] "Filtered 29 of 1370 ATom obs (2.1%)"
# calculate differences
atommerge$co2mspo=round(atommerge$co2-approx(as.POSIXct(spodt),spoco2$co2,as.POSIXct(atomdt))$y,2) ## co2 = 'CO2_MED'
atommerge$co2mqcls=round(atommerge$co2-atommerge$co2qcls,3)
atommerge$co2mao2=round(atommerge$co2-atommerge$co2ao2,3)
atommerge$co2mnoaa=round(atommerge$co2-atommerge$co2noaa,3)
# write out
write(names(atommerge),'../data/aircraft-obs/ATOM_SO_mSPO_medusa.txt',ncol=ncol(atommerge))
write(t(atommerge),'../data/aircraft-obs/ATOM_SO_mSPO_medusa.txt',ncol=ncol(atommerge),append=T)
print(apply(!is.na(atommerge),2,sum))
year mon day hour min sec camp flt
1341 1341 1341 1341 1341 1341 1341 1341
prof lat lon alt pressure theta co2 co2noaa
1341 1341 1341 1341 1341 1341 1241 1279
co2qcls co2ao2 ch4noaa ch4qcls strat h2oref n2oref o3ref
1279 1314 1336 962 1341 1032 958 1337
co2mspo co2mqcls co2mao2 co2mnoaa
1241 1188 1221 1184