Interpolate and fill SeaWIFS data#

Adapted from: https://github.com/adcroft/interp_and_fill/blob/master/Interpolate and fill SeaWIFS.ipynb

import numpy 
import netCDF4
import scipy.sparse
import scipy.sparse.linalg
import matplotlib.pyplot as plt
%matplotlib inline

Read MOM6 grid and mask#

def MOM6grid(supergrid):
  '''
  Reads supergrid and returns an object with grid metrics
  '''
  # MOM6 supergrid file
  nc_sgrd = netCDF4.Dataset(supergrid)
  x = nc_sgrd.variables['x'][:]
  y = nc_sgrd.variables['y'][:]
  dx = nc_sgrd.variables['dx'][:]
  dy = nc_sgrd.variables['dy'][:]
  area = nc_sgrd.variables['area'][:]
  angle_dx = nc_sgrd.variables['angle_dx'][:]
  nc_sgrd.close()
  #################### Model grid ####################
  # T point locations
  tlon = x[1::2,1::2]
  tlat = y[1::2,1::2]
  # U point locations
  ulon = x[1::2,::2]
  ulat = y[1::2,::2]
  # V point locations
  vlon = x[::2,1::2]
  vlat = y[::2,1::2]
  # Corner point locations
  qlon = x[::2,::2]
  qlat = y[::2,::2]
  # T cell area (sum of four supergrid cells)
  tarea = area[::2,::2] + area[1::2,1::2] + area[::2,1::2] + area[::2,1::2]
  # t-point angle
  angle = angle_dx[1::2,1::2]
  # x-distance between u-points, centered at t
  dxt = dx[1::2,::2] + dx[1::2,1::2]
  # y-distance between v-points, centered at t
  dyt = dy[::2,1::2] + dy[1::2,1::2]
  # x-distance between  q-points, centered at v
  dxCv = dx[2::2,::2] + dx[2::2,1::2]
  # y-distance between  q-points, centered at u
  dyCu = dy[::2,2::2] + dy[1::2,2::2]
  # grid aspect ratio
  ar = dyt / dxt
  # grid effective grid spacing
  # A = 4*pi*r^2 , area of sphere of radius r
  # dA = (r*cos(theta)*dlambda)*(r*dtheta), differential area on sphere
  #    = r^2*domega
  # domega = dA/r^2, differential solid angle  (steradians, sr)
  # 1 sr = (180./pi)^2 square degrees
  costheta = numpy.cos(tlat*numpy.pi/180.)
  rearth = 637122000 # Earth radius in centimeter
  domega = tarea / rearth**2
  egs  = numpy.sqrt(domega * (180./numpy.pi)**2)

  # create an empty class object
  class MOM6_grd:
    pass

  # fill grid object
  MOM6_grd.tlon = tlon
  MOM6_grd.tlat = tlat
  MOM6_grd.ulon = ulon
  MOM6_grd.ulat = ulat
  MOM6_grd.vlon = vlon
  MOM6_grd.vlat = vlat
  MOM6_grd.qlon = qlon
  MOM6_grd.qlat = qlat
  MOM6_grd.dxt = dxt
  MOM6_grd.dyt = dyt
  MOM6_grd.dxCv = dxCv
  MOM6_grd.dyCu = dyCu
  MOM6_grd.angle = angle
  MOM6_grd.tarea = tarea
  MOM6_grd.L = tarea.shape[1]
  MOM6_grd.M = tarea.shape[0]
  print('MOM6 grid was successfully loaded... \n')
  return MOM6_grd
sg_path = '/glade/p/cesmdata/cseg/inputdata/ocn/mom/tx0.66v1/'
grd = MOM6grid('../supergrid/ORCA_gridgen/ocean_hgrid_trimmed.nc')
ocn_qlon = grd.qlon
ocn_qlat = grd.qlat
MOM6 grid was successfully loaded... 
ocn_mask = netCDF4.Dataset('../topography/topo.tx1_4v1.SRTM15_V2.4.SmL1.0_C1.0.nc').variables['mask'][:,:]
ocn_nj, ocn_ni = ocn_mask.shape
plt.pcolormesh(ocn_qlon, ocn_qlat, ocn_mask); plt.colorbar();
../_images/7a5ab518761fe40d0bfed4829d5b9bf790f7c07f53ccf00d23ec1bcd3b4ac0cb.png

Read SeaWIFS data#

src_nc = netCDF4.Dataset('ncfiles/SeaWIFS.L3m_MC_CHL_chlor_a_0.25deg.nc')
src_data = src_nc.variables['chlor_a']
src_nj, src_ni = src_data.shape[-2], src_data.shape[-1]
print('src shape = %i x %i'%(src_nj,src_ni))
src_lon = src_nc.variables[src_data.dimensions[-1]]
src_lat = src_nc.variables[src_data.dimensions[-2]]
src_lat = ((numpy.arange(src_nj)+0.5)/src_nj - 0.5)*180. # Recompute as doubles
src_x0 = int( ( src_lon[0] + src_lon[-1] )/2 + 0.5) - 180.
src_lon = ((numpy.arange(src_ni)+0.5)/src_ni)*360.+src_x0 # Recompute as doubles
src_qlat = ((numpy.arange(src_nj+1))/src_nj - 0.5)*180. # For plotting
src_qlon = ((numpy.arange(src_ni+1))/src_ni)*360.+src_x0 # For plotting
src shape = 720 x 1440
plt.pcolormesh(numpy.ma.log10(src_data[0,::-1,:])); plt.colorbar();
../_images/1cf3782132426f1b251e106e2ddd2bc867ca09754a033909b7e4e9040495eaad.png
def super_sample_grid(ocn_qlat, ocn_qlon, ocn_mask, src_nj, src_ni):
    nj, ni = ocn_mask.shape
    fac = 1
    while fac*nj<src_nj and fac*ni<src_ni:
        fac += 1
    lon = numpy.zeros( (nj,fac,ni,fac) )
    lat = numpy.zeros( (nj,fac,ni,fac) )
    mask = numpy.zeros( (nj,fac,ni,fac) )
    for j in range(fac):
        ya = ( 2*j+1 ) / ( 2*fac )
        yb = 1. - ya
        for i in range(fac):
            xa = ( 2*i+1 ) / ( 2*fac )
            xb = 1. - xa
            lon[:,j,:,i] = (  yb * ( xb * ocn_qlon[:-1,:-1] + xa * ocn_qlon[:-1,1:] )
                            + ya * ( xb * ocn_qlon[1:,:-1] + xa * ocn_qlon[1:,1:] ) )
            lat[:,j,:,i] = (  yb * ( xb * ocn_qlat[:-1,:-1] + xa * ocn_qlat[:-1,1:] )
                            + ya * ( xb * ocn_qlat[1:,:-1] + xa * ocn_qlat[1:,1:] ) )
    return lat, lon
spr_lat, spr_lon = super_sample_grid(ocn_qlat, ocn_qlon, ocn_mask, src_nj, src_ni)
def latlon2ji(src_lat, src_lon, lat, lon):
    nj, ni = len(src_lat), len(src_lon)
    src_x0 = int( ( src_lon[0] + src_lon[-1] )/2 + 0.5) - 180.
    j = numpy.maximum(0, numpy.floor( ( ( lat + 90. ) / 180. ) * nj - 0.5 ).astype(int))
    i = numpy.mod( numpy.floor( ( ( lon - src_x0 ) / 360. ) * ni - 0.5 ), ni ).astype(int)
    jp1 = numpy.minimum(nj-1, j+1)
    ip1 = numpy.mod(i+1, ni)
    return j,i,jp1,ip1
y, x = src_lat[0], src_lon[0]
latlon2ji(src_lat, src_lon, y, x)
(0, 0, 1, 1)
def super_interp(src_lat, src_lon, data, spr_lat, spr_lon):
    nj, ni = data.shape
    dy, dx = 180./nj, 360./ni
    j0, i0, j1, i1 = latlon2ji(src_lat, src_lon, spr_lat, spr_lon)
    def ydist(lat0, lat1):
        return numpy.abs( lat1-lat0 )
    def xdist(lon0, lon1):
        return numpy.abs( numpy.mod((lon1-lon0)+180, 360) - 180 )
    w_e = xdist( src_lon[i0], spr_lon) / dx
    w_w = 1. - w_e
    w_n = ydist( src_lat[j0], spr_lat) / dy
    w_s = 1. - w_n
    return ( w_s*w_w * data[j0,i0] + w_n*w_e * data[j1,i1] ) + ( w_n*w_w * data[j1,i0] + w_s*w_e * data[j0,i1] )
def fill_missing_data(idata, mask, verbose=True, maxiter=0, debug=False, stabilizer=1.e-14):
    """
    Returns data with masked values objectively interpolated except where mask==0.
    
    Arguments:
    data - numpy.ma.array with mask==True where there is missing data or land.
    mask - numpy.array of 0 or 1, 0 for land, 1 for ocean.
    
    Returns a numpy.ma.array.
    """
    nj,ni = idata.shape
    fdata = idata.filled(0.) # Working with an ndarray is faster than working with a masked array
    if debug:
        plt.figure(); plt.pcolormesh(mask); plt.title('mask'); plt.colorbar();
        #plt.figure(); plt.pcolormesh(idata.mask); plt.title('idata.mask'); plt.colorbar();
        plt.figure(); plt.pcolormesh(idata); plt.title('idata'); plt.colorbar();
        plt.figure(); plt.pcolormesh(idata.filled(3.)); plt.title('idata.filled'); plt.colorbar();
        plt.figure(); plt.pcolormesh(idata.filled(3.)); plt.title('fdata'); plt.colorbar();
    missing_j, missing_i = numpy.where( idata.mask & (mask>0) )
    n_missing = missing_i.size
    if verbose:
        print('Data shape: %i x %i = %i with %i missing values'%(nj, ni, nj*ni, numpy.count_nonzero(idata.mask)))
        print('Mask shape: %i x %i = %i with %i land cells'%(mask.shape[0], mask.shape[1],
                                                                 numpy.prod(mask.shape), numpy.count_nonzero(1-mask)))
        print('Data has %i missing values in ocean'%(n_missing))
        print('Data range: %g .. %g '%(idata.min(),idata.max()))
    # ind contains column of matrix/row of vector corresponding to point [j,i]
    ind = numpy.zeros( fdata.shape, dtype=int ) - int(1e6)
    ind[missing_j,missing_i] = numpy.arange( n_missing )
    if verbose: print('Building matrix')
    A = scipy.sparse.lil_matrix( (n_missing, n_missing) )
    b = numpy.zeros( (n_missing) )
    ld = numpy.zeros( (n_missing) )
    A[range(n_missing),range(n_missing)] = 0.
    if verbose: print('Looping over cells')
    for n in range(n_missing):
        j,i = missing_j[n],missing_i[n]
        im1 = ( i + ni - 1 ) % ni
        ip1 = ( i + 1 ) % ni
        jm1 = max( j-1, 0)
        jp1 = min( j+1, nj-1)
        if j>0 and mask[jm1,i]>0:
            ld[n] -= 1.
            ij = ind[jm1,i]
            if ij>=0:
                A[n,ij] = 1.
            else:
                b[n] -= fdata[jm1,i]
        if mask[j,im1]>0:
            ld[n] -= 1.
            ij = ind[j,im1]
            if ij>=0:
                A[n,ij] = 1.
            else:
                b[n] -= fdata[j,im1]
        if mask[j,ip1]>0:
            ld[n] -= 1.
            ij = ind[j,ip1]
            if ij>=0:
                A[n,ij] = 1.
            else:
                b[n] -= fdata[j,ip1]
        if j<nj-1 and mask[jp1,i]>0:
            ld[n] -= 1.
            ij = ind[jp1,i]
            if ij>=0:
                A[n,ij] = 1.
            else:
                b[n] -= fdata[jp1,i]
        if j==nj-1 and mask[j,ni-1-i]>0: # Tri-polar fold
            ld[n] -= 1.
            ij = ind[j,ni-1-i]
            if ij>=0:
                A[n,ij] = 1.
            else:
                b[n] -= fdata[j,ni-1-i]
    if debug:
        tmp = numpy.zeros((nj,ni)); tmp[ missing_j, missing_i ] = b
        plt.figure(); plt.pcolormesh(tmp); plt.title('b (initial)'); plt.colorbar();
    # Set leading diagonal
    b[ld>=0] = 0.
    A[range(n_missing),range(n_missing)] = ld - stabilizer
    if debug:
        tmp = numpy.zeros((nj,ni)); tmp[ missing_j, missing_i ] = b
        plt.figure(); plt.pcolormesh(tmp); plt.title('b (final)'); plt.colorbar();
        tmp = numpy.ones((nj,ni)); tmp[ missing_j, missing_i ] = A.diagonal()
        plt.figure(); plt.pcolormesh(tmp); plt.title('A[i,i]'); plt.colorbar();
    if verbose: print('Matrix constructed')
    A = scipy.sparse.csr_matrix(A)
    if verbose: print('Matrix converted')
    new_data = numpy.ma.array( fdata, mask=(mask==0))
    if maxiter is None:
        x,info = scipy.sparse.linalg.bicg(A, b)
    elif maxiter==0:
        x = scipy.sparse.linalg.spsolve(A, b)
    else:
        x,info = scipy.sparse.linalg.bicg(A, b, maxiter=maxiter)
    if verbose: print('Matrix inverted')
    new_data[missing_j,missing_i] = x
    return new_data

Read Chla dataset for tx1_4v1#

chla_tx1_4 = netCDF4.Dataset('seawifs-clim-1997-2010-tx1_4v1.nc', 'r+')
chlor_a = chla_tx1_4.variables['CHL_A'][:]

Main loop#

for t in range(src_data.shape[0]):
#for t in range(1):
    q_int = super_interp(src_lat, src_lon, src_data[t,::-1,:], spr_lat, spr_lon)
    q_int = q_int.swapaxes(1,2).reshape((ocn_nj,ocn_ni,q_int.shape[3]*q_int.shape[-1])).mean(axis=-1)
    q = q_int * ocn_mask
    chlor_a[t,:] = fill_missing_data(q, ocn_mask)
Data shape: 1080 x 1440 = 1555200 with 803218 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 155224 missing values in ocean
Data range: 0 .. 66.1113 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 768968 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 121156 missing values in ocean
Data range: 0 .. 52.6533 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 768946 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 121117 missing values in ocean
Data range: 0 .. 29.4216 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 850443 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 202503 missing values in ocean
Data range: 0 .. 34.7537 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 954092 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 306176 missing values in ocean
Data range: 0 .. 43.9314 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 996172 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 348495 missing values in ocean
Data range: 0 .. 36.3215 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 955845 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 308575 missing values in ocean
Data range: 0 .. 60.6444 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 861897 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 215037 missing values in ocean
Data range: 0 .. 54.5344 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 838740 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 191652 missing values in ocean
Data range: 0 .. 56.4415 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 869961 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 222136 missing values in ocean
Data range: 0 .. 64.9256 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 877629 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 229577 missing values in ocean
Data range: 0 .. 26.5329 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted
Data shape: 1080 x 1440 = 1555200 with 842986 missing values
Mask shape: 1080 x 1440 = 1555200 with 649598 land cells
Data has 194922 missing values in ocean
Data range: 0 .. 41.0065 
Building matrix
Looping over cells
Matrix constructed
Matrix converted
Matrix inverted

Compare result against original data#

for t in range(src_data.shape[0]):
  fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18,5.))
  plt.suptitle('Month # {}'.format(t+1))
  ax[0].pcolormesh(grd.qlon, grd.qlat,numpy.ma.log10(chlor_a[t,:]),vmin=-2,vmax=2)
  ax[0].set_title('chlor_a remapped to tx1_4v1')
    
  ax[1].pcolormesh(numpy.ma.log10(src_data[t,::-1,:]),vmin=-2,vmax=2)
  ax[1].set_title('original data')
../_images/522ea333f409f02dd4f235caef30d96efbd711dcceb2bbf362b2626aadb62c87.png ../_images/a563013593710d1f11eb1b8cdbbfab332ab6244d2daf8845c1007267ec5566cc.png ../_images/bc9f7f2bef69fe10d7c785d89f17ba834c6f99f7d537f706e6e4cc6a02393134.png ../_images/4bdd5c048ff80300c3079a6b9ebca192e9dea2ff3d0cfbc5fd02f6092b9fce62.png ../_images/b5b1b6348c80cac2de8e12a1683467f604d20a5117bc87d6d3e21331da0e1dfe.png ../_images/c1cdab30bf50d3ab7618ab0eef1f920a9ed1115fadeb127f88687fa1375a8000.png ../_images/aeb293267d1165a5d1565e5e619aaeaff2be76628152aeb3e0c5baa4ab299b8b.png ../_images/1f74098535ac068437aa1df76289e1ae50358db953d1dcf8e7e498ce297e2ac5.png ../_images/9514cbad3ab9bfba52e9094def560fe30961c7fdbc7ea8a835320a04ed8c33d0.png ../_images/c1ed434210e3a37c9f8d6ded93f10eb19d43b9f55f66ac6f68ea70f82f3a1151.png ../_images/fde18202f55e2963eca57d5116e13f0dc019ee92b41f9cecdae53402cfab2847.png ../_images/c6a99be24cba3d7668179a6aae56c570778159d638f9f01b2f2569de1895cd5d.png

Save data#

from datetime import datetime
# Global attrs
chla_tx1_4.title = 'Chlorophyll Concentration, OCI Algorithm, interpolated and objectivelly filled to tx1_4v1'
chla_tx1_4.repository = 'https://github.com/NCAR/tx1_4/chlorophyll/'
chla_tx1_4.authors = 'Gustavo Marques (gmarques@ucar.edu) and Frank Bryan (bryan@ucar.edu )'
chla_tx1_4.date = datetime.now().isoformat()
jm, im = numpy.shape(grd.tlon)
chla_tx1_4.variables['CHL_A'][:,:,:] = chlor_a[:,:,:]
chla_tx1_4.variables['LON'][:] = grd.tlon[int(jm/2),:]
chla_tx1_4.variables['LAT'][:] = grd.tlat[:,int(im/2)]
chla_tx1_4.sync()
chla_tx1_4.close()