Use the GeoJSON file to get the Area of Interest and query the STAC catalog for items
# Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
# import s3fs
import re
from pelicanfs.core import PelicanFileSystem, PelicanMap,OSDFFileSystem
# import fsspec.implementations.http as fshttp
import geopandas as gpd
from pystac_client import Client
# from odc.stac import stac_load
from rasterio.mask import mask
import rasterio
from shapely.geometry import box
from urllib.parse import urlparse
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client as dask_client
from dask.distributed import performance_report
Use the GeoJSON file to get the Area of Interest and query the STAC catalog for items¶
# Load the GeoJSON file
geojson_path = '3100180240.geojson'
gdf = gpd.read_file(geojson_path)
# Display the loaded GeoDataFrame
print(gdf)
# Connect to the Earth Search STAC API (Sentinel-2 Level-2A COGs are available here)
catalog_url = "https://earth-search.aws.element84.com/v1"
catalog = Client.open(catalog_url)
# Define the date range as strings
start_date = "2019-01"
end_date = "2023-02"
# Define cloud cover threshold
cloud_cover_max = 0.05 # 5% cloud cover threshold
#cloud_cover_max = 0.20 # 20% cloud cover threshold
# Perform the search
search = catalog.search(
collections=["sentinel-2-l2a"],
bbox=aoi_bounds,
datetime=f"{start_date}/{end_date}",
#datetime="2022-06-01/2022-09-30",
query={"eo:cloud_cover": {"lt": cloud_cover_max * 100}}
)
# Get all matching items
items = list(search.items())
print(f"Found {len(items)} matching items.")
Found 258 matching items.
# Confirm that the matching items are hosted in a aws-us-region by printing out the href links for the first n items
n=1
for i, item in enumerate(items[:n]):
print(f"\nItem {i+1}: {item.id}")
for asset_key, asset in item.assets.items():
print(f" {asset_key}: {asset.href}")
def clp(image_src, aoi_geometry):
"""
Clip a raster image to the Area of Interest (AOI).
Parameters:
- image_src: Open rasterio dataset.
- aoi_geometry: AOI geometry as a GeoJSON-like object.
Returns:
- Clipped image array and updated metadata.
"""
# out_image, out_transform = mask(image_src, [aoi_geometry], crop=True)
out_image, out_transform = mask(image_src, aoi_geometry, crop=True)
out_meta = image_src.meta.copy()
out_meta.update({
"driver": "GTiff",
"height": out_image.shape[1],
"width": out_image.shape[2],
"transform": out_transform
})
return out_image, out_meta
def maskWater(image, water_mask):
"""
Masks out water pixels using the MODIS water mask.
Parameters:
- image: Raster image to mask.
- water_mask: Water mask raster.
Returns:
- Water-masked image.
"""
water = water_mask.read(1) # Read the water mask
mask = water < 1 # Mask water pixels (water < 1)
image_masked = np.where(mask, image, np.nan)
return image_masked
def maskS2snow(image, snow_prob):
"""
Masks snow pixels using the MSK_SNWPRB (Snow Probability Mask).
Parameters:
- image: Raster image to mask.
- snow_prob: Snow probability raster.
Returns:
- Snow-masked image.
"""
snow = snow_prob.read(1) # Read the snow probability mask
mask = snow < 0.009 # Mask snow pixels (snow probability < 0.9%)
image_masked = np.where(mask, image, np.nan)
return image_masked
def maskWhite(image, b2, b3, b4):
"""
Masks white pixels by converting RGB to grayscale and applying a threshold.
Parameters:
- image: Raster image to mask.
- b2, b3, b4: Blue, Green, and Red bands respectively.
Returns:
- Grayscale-masked image.
"""
# Convert RGB to grayscale
grayscale = (0.3 * b4.read(1) + 0.59 * b3.read(1) + 0.11 * b2.read(1)) * 1e4
mask = grayscale <= 2000 # Mask white pixels (grayscale > 2000)
image_masked = np.where(mask, image, np.nan)
return image_masked
%%time
# Loop through each item in the STAC query
for idx, item in enumerate(items, start=1):
print(f"Processing dataset #{idx}")
# Check required assets
required_assets = ["red", "green", "blue", "scl"]
if not all(asset in item.assets for asset in required_assets):
print(f"Skipping dataset #{idx}: Missing required assets.")
continue
# Get asset URLs
red_url = item.assets["red"].href
green_url = item.assets["green"].href
blue_url = item.assets["blue"].href
scl_url = item.assets["scl"].href
# Reproject AOI to match raster CRS
with rasterio.open(red_url) as src:
raster_crs = CRS(src.crs)
aoi_geometry_reprojected = GeoSeries(aoi_geometry).set_crs(gdf.crs).to_crs(raster_crs)
# Open and clip bands
with rasterio.open(red_url) as red_src, \
rasterio.open(green_url) as green_src, \
rasterio.open(blue_url) as blue_src, \
rasterio.open(scl_url) as scl_src:
aoi_geometry_reprojected = GeoSeries(aoi_geometry).set_crs(gdf.crs).to_crs(raster_crs)
raster_bounds = box(*red_src.bounds)
if not aoi_geometry_reprojected[0].intersects(raster_bounds):
print(f"Warning: AOI does not intersect the bounds of {red_url}. Skipping.")
continue
# Clip all RGB and SCL bands to AOI
red_clipped, red_meta = clp(red_src, aoi_geometry_reprojected)
green_clipped, _ = clp(green_src, aoi_geometry_reprojected)
blue_clipped, _ = clp(blue_src, aoi_geometry_reprojected)
scl_clipped, _ = clp(scl_src, aoi_geometry_reprojected)
# # Clip water_mask if available
# if water_mask_available:
# with rasterio.open(water_mask_url) as water_mask_src:
# water_mask_clipped, _ = clp(water_mask_src, aoi_geometry_reprojected)
# Apply maskS2clouds
# red_masked = maskS2clouds(red_clipped, scl_clipped)
# green_masked = maskS2clouds(green_clipped, scl_clipped)
# blue_masked = maskS2clouds(blue_clipped, scl_clipped)
# # Apply maskWater if available
# if water_mask_available:
# red_masked = maskWater(red_masked, water_mask_clipped)
# green_masked = maskWater(green_masked, water_mask_clipped)
# blue_masked = maskWater(blue_masked, water_mask_clipped)
# # Apply maskWhite
# red_final = maskWhite(red_clipped, blue_clipped, green_clipped, red_clipped)
# green_final = maskWhite(green_clipped, blue_clipped, green_clipped, red_clipped)
# blue_final = maskWhite(blue_clipped, blue_clipped, green_clipped, red_clipped)
print(f"Dataset #{idx} processed. Ready for visualization or further analysis.")
Processing dataset #1
Dataset #1 processed. Ready for visualization or further analysis.
Processing dataset #2
Dataset #2 processed. Ready for visualization or further analysis.
Processing dataset #3
Dataset #3 processed. Ready for visualization or further analysis.
Processing dataset #4
Dataset #4 processed. Ready for visualization or further analysis.
Processing dataset #5
Dataset #5 processed. Ready for visualization or further analysis.
Processing dataset #6
Dataset #6 processed. Ready for visualization or further analysis.
Processing dataset #7
Dataset #7 processed. Ready for visualization or further analysis.
Processing dataset #8
Dataset #8 processed. Ready for visualization or further analysis.
Processing dataset #9
Dataset #9 processed. Ready for visualization or further analysis.
Processing dataset #10
Dataset #10 processed. Ready for visualization or further analysis.
Processing dataset #11
Dataset #11 processed. Ready for visualization or further analysis.
Processing dataset #12
Dataset #12 processed. Ready for visualization or further analysis.
Processing dataset #13
Dataset #13 processed. Ready for visualization or further analysis.
Processing dataset #14
Dataset #14 processed. Ready for visualization or further analysis.
Processing dataset #15
Dataset #15 processed. Ready for visualization or further analysis.
Processing dataset #16
Dataset #16 processed. Ready for visualization or further analysis.
Processing dataset #17
Dataset #17 processed. Ready for visualization or further analysis.
Processing dataset #18
Dataset #18 processed. Ready for visualization or further analysis.
Processing dataset #19
Dataset #19 processed. Ready for visualization or further analysis.
Processing dataset #20
Dataset #20 processed. Ready for visualization or further analysis.
Processing dataset #21
Dataset #21 processed. Ready for visualization or further analysis.
Processing dataset #22
Dataset #22 processed. Ready for visualization or further analysis.
Processing dataset #23
Dataset #23 processed. Ready for visualization or further analysis.
Processing dataset #24
Dataset #24 processed. Ready for visualization or further analysis.
Processing dataset #25
Dataset #25 processed. Ready for visualization or further analysis.
Processing dataset #26
Dataset #26 processed. Ready for visualization or further analysis.
Processing dataset #27
Dataset #27 processed. Ready for visualization or further analysis.
Processing dataset #28
Dataset #28 processed. Ready for visualization or further analysis.
Processing dataset #29
Dataset #29 processed. Ready for visualization or further analysis.
Processing dataset #30
Dataset #30 processed. Ready for visualization or further analysis.
Processing dataset #31
Dataset #31 processed. Ready for visualization or further analysis.
Processing dataset #32
Dataset #32 processed. Ready for visualization or further analysis.
Processing dataset #33
Dataset #33 processed. Ready for visualization or further analysis.
Processing dataset #34
Dataset #34 processed. Ready for visualization or further analysis.
Processing dataset #35
Dataset #35 processed. Ready for visualization or further analysis.
Processing dataset #36
Dataset #36 processed. Ready for visualization or further analysis.
Processing dataset #37
Dataset #37 processed. Ready for visualization or further analysis.
Processing dataset #38
Dataset #38 processed. Ready for visualization or further analysis.
Processing dataset #39
Dataset #39 processed. Ready for visualization or further analysis.
Processing dataset #40
Dataset #40 processed. Ready for visualization or further analysis.
Processing dataset #41
Dataset #41 processed. Ready for visualization or further analysis.
Processing dataset #42
Dataset #42 processed. Ready for visualization or further analysis.
Processing dataset #43
Dataset #43 processed. Ready for visualization or further analysis.
Processing dataset #44
Dataset #44 processed. Ready for visualization or further analysis.
Processing dataset #45
Dataset #45 processed. Ready for visualization or further analysis.
Processing dataset #46
Dataset #46 processed. Ready for visualization or further analysis.
Processing dataset #47
Dataset #47 processed. Ready for visualization or further analysis.
Processing dataset #48
Dataset #48 processed. Ready for visualization or further analysis.
Processing dataset #49
Dataset #49 processed. Ready for visualization or further analysis.
Processing dataset #50
Dataset #50 processed. Ready for visualization or further analysis.
Processing dataset #51
Dataset #51 processed. Ready for visualization or further analysis.
Processing dataset #52
Dataset #52 processed. Ready for visualization or further analysis.
Processing dataset #53
Dataset #53 processed. Ready for visualization or further analysis.
Processing dataset #54
Dataset #54 processed. Ready for visualization or further analysis.
Processing dataset #55
Dataset #55 processed. Ready for visualization or further analysis.
Processing dataset #56
Dataset #56 processed. Ready for visualization or further analysis.
Processing dataset #57
Dataset #57 processed. Ready for visualization or further analysis.
Processing dataset #58
Dataset #58 processed. Ready for visualization or further analysis.
Processing dataset #59
Dataset #59 processed. Ready for visualization or further analysis.
Processing dataset #60
Dataset #60 processed. Ready for visualization or further analysis.
Processing dataset #61
Dataset #61 processed. Ready for visualization or further analysis.
Processing dataset #62
Dataset #62 processed. Ready for visualization or further analysis.
Processing dataset #63
Dataset #63 processed. Ready for visualization or further analysis.
Processing dataset #64
Dataset #64 processed. Ready for visualization or further analysis.
Processing dataset #65
Dataset #65 processed. Ready for visualization or further analysis.
Processing dataset #66
Dataset #66 processed. Ready for visualization or further analysis.
Processing dataset #67
Dataset #67 processed. Ready for visualization or further analysis.
Processing dataset #68
Dataset #68 processed. Ready for visualization or further analysis.
Processing dataset #69
Dataset #69 processed. Ready for visualization or further analysis.
Processing dataset #70
Dataset #70 processed. Ready for visualization or further analysis.
Processing dataset #71
Dataset #71 processed. Ready for visualization or further analysis.
Processing dataset #72
Dataset #72 processed. Ready for visualization or further analysis.
Processing dataset #73
Dataset #73 processed. Ready for visualization or further analysis.
Processing dataset #74
Dataset #74 processed. Ready for visualization or further analysis.
Processing dataset #75
Dataset #75 processed. Ready for visualization or further analysis.
Processing dataset #76
Dataset #76 processed. Ready for visualization or further analysis.
Processing dataset #77
Dataset #77 processed. Ready for visualization or further analysis.
Processing dataset #78
Dataset #78 processed. Ready for visualization or further analysis.
Processing dataset #79
Dataset #79 processed. Ready for visualization or further analysis.
Processing dataset #80
Dataset #80 processed. Ready for visualization or further analysis.
Processing dataset #81
Dataset #81 processed. Ready for visualization or further analysis.
Processing dataset #82
Dataset #82 processed. Ready for visualization or further analysis.
Processing dataset #83
Dataset #83 processed. Ready for visualization or further analysis.
Processing dataset #84
Dataset #84 processed. Ready for visualization or further analysis.
Processing dataset #85
Dataset #85 processed. Ready for visualization or further analysis.
Processing dataset #86
Dataset #86 processed. Ready for visualization or further analysis.
Processing dataset #87
Dataset #87 processed. Ready for visualization or further analysis.
Processing dataset #88
Dataset #88 processed. Ready for visualization or further analysis.
Processing dataset #89
Dataset #89 processed. Ready for visualization or further analysis.
Processing dataset #90
Dataset #90 processed. Ready for visualization or further analysis.
Processing dataset #91
Dataset #91 processed. Ready for visualization or further analysis.
Processing dataset #92
Dataset #92 processed. Ready for visualization or further analysis.
Processing dataset #93
Dataset #93 processed. Ready for visualization or further analysis.
Processing dataset #94
Dataset #94 processed. Ready for visualization or further analysis.
Processing dataset #95
Dataset #95 processed. Ready for visualization or further analysis.
Processing dataset #96
Dataset #96 processed. Ready for visualization or further analysis.
Processing dataset #97
Dataset #97 processed. Ready for visualization or further analysis.
Processing dataset #98
Dataset #98 processed. Ready for visualization or further analysis.
Processing dataset #99
Dataset #99 processed. Ready for visualization or further analysis.
Processing dataset #100
Dataset #100 processed. Ready for visualization or further analysis.
Processing dataset #101
Dataset #101 processed. Ready for visualization or further analysis.
Processing dataset #102
Dataset #102 processed. Ready for visualization or further analysis.
Processing dataset #103
Dataset #103 processed. Ready for visualization or further analysis.
Processing dataset #104
Dataset #104 processed. Ready for visualization or further analysis.
Processing dataset #105
Dataset #105 processed. Ready for visualization or further analysis.
Processing dataset #106
Dataset #106 processed. Ready for visualization or further analysis.
Processing dataset #107
Dataset #107 processed. Ready for visualization or further analysis.
Processing dataset #108
Dataset #108 processed. Ready for visualization or further analysis.
Processing dataset #109
Dataset #109 processed. Ready for visualization or further analysis.
Processing dataset #110
Dataset #110 processed. Ready for visualization or further analysis.
Processing dataset #111
Dataset #111 processed. Ready for visualization or further analysis.
Processing dataset #112
Dataset #112 processed. Ready for visualization or further analysis.
Processing dataset #113
Dataset #113 processed. Ready for visualization or further analysis.
Processing dataset #114
Dataset #114 processed. Ready for visualization or further analysis.
Processing dataset #115
Dataset #115 processed. Ready for visualization or further analysis.
Processing dataset #116
Dataset #116 processed. Ready for visualization or further analysis.
Processing dataset #117
Dataset #117 processed. Ready for visualization or further analysis.
Processing dataset #118
Dataset #118 processed. Ready for visualization or further analysis.
Processing dataset #119
Dataset #119 processed. Ready for visualization or further analysis.
Processing dataset #120
Dataset #120 processed. Ready for visualization or further analysis.
Processing dataset #121
Dataset #121 processed. Ready for visualization or further analysis.
Processing dataset #122
Dataset #122 processed. Ready for visualization or further analysis.
Processing dataset #123
Dataset #123 processed. Ready for visualization or further analysis.
Processing dataset #124
Dataset #124 processed. Ready for visualization or further analysis.
Processing dataset #125
Dataset #125 processed. Ready for visualization or further analysis.
Processing dataset #126
Dataset #126 processed. Ready for visualization or further analysis.
Processing dataset #127
Dataset #127 processed. Ready for visualization or further analysis.
Processing dataset #128
Dataset #128 processed. Ready for visualization or further analysis.
Processing dataset #129
Dataset #129 processed. Ready for visualization or further analysis.
Processing dataset #130
Dataset #130 processed. Ready for visualization or further analysis.
Processing dataset #131
Dataset #131 processed. Ready for visualization or further analysis.
Processing dataset #132
Dataset #132 processed. Ready for visualization or further analysis.
Processing dataset #133
Dataset #133 processed. Ready for visualization or further analysis.
Processing dataset #134
Dataset #134 processed. Ready for visualization or further analysis.
Processing dataset #135
Dataset #135 processed. Ready for visualization or further analysis.
Processing dataset #136
Dataset #136 processed. Ready for visualization or further analysis.
Processing dataset #137
Dataset #137 processed. Ready for visualization or further analysis.
Processing dataset #138
Dataset #138 processed. Ready for visualization or further analysis.
Processing dataset #139
Dataset #139 processed. Ready for visualization or further analysis.
Processing dataset #140
Dataset #140 processed. Ready for visualization or further analysis.
Processing dataset #141
Dataset #141 processed. Ready for visualization or further analysis.
Processing dataset #142
Dataset #142 processed. Ready for visualization or further analysis.
Processing dataset #143
Dataset #143 processed. Ready for visualization or further analysis.
Processing dataset #144
Dataset #144 processed. Ready for visualization or further analysis.
Processing dataset #145
Dataset #145 processed. Ready for visualization or further analysis.
Processing dataset #146
Dataset #146 processed. Ready for visualization or further analysis.
Processing dataset #147
Dataset #147 processed. Ready for visualization or further analysis.
Processing dataset #148
Dataset #148 processed. Ready for visualization or further analysis.
Processing dataset #149
Dataset #149 processed. Ready for visualization or further analysis.
Processing dataset #150
Dataset #150 processed. Ready for visualization or further analysis.
Processing dataset #151
Dataset #151 processed. Ready for visualization or further analysis.
Processing dataset #152
Dataset #152 processed. Ready for visualization or further analysis.
Processing dataset #153
Dataset #153 processed. Ready for visualization or further analysis.
Processing dataset #154
Dataset #154 processed. Ready for visualization or further analysis.
Processing dataset #155
Dataset #155 processed. Ready for visualization or further analysis.
Processing dataset #156
Dataset #156 processed. Ready for visualization or further analysis.
Processing dataset #157
Dataset #157 processed. Ready for visualization or further analysis.
Processing dataset #158
Dataset #158 processed. Ready for visualization or further analysis.
Processing dataset #159
Dataset #159 processed. Ready for visualization or further analysis.
Processing dataset #160
Dataset #160 processed. Ready for visualization or further analysis.
Processing dataset #161
Dataset #161 processed. Ready for visualization or further analysis.
Processing dataset #162
Dataset #162 processed. Ready for visualization or further analysis.
Processing dataset #163
Dataset #163 processed. Ready for visualization or further analysis.
Processing dataset #164
Dataset #164 processed. Ready for visualization or further analysis.
Processing dataset #165
Dataset #165 processed. Ready for visualization or further analysis.
Processing dataset #166
Dataset #166 processed. Ready for visualization or further analysis.
Processing dataset #167
Dataset #167 processed. Ready for visualization or further analysis.
Processing dataset #168
Dataset #168 processed. Ready for visualization or further analysis.
Processing dataset #169
Dataset #169 processed. Ready for visualization or further analysis.
Processing dataset #170
Dataset #170 processed. Ready for visualization or further analysis.
Processing dataset #171
Dataset #171 processed. Ready for visualization or further analysis.
Processing dataset #172
Dataset #172 processed. Ready for visualization or further analysis.
Processing dataset #173
# %%time
# # Using pelicanfs
# # Loop through each item in the STAC query
# for idx, item in enumerate(items, start=1):
# print(f"Processing dataset #{idx}")
# # Check required assets
# required_assets = ["red", "green", "blue", "scl"]
# if not all(asset in item.assets for asset in required_assets):
# print(f"Skipping dataset #{idx}: Missing required assets.")
# continue
# # Get asset URLs
# red_url = item.assets["red"].href
# green_url = item.assets["green"].href
# blue_url = item.assets["blue"].href
# scl_url = item.assets["scl"].href
# pel_red_url = returnOSDFPath(red_url)
# pel_green_url = returnOSDFPath(green_url)
# pel_blue_url = returnOSDFPath(blue_url)
# pel_scl_url = returnOSDFPath(scl_url)
# pelfs = PelicanFileSystem("pelican://osg-htc.org")
# # Reproject AOI to match raster CRS
# with rasterio.open(pel_red_url,opener=pelfs) as src:
# with rasterio.open(red_url) as src:
# raster_crs = CRS(src.crs)
# aoi_geometry_reprojected = GeoSeries(aoi_geometry).set_crs(gdf.crs).to_crs(raster_crs)
# # Reproject AOI to match raster CRS
# with rasterio.open(pel_red_url, opener=pelfs) as red_src, \
# rasterio.open(pel_green_url, opener=pelfs) as green_src, \
# rasterio.open(pel_blue_url, opener=pelfs) as blue_src, \
# rasterio.open(pel_scl_url, opener=pelfs) as scl_src:
# aoi_geometry_reprojected = GeoSeries(aoi_geometry).set_crs(gdf.crs).to_crs(raster_crs)
# raster_bounds = box(*red_src.bounds)
# if not aoi_geometry_reprojected[0].intersects(raster_bounds):
# print(f"Warning: AOI does not intersect the bounds of {red_url}. Skipping.")
# continue
# # Clip all RGB and SCL bands to AOI
# red_clipped, red_meta = clp(red_src, aoi_geometry_reprojected)
# green_clipped, _ = clp(green_src, aoi_geometry_reprojected)
# blue_clipped, _ = clp(blue_src, aoi_geometry_reprojected)
# scl_clipped, _ = clp(scl_src, aoi_geometry_reprojected)
# print(f"Dataset #{idx} processed. Ready for visualization or further analysis.")
from sklearn.preprocessing import MinMaxScaler
# Function to scale image bands properly
def scale_band(band):
scaler = MinMaxScaler(feature_range=(0, 1))
return scaler.fit_transform(band) # Normalize between 0-1
# Assuming red_clipped, green_clipped, and blue_clipped are 2D NumPy arrays
rgb = np.dstack((scale_band(red_clipped.squeeze()),
scale_band(green_clipped.squeeze()),
scale_band(blue_clipped.squeeze()),
))
# Plot
plt.figure(figsize=(10, 8))
plt.imshow(rgb)
plt.axis("off")
plt.show()
# def visualize_rgb(red_band, green_band, blue_band, title="RGB Composite"):
# """
# Visualizes an RGB composite of raster bands using matplotlib.
# Parameters:
# - red_band, green_band, blue_band (numpy array): Red, Green, and Blue bands.
# - title (str): Title for the plot.
# """
# rgb = np.dstack((
# np.clip(red_band, 0, 1), # Normalize reflectance between 0-1
# np.clip(green_band, 0, 1),
# np.clip(blue_band, 0, 1)
# ))
# plt.figure(figsize=(10, 8))
# plt.imshow(rgb)
# plt.title(title)
# plt.axis("off")
# plt.show()
# # Example: Visualize RGB composite
# visualize_rgb(red_clipped, green_clipped, blue_clipped, title="Processed RGB Composite")