I'm trying to perform temporal compositing on multivariate data cubes. The idea, illustrated by the reprex below, is to select, for each temporal aggregation window (the input data have a frequency of 5 days, to be aggregated to 15 days), the best pixel based on the argmax of one of the variables (NDVI in this case, i.e. the "greenest" pixel, which is therefore less likely to be contaminated by clouds, shadows, atmosphere, etc.). The approach appears to work well on in-memory/NumPy-backed Datasets, but fails when using dask (see the commented-out chunk line).
Any idea how to handle this in a dask-friendly way?
import xarray as xr
import pandas as pd
import numpy as np
# Creating the time range
time = pd.date_range("2021-01-01", "2022-12-31", freq="5D")
# Creating dimensions
x = np.arange(10)
y = np.arange(10)
# Creating a test dataset
ds = xr.Dataset(
    {
        "red": (["time", "y", "x"], np.random.rand(len(time), len(y), len(x))),
        "nir": (["time", "y", "x"], np.random.rand(len(time), len(y), len(x))),
    },
    coords={
        "time": time,
        "x": x,
        "y": y,
    },
)
# Add 20% of nan
# Get nan indices
nan_count = int(ds['red'].size * 0.2)
nan_indices = np.unravel_index(
    np.random.choice(ds['red'].size, nan_count, replace=False),
    ds['red'].shape,
)
# Assign NaNs to both 'red' and 'nir' at these indices
ds['red'].values[nan_indices] = np.nan
ds['nir'].values[nan_indices] = np.nan
# Compute ndvi
ds['NDVI'] = (ds.nir - ds.red)/(ds.nir + ds.red)
# Uncommenting the next line (i.e. switching to dask-backed arrays) is what makes
# the resample/map call below fail:
# ds = ds.chunk({'time': 10, 'y': -1, 'x': -1})
def select_max_ndvi(ds):
    # For each pixel, select the time step with the highest NDVI;
    # NaNs are replaced by -inf so they are never chosen as the maximum.
    max_ndvi = ds.isel(time=ds['NDVI'].fillna(-np.inf).argmax(dim='time'))
    return max_ndvi
ds15 = ds.resample(time='15D').map(select_max_ndvi)
print(ds15)
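
For reference, the only dask-friendly direction I can think of so far is to avoid indexing with a dask-backed argmax result altogether and select the greenest pixel by masking and reducing instead. This is only a sketch (select_max_ndvi_lazy is just a name I made up), and I have not verified that it matches the argmax version in edge cases such as NDVI ties within a window or all-NaN windows:

def select_max_ndvi_lazy(ds):
    # Sketch of a possibly dask-friendly variant: instead of isel() with a
    # dask-backed index array, keep only the time step(s) where NDVI equals
    # the per-pixel maximum within the window, then reduce over time.
    ndvi = ds['NDVI'].fillna(-np.inf)
    mask = ndvi == ndvi.max(dim='time')
    return ds.where(mask).max(dim='time', skipna=True)

ds15_lazy = (
    ds.chunk({'time': 10, 'y': -1, 'x': -1})
    .resample(time='15D')
    .map(select_max_ndvi_lazy)
)
print(ds15_lazy)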