diff --git a/optimi/optimizer.py b/optimi/optimizer.py
index f78409f..7f488a8 100644
--- a/optimi/optimizer.py
+++ b/optimi/optimizer.py
@@ -40,8 +40,11 @@ def __init__(self, params: Iterable[Tensor] | Iterable[dict], defaults: dict[str
         super().__init__(params, defaults)
 
+        # if gradient_release is enabled, disable foreach step so normal optimizer step won't error
         if self.defaults["gradient_release"]:
+            self.defaults["foreach"] = False
             for group in self.param_groups:
+                group["foreach"] = False
                 for p in group["params"]:
                     self.state[p]["group"] = group
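
For context, a minimal sketch of how the new behavior could be checked. It assumes a public optimi optimizer (here `optimi.AdamW`) exposes `gradient_release` and `foreach` keyword arguments that feed the `defaults` entries touched above; those constructor details are an assumption for illustration, not taken from this diff.

```python
# Hedged sketch: `optimi.AdamW` and its `gradient_release`/`foreach` keyword
# arguments are assumed here for illustration; only the defaults/param_group
# behavior shown below comes from the patch itself.
import torch
from optimi import AdamW

model = torch.nn.Linear(4, 2)
opt = AdamW(model.parameters(), lr=1e-3, gradient_release=True)

# With this patch, enabling gradient_release forces foreach off in both the
# defaults and every param group, so a normal optimizer step taken while
# gradient_release is enabled won't error on the foreach path.
print(opt.defaults["foreach"])         # expected: False
print(opt.param_groups[0]["foreach"])  # expected: False
```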