Source code for nabla.nn.optim.schedules

# ===----------------------------------------------------------------------=== #
# Nabla 2025
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #

"""Learning rate schedules."""

import math
from collections.abc import Callable


def constant_schedule(initial_lr: float = 0.001) -> Callable[[int], float]:
    """Constant learning rate schedule.

    Args:
        initial_lr: The learning rate to maintain

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        return initial_lr

    return schedule
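

# Illustrative usage (an added sketch, not part of the original module): the
# schedule returned by constant_schedule ignores the epoch argument, so the
# learning rate never changes over training.
def _demo_constant_schedule() -> None:
    schedule = constant_schedule(initial_lr=0.01)
    assert schedule(0) == schedule(500) == 0.01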


def exponential_decay_schedule(
    initial_lr: float = 0.001,
    decay_factor: float = 0.95,
    decay_every: int = 1000,
) -> Callable[[int], float]:
    """Exponential decay learning rate schedule.

    Args:
        initial_lr: Initial learning rate
        decay_factor: Multiplicative factor applied to the learning rate
        decay_every: Apply the decay every N epochs

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        return initial_lr * (decay_factor ** (epoch // decay_every))

    return schedule
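

# Illustrative usage (an added sketch, not part of the original module): with
# the defaults, the learning rate is multiplied by 0.95 once every 1000 epochs
# and stays constant within each 1000-epoch window.
def _demo_exponential_decay_schedule() -> None:
    schedule = exponential_decay_schedule(
        initial_lr=0.001, decay_factor=0.95, decay_every=1000
    )
    assert math.isclose(schedule(0), 0.001)
    assert math.isclose(schedule(999), 0.001)       # still in the first window
    assert math.isclose(schedule(1000), 0.00095)    # one decay step applied
    assert math.isclose(schedule(2500), 0.0009025)  # two decay steps applied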


def step_decay_schedule(
    initial_lr: float = 0.001,
    decay_factor: float = 0.1,
    step_size: int = 30,
) -> Callable[[int], float]:
    """Step decay learning rate schedule.

    Args:
        initial_lr: Initial learning rate
        decay_factor: Factor to multiply learning rate by at each step
        step_size: Number of epochs between each decay step

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        return initial_lr * (decay_factor ** (epoch // step_size))

    return schedule
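

# Illustrative usage (an added sketch, not part of the original module): step
# decay uses the same formula as exponential decay, but is conventionally used
# with a large drop (here 10x) every few tens of epochs.
def _demo_step_decay_schedule() -> None:
    schedule = step_decay_schedule(initial_lr=0.1, decay_factor=0.1, step_size=30)
    assert math.isclose(schedule(0), 0.1)
    assert math.isclose(schedule(29), 0.1)
    assert math.isclose(schedule(30), 0.01)   # first drop
    assert math.isclose(schedule(60), 0.001)  # second drop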


def cosine_annealing_schedule(
    initial_lr: float = 0.001,
    min_lr: float = 1e-6,
    period: int = 1000,
) -> Callable[[int], float]:
    """Cosine annealing learning rate schedule.

    Args:
        initial_lr: Initial learning rate
        min_lr: Minimum learning rate
        period: Number of epochs for one complete cosine cycle

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        cycle_position = epoch % period
        cosine_factor = 0.5 * (1 + math.cos(math.pi * cycle_position / period))
        return min_lr + (initial_lr - min_lr) * cosine_factor

    return schedule
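

# Illustrative usage (an added sketch, not part of the original module): the
# learning rate starts at initial_lr, reaches the midpoint between initial_lr
# and min_lr halfway through the period, and restarts at the beginning of the
# next cycle because of the epoch % period wrap-around.
def _demo_cosine_annealing_schedule() -> None:
    schedule = cosine_annealing_schedule(initial_lr=0.001, min_lr=1e-6, period=1000)
    assert math.isclose(schedule(0), 0.001)
    assert math.isclose(schedule(500), (0.001 + 1e-6) / 2)  # mid-cycle midpoint
    assert math.isclose(schedule(1000), 0.001)              # new cycle restarts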


def warmup_cosine_schedule(
    initial_lr: float = 0.001,
    warmup_epochs: int = 100,
    total_epochs: int = 1000,
    min_lr: float = 1e-6,
) -> Callable[[int], float]:
    """Warmup followed by cosine annealing schedule.

    Args:
        initial_lr: Peak learning rate after warmup
        warmup_epochs: Number of epochs for linear warmup
        total_epochs: Total number of training epochs
        min_lr: Minimum learning rate

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        if epoch < warmup_epochs:
            # Linear warmup
            return initial_lr * epoch / warmup_epochs
        else:
            # Cosine annealing
            progress = (epoch - warmup_epochs) / (total_epochs - warmup_epochs)
            cosine_factor = 0.5 * (1 + math.cos(math.pi * progress))
            return min_lr + (initial_lr - min_lr) * cosine_factor

    return schedule
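

# Illustrative usage (an added sketch, not part of the original module): the
# learning rate ramps linearly from 0 to the peak over the warmup phase, then
# anneals down to min_lr by the final epoch. A training loop would typically
# query the schedule once per epoch before applying the parameter update.
def _demo_warmup_cosine_schedule() -> None:
    schedule = warmup_cosine_schedule(
        initial_lr=0.001, warmup_epochs=100, total_epochs=1000, min_lr=1e-6
    )
    assert math.isclose(schedule(50), 0.0005)  # halfway through warmup
    assert math.isclose(schedule(100), 0.001)  # peak at the end of warmup
    assert math.isclose(schedule(1000), 1e-6)  # fully annealed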


# Legacy function for backward compatibility
def learning_rate_schedule(
    epoch: int,
    initial_lr: float = 0.001,
    decay_factor: float = 0.95,
    decay_every: int = 1000,
) -> float:
    """Learning rate schedule for complex function learning.

    This is the original function from mlp_train_jit.py, kept for backward
    compatibility. Consider using exponential_decay_schedule for new code.

    Args:
        epoch: Current epoch number
        initial_lr: Initial learning rate
        decay_factor: Multiplicative factor applied to the learning rate
        decay_every: Apply the decay every N epochs

    Returns:
        Learning rate for the current epoch
    """
    return initial_lr * (decay_factor ** (epoch // decay_every))
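

# Illustrative usage (an added sketch, not part of the original module): the
# legacy function computes the same values as a schedule built with
# exponential_decay_schedule and the same arguments.
def _demo_learning_rate_schedule() -> None:
    schedule = exponential_decay_schedule(
        initial_lr=0.001, decay_factor=0.95, decay_every=1000
    )
    for epoch in (0, 1500, 3000):
        assert math.isclose(
            learning_rate_schedule(epoch, 0.001, 0.95, 1000), schedule(epoch)
        )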