Source code for nabla.nn.optim.schedules
# ===----------------------------------------------------------------------=== #
# Nabla 2025
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #
"""Learning rate schedules."""
import math
from collections.abc import Callable
def constant_schedule(initial_lr: float = 0.001) -> Callable[[int], float]:
    """Constant learning rate schedule.

    Args:
        initial_lr: The learning rate to maintain

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        # The epoch argument is intentionally ignored; the rate never changes.
        return initial_lr

    return schedule
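
# Illustrative usage (commented sketch, not part of the original module):
#   sched = constant_schedule(initial_lr=0.01)
#   sched(0)    -> 0.01
#   sched(500)  -> 0.01   (same value at every epoch)
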
def exponential_decay_schedule(
    initial_lr: float = 0.001,
    decay_factor: float = 0.95,
    decay_every: int = 1000,
) -> Callable[[int], float]:
    """Exponential decay learning rate schedule.

    Args:
        initial_lr: Initial learning rate
        decay_factor: Factor to multiply learning rate by
        decay_every: Apply decay every N epochs

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        return initial_lr * (decay_factor ** (epoch // decay_every))

    return schedule
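
# Illustrative usage (commented sketch, not part of the original module):
#   sched = exponential_decay_schedule(initial_lr=0.001, decay_factor=0.95, decay_every=1000)
#   sched(0)     -> 0.001          (no decay before epoch 1000)
#   sched(1000)  -> ~0.00095       (0.001 * 0.95 ** 1)
#   sched(2500)  -> ~0.0009025     (0.001 * 0.95 ** 2)
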
def step_decay_schedule(
    initial_lr: float = 0.001,
    decay_factor: float = 0.1,
    step_size: int = 30,
) -> Callable[[int], float]:
    """Step decay learning rate schedule.

    Args:
        initial_lr: Initial learning rate
        decay_factor: Factor to multiply learning rate by at each step
        step_size: Number of epochs between each decay step

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        return initial_lr * (decay_factor ** (epoch // step_size))

    return schedule
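
# Illustrative usage (commented sketch, not part of the original module):
#   sched = step_decay_schedule(initial_lr=0.1, decay_factor=0.1, step_size=30)
#   sched(0)   -> 0.1
#   sched(30)  -> ~0.01          (one decay step)
#   sched(60)  -> ~0.001         (two decay steps)
# Note: this uses the same formula as exponential_decay_schedule, exposed with
# step-style parameter names and defaults.
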
def cosine_annealing_schedule(
    initial_lr: float = 0.001,
    min_lr: float = 1e-6,
    period: int = 1000,
) -> Callable[[int], float]:
    """Cosine annealing learning rate schedule.

    Args:
        initial_lr: Initial learning rate
        min_lr: Minimum learning rate
        period: Number of epochs for one complete cosine cycle

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        # The modulo restarts the cosine cycle every `period` epochs (warm restarts).
        cycle_position = epoch % period
        cosine_factor = 0.5 * (1 + math.cos(math.pi * cycle_position / period))
        return min_lr + (initial_lr - min_lr) * cosine_factor

    return schedule
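
# Illustrative usage (commented sketch, not part of the original module):
#   sched = cosine_annealing_schedule(initial_lr=0.001, min_lr=1e-6, period=1000)
#   sched(0)     -> 0.001          (cosine_factor = 1 at the start of a cycle)
#   sched(500)   -> ~0.0005        (halfway through the cycle)
#   sched(999)   -> ~1e-6          (approaches min_lr at the end of the cycle)
#   sched(1000)  -> 0.001          (cycle restarts)
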
def warmup_cosine_schedule(
    initial_lr: float = 0.001,
    warmup_epochs: int = 100,
    total_epochs: int = 1000,
    min_lr: float = 1e-6,
) -> Callable[[int], float]:
    """Warmup followed by cosine annealing schedule.

    Args:
        initial_lr: Peak learning rate after warmup
        warmup_epochs: Number of epochs for linear warmup
        total_epochs: Total number of training epochs
        min_lr: Minimum learning rate

    Returns:
        Function that takes epoch and returns learning rate
    """

    def schedule(epoch: int) -> float:
        if epoch < warmup_epochs:
            # Linear warmup from 0 up to initial_lr
            return initial_lr * epoch / warmup_epochs
        else:
            # Cosine annealing from initial_lr down to min_lr
            progress = (epoch - warmup_epochs) / (total_epochs - warmup_epochs)
            cosine_factor = 0.5 * (1 + math.cos(math.pi * progress))
            return min_lr + (initial_lr - min_lr) * cosine_factor

    return schedule
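
# Illustrative usage (commented sketch, not part of the original module):
#   sched = warmup_cosine_schedule(initial_lr=0.001, warmup_epochs=100,
#                                  total_epochs=1000, min_lr=1e-6)
#   sched(0)     -> 0.0            (warmup starts from zero)
#   sched(50)    -> 0.0005         (halfway through warmup)
#   sched(100)   -> 0.001          (peak rate at the end of warmup)
#   sched(1000)  -> 1e-6           (annealed to min_lr at total_epochs)
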
# Legacy function for backward compatibility
def learning_rate_schedule(
    epoch: int,
    initial_lr: float = 0.001,
    decay_factor: float = 0.95,
    decay_every: int = 1000,
) -> float:
    """Learning rate schedule for complex function learning.

    This is the original function from mlp_train_jit.py, kept for backward
    compatibility. Consider using exponential_decay_schedule instead for new code.

    Args:
        epoch: Current epoch number
        initial_lr: Initial learning rate
        decay_factor: Factor to multiply learning rate by
        decay_every: Apply decay every N epochs

    Returns:
        Learning rate for the current epoch
    """
    return initial_lr * (decay_factor ** (epoch // decay_every))
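
# Illustrative usage (commented sketch, not part of the original module):
#   learning_rate_schedule(0)     -> 0.001
#   learning_rate_schedule(2000)  -> ~0.0009025   (0.001 * 0.95 ** 2)
# Unlike the factories above, this legacy helper is called directly with the
# epoch instead of returning a schedule function.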