Source code for nabla.nn.layers.activations
# ===----------------------------------------------------------------------=== #
# Nabla 2025
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #
"""Activation functions for neural networks."""
import numpy as np
import nabla as nb


def relu(x: nb.Array) -> nb.Array:
    """Rectified Linear Unit activation function.

    Args:
        x: Input array

    Returns:
        Array with ReLU applied element-wise
    """
    return nb.maximum(x, 0)


def leaky_relu(x: nb.Array, negative_slope: float = 0.01) -> nb.Array:
    """Leaky ReLU activation function.

    Args:
        x: Input array
        negative_slope: Slope for negative values

    Returns:
        Array with Leaky ReLU applied element-wise
    """
    zeros = nb.zeros_like(x)
    positive_part = nb.maximum(x, zeros)
    negative_part = nb.minimum(x, zeros) * negative_slope
    return positive_part + negative_part
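

# Illustrative sketch (not part of the nabla library; the helper name below is
# hypothetical): the decomposition used above, max(x, 0) + slope * min(x, 0),
# matches the usual piecewise definition of Leaky ReLU. A quick NumPy check:
def _leaky_relu_reference_check() -> None:
    x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
    decomposed = np.maximum(x, 0.0) + 0.01 * np.minimum(x, 0.0)
    piecewise = np.where(x >= 0.0, x, 0.01 * x)
    assert np.allclose(decomposed, piecewise)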


def sigmoid(x: nb.Array) -> nb.Array:
    """Sigmoid activation function.

    Args:
        x: Input array

    Returns:
        Array with sigmoid applied element-wise
    """
    # sigmoid(x) = 1 / (1 + exp(-x))
    # For numerical stability, only exponentiate non-positive values:
    #   sigmoid(x) = 1 / (1 + exp(-x))      for x >= 0  (exp(-x) <= 1)
    #   sigmoid(x) = exp(x) / (1 + exp(x))  for x < 0   (exp(x) < 1)
    # Both branches can be written with z = exp(-|x|), which never overflows.
    zeros = nb.zeros_like(x)
    ones = nb.ones_like(x)
    neg_abs_x = nb.minimum(x, -x)  # -|x|, always <= 0
    z = nb.exp(neg_abs_x)  # in (0, 1] even for large |x|
    # For positive values: 1 / (1 + exp(-x)) = 1 / (1 + z)
    positive_part = ones / (ones + z)
    # For negative values: exp(x) / (1 + exp(x)) = z / (1 + z)
    negative_part = z / (ones + z)
    # Combine using where-like operation
    positive_mask = x >= zeros
    positive_mask_float = positive_mask.astype(x.dtype)
    negative_mask_float = nb.ones_like(positive_mask_float) - positive_mask_float
    return positive_mask_float * positive_part + negative_mask_float * negative_part
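

# Illustrative sketch (not part of the nabla library; the helper name below is
# hypothetical): the -|x| trick above keeps the argument of exp non-positive, so
# neither branch can overflow even for extreme inputs. The same identity in
# plain NumPy for comparison:
def _stable_sigmoid_reference(x_np: np.ndarray) -> np.ndarray:
    z = np.exp(-np.abs(x_np))  # in (0, 1], never overflows
    return np.where(x_np >= 0, 1.0 / (1.0 + z), z / (1.0 + z))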


def tanh(x: nb.Array) -> nb.Array:
    """Hyperbolic tangent activation function.

    Args:
        x: Input array

    Returns:
        Array with tanh applied element-wise
    """
    return nb.tanh(x)


def gelu(x: nb.Array) -> nb.Array:
    """Gaussian Error Linear Unit activation function.

    GELU(x) = x * Φ(x) where Φ(x) is the CDF of the standard normal distribution.

    Approximation: GELU(x) ≈ 0.5 * x * (1 + tanh(√(2/π) * (x + 0.044715 * x^3)))

    Args:
        x: Input array

    Returns:
        Array with GELU applied element-wise
    """
    # Constants for GELU approximation
    sqrt_2_over_pi = np.sqrt(2.0 / np.pi)
    # GELU approximation
    x_cubed = x * x * x
    tanh_input = sqrt_2_over_pi * (x + 0.044715 * x_cubed)
    tanh_result = tanh(tanh_input)
    half = nb.full_like(x, 0.5)
    one = nb.ones_like(x)
    return half * x * (one + tanh_result)
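

# Illustrative sketch (not part of the nabla library; the helper name below is
# hypothetical): the tanh formula above approximates the exact GELU, x * Φ(x),
# with Φ expressed via the error function. A scalar comparison using only the
# standard library's math module:
def _gelu_tanh_vs_exact(v: float) -> tuple[float, float]:
    import math

    exact = 0.5 * v * (1.0 + math.erf(v / math.sqrt(2.0)))
    approx = 0.5 * v * (
        1.0 + math.tanh(math.sqrt(2.0 / math.pi) * (v + 0.044715 * v**3))
    )
    return exact, approx  # typically agree to within about 1e-3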


def swish(x: nb.Array, beta: float = 1.0) -> nb.Array:
    """Swish (SiLU) activation function.

    Swish(x) = x * sigmoid(β * x)

    When β = 1, this is SiLU (Sigmoid Linear Unit).

    Args:
        x: Input array
        beta: Scaling factor for sigmoid

    Returns:
        Array with Swish applied element-wise
    """
    scaled_x = x * beta if beta != 1.0 else x
    return x * sigmoid(scaled_x)


def silu(x: nb.Array) -> nb.Array:
    """Sigmoid Linear Unit (SiLU) activation function.

    SiLU(x) = x * sigmoid(x) = Swish(x, β=1)

    Args:
        x: Input array

    Returns:
        Array with SiLU applied element-wise
    """
    return swish(x, beta=1.0)


def softmax(x: nb.Array, axis: int = -1) -> nb.Array:
    """Softmax activation function.

    Args:
        x: Input array
        axis: Axis along which to compute softmax

    Returns:
        Array with softmax applied along specified axis
    """
    from ...ops.special import softmax as special_softmax

    return special_softmax(x, axis=axis)


def log_softmax(x: nb.Array, axis: int = -1) -> nb.Array:
    """Log-softmax activation function.

    Args:
        x: Input array
        axis: Axis along which to compute log-softmax

    Returns:
        Array with log-softmax applied along specified axis
    """
    from ...ops.special import logsumexp

    log_sum_exp = logsumexp(x, axis=axis, keep_dims=True)
    return x - log_sum_exp
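

# Illustrative sketch (not part of the nabla library; the helper name below is
# hypothetical): log_softmax(x) = x - logsumexp(x) is preferred over
# log(softmax(x)) because logsumexp can be computed stably by shifting by the
# per-axis maximum. The same identity in plain NumPy:
def _log_softmax_reference(x_np: np.ndarray, axis: int = -1) -> np.ndarray:
    m = np.max(x_np, axis=axis, keepdims=True)
    lse = m + np.log(np.sum(np.exp(x_np - m), axis=axis, keepdims=True))
    return x_np - lse  # exp(result) sums to 1 along axis, even for inputs like 1e3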


# Activation function registry for easy lookup
ACTIVATION_FUNCTIONS = {
    "relu": relu,
    "leaky_relu": leaky_relu,
    "sigmoid": sigmoid,
    "tanh": tanh,
    "gelu": gelu,
    "swish": swish,
    "silu": silu,
    "softmax": softmax,
    "log_softmax": log_softmax,
}


def get_activation(name: str):
    """Get activation function by name.

    Args:
        name: Name of the activation function

    Returns:
        Activation function

    Raises:
        ValueError: If activation function is not found
    """
    if name not in ACTIVATION_FUNCTIONS:
        available = ", ".join(ACTIVATION_FUNCTIONS.keys())
        raise ValueError(
            f"Unknown activation function '{name}'. Available: {available}"
        )
    return ACTIVATION_FUNCTIONS[name]
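

# Illustrative usage sketch (not part of the nabla library; the helper name
# below is hypothetical): looking up an activation by name and handling an
# unknown name via the ValueError raised above.
def _get_activation_usage_example() -> None:
    relu_fn = get_activation("relu")  # same callable as the module-level relu
    assert relu_fn is relu
    try:
        get_activation("not_an_activation")
    except ValueError as exc:
        print(exc)  # message lists the available activation names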


__all__ = [
    "relu",
    "leaky_relu",
    "sigmoid",
    "tanh",
    "gelu",
    "swish",
    "silu",
    "softmax",
    "log_softmax",
    "get_activation",
    "ACTIVATION_FUNCTIONS",
]