Source code for nabla.nn.layers.activations

# ===----------------------------------------------------------------------=== #
# Nabla 2025
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #

"""Activation functions for neural networks."""

import numpy as np

import nabla as nb


def relu(x: nb.Array) -> nb.Array:
    """Rectified Linear Unit activation function.

    Args:
        x: Input array

    Returns:
        Array with ReLU applied element-wise
    """
    return nb.maximum(x, 0)


def leaky_relu(x: nb.Array, negative_slope: float = 0.01) -> nb.Array:
    """Leaky ReLU activation function.

    Args:
        x: Input array
        negative_slope: Slope for negative values

    Returns:
        Array with Leaky ReLU applied element-wise
    """
    zeros = nb.zeros_like(x)
    positive_part = nb.maximum(x, zeros)
    negative_part = nb.minimum(x, zeros) * negative_slope
    return positive_part + negative_part


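# Illustration only: the helper below is hypothetical and not part of the
# module's API. It restates, in plain NumPy, why the max/min split used in
# leaky_relu() matches the usual piecewise definition
# leaky_relu(x) = x if x >= 0 else negative_slope * x.
def _leaky_relu_split_sketch() -> None:
    x = np.array([-2.0, -0.5, 0.0, 1.5])
    slope = 0.01
    # max(x, 0) keeps the positive part; min(x, 0) * slope scales the negative part
    split = np.maximum(x, 0.0) + np.minimum(x, 0.0) * slope
    piecewise = np.where(x >= 0, x, slope * x)
    assert np.allclose(split, piecewise)

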
def sigmoid(x: nb.Array) -> nb.Array:
    """Sigmoid activation function.

    Args:
        x: Input array

    Returns:
        Array with sigmoid applied element-wise
    """
    # sigmoid(x) = 1 / (1 + exp(-x))
    # For numerical stability, evaluate the exponential only on non-positive
    # arguments:
    #   sigmoid(x) = 1 / (1 + exp(-x))        for x >= 0
    #   sigmoid(x) = exp(x) / (1 + exp(x))    for x < 0
    # Both branches can be written in terms of z = exp(-|x|), which never
    # overflows, so neither branch produces inf/NaN before the masks combine them.
    zeros = nb.zeros_like(x)
    ones = nb.ones_like(x)
    positive_mask = x >= zeros

    # z = exp(-|x|) lies in (0, 1] for every finite x
    neg_abs_x = nb.minimum(x, -x)
    z = nb.exp(neg_abs_x)

    # For x >= 0: 1 / (1 + exp(-x)) = 1 / (1 + z)
    positive_part = ones / (ones + z)
    # For x < 0: exp(x) / (1 + exp(x)) = z / (1 + z)
    negative_part = z / (ones + z)

    # Combine using a where-like operation
    positive_mask_float = positive_mask.astype(x.dtype)
    negative_mask_float = nb.ones_like(positive_mask_float) - positive_mask_float
    return positive_mask_float * positive_part + negative_mask_float * negative_part


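# Illustration only: the hypothetical helper below is not part of the module's
# API. It restates the stability argument from sigmoid() in plain NumPy: the
# naive form sends exp(-x) to infinity for large negative x, while the
# exp(-|x|) formulation keeps every intermediate value finite.
def _sigmoid_stability_sketch() -> None:
    x = np.array([-1000.0, 0.0, 1000.0])
    with np.errstate(over="ignore"):
        naive = 1.0 / (1.0 + np.exp(-x))  # exp overflows at x = -1000 (warning suppressed)
    z = np.exp(-np.abs(x))  # always in (0, 1], never overflows
    stable = np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    # Same values, but only the stable form avoids overflow in the exponential
    assert np.allclose(naive, stable)

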
def tanh(x: nb.Array) -> nb.Array:
    """Hyperbolic tangent activation function.

    Args:
        x: Input array

    Returns:
        Array with tanh applied element-wise
    """
    return nb.tanh(x)


def gelu(x: nb.Array) -> nb.Array:
    """Gaussian Error Linear Unit activation function.

    GELU(x) = x * Φ(x), where Φ(x) is the CDF of the standard normal
    distribution.

    Approximation:
    GELU(x) ≈ 0.5 * x * (1 + tanh(√(2/π) * (x + 0.044715 * x^3)))

    Args:
        x: Input array

    Returns:
        Array with GELU applied element-wise
    """
    # Constant for the GELU approximation
    sqrt_2_over_pi = np.sqrt(2.0 / np.pi)

    # GELU approximation
    x_cubed = x * x * x
    tanh_input = sqrt_2_over_pi * (x + 0.044715 * x_cubed)
    tanh_result = tanh(tanh_input)

    half = nb.full_like(x, 0.5)
    one = nb.ones_like(x)
    return half * x * (one + tanh_result)


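# Illustration only: a hypothetical helper (not part of the module) comparing
# the tanh approximation used above against the exact erf-based definition
# GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))), evaluated in plain NumPy.
def _gelu_approximation_sketch() -> None:
    from math import erf, sqrt

    x = np.linspace(-3.0, 3.0, 7)
    exact = 0.5 * x * (1.0 + np.array([erf(v / sqrt(2.0)) for v in x]))
    c = np.sqrt(2.0 / np.pi)
    approx = 0.5 * x * (1.0 + np.tanh(c * (x + 0.044715 * x**3)))
    # The tanh approximation tracks the exact GELU closely on this range
    assert np.max(np.abs(exact - approx)) < 1e-2

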
def swish(x: nb.Array, beta: float = 1.0) -> nb.Array:
    """Swish (SiLU) activation function.

    Swish(x) = x * sigmoid(β * x)

    When β = 1, this is SiLU (Sigmoid Linear Unit).

    Args:
        x: Input array
        beta: Scaling factor for sigmoid

    Returns:
        Array with Swish applied element-wise
    """
    scaled_x = x * beta if beta != 1.0 else x
    return x * sigmoid(scaled_x)


def silu(x: nb.Array) -> nb.Array:
    """Sigmoid Linear Unit (SiLU) activation function.

    SiLU(x) = x * sigmoid(x) = Swish(x, β=1)

    Args:
        x: Input array

    Returns:
        Array with SiLU applied element-wise
    """
    return swish(x, beta=1.0)


def softmax(x: nb.Array, axis: int = -1) -> nb.Array:
    """Softmax activation function.

    Args:
        x: Input array
        axis: Axis along which to compute softmax

    Returns:
        Array with softmax applied along the specified axis
    """
    from ...ops.special import softmax as special_softmax

    return special_softmax(x, axis=axis)


def log_softmax(x: nb.Array, axis: int = -1) -> nb.Array:
    """Log-softmax activation function.

    Args:
        x: Input array
        axis: Axis along which to compute log-softmax

    Returns:
        Array with log-softmax applied along the specified axis
    """
    from ...ops.special import logsumexp

    log_sum_exp = logsumexp(x, axis=axis, keep_dims=True)
    return x - log_sum_exp


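# Illustration only: a hypothetical NumPy sketch (not part of the module) of the
# identity used above, log_softmax(x) = x - logsumexp(x). Exponentiating the
# result recovers a probability distribution that sums to 1.
def _log_softmax_identity_sketch() -> None:
    x = np.array([1.0, 2.0, 3.0])
    m = np.max(x)
    lse = m + np.log(np.sum(np.exp(x - m)))  # max-shifted (stable) logsumexp
    log_probs = x - lse
    assert np.isclose(np.sum(np.exp(log_probs)), 1.0)

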
# Activation function registry for easy lookup
ACTIVATION_FUNCTIONS = {
    "relu": relu,
    "leaky_relu": leaky_relu,
    "sigmoid": sigmoid,
    "tanh": tanh,
    "gelu": gelu,
    "swish": swish,
    "silu": silu,
    "softmax": softmax,
    "log_softmax": log_softmax,
}


def get_activation(name: str):
    """Get activation function by name.

    Args:
        name: Name of the activation function

    Returns:
        Activation function

    Raises:
        ValueError: If activation function is not found
    """
    if name not in ACTIVATION_FUNCTIONS:
        available = ", ".join(ACTIVATION_FUNCTIONS.keys())
        raise ValueError(
            f"Unknown activation function '{name}'. Available: {available}"
        )
    return ACTIVATION_FUNCTIONS[name]


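# Usage sketch (illustration only, not part of the module): look up an
# activation by name via the registry and handle an unknown name. The helper
# name and variable names here are arbitrary.
def _get_activation_usage_sketch() -> None:
    act = get_activation("gelu")
    assert act is gelu  # registry lookup returns the function defined above
    try:
        get_activation("not_an_activation")
    except ValueError as err:
        # The error message lists the available activation names
        assert "Available:" in str(err)

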
__all__ = [
    "relu",
    "leaky_relu",
    "sigmoid",
    "tanh",
    "gelu",
    "swish",
    "silu",
    "softmax",
    "log_softmax",
    "get_activation",
    "ACTIVATION_FUNCTIONS",
]