Source code for nabla.nn.layers.activations
# ===----------------------------------------------------------------------=== #
# Nabla 2025
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #
"""Activation functions for neural networks."""
import numpy as np
import nabla as nb


def relu(x: nb.Array) -> nb.Array:
    """Rectified Linear Unit activation function.

    Args:
        x: Input array

    Returns:
        Array with ReLU applied element-wise
    """
    return nb.maximum(x, 0)


def leaky_relu(x: nb.Array, negative_slope: float = 0.01) -> nb.Array:
    """Leaky ReLU activation function.

    Args:
        x: Input array
        negative_slope: Slope for negative values

    Returns:
        Array with Leaky ReLU applied element-wise
    """
    zeros = nb.zeros_like(x)
    positive_part = nb.maximum(x, zeros)
    negative_part = nb.minimum(x, zeros) * negative_slope
    return positive_part + negative_part
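

# Illustrative sketch (not part of the nabla library; the helper name below is
# hypothetical): the decomposition used above, max(x, 0) + slope * min(x, 0),
# matches the usual piecewise definition of Leaky ReLU. A quick NumPy check:
def _leaky_relu_reference_check() -> None:
    x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
    decomposed = np.maximum(x, 0.0) + 0.01 * np.minimum(x, 0.0)
    piecewise = np.where(x >= 0.0, x, 0.01 * x)
    assert np.allclose(decomposed, piecewise)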


def sigmoid(x: nb.Array) -> nb.Array:
    """Sigmoid activation function.

    Args:
        x: Input array

    Returns:
        Array with sigmoid applied element-wise
    """
    # sigmoid(x) = 1 / (1 + exp(-x))
    # For numerical stability, only exponentiate non-positive values:
    #   sigmoid(x) = 1 / (1 + exp(-x))      for x >= 0  (exp(-x) <= 1)
    #   sigmoid(x) = exp(x) / (1 + exp(x))  for x < 0   (exp(x) < 1)
    # Both branches can be written with z = exp(-|x|), which never overflows.
    zeros = nb.zeros_like(x)
    ones = nb.ones_like(x)
    neg_abs_x = nb.minimum(x, -x)  # -|x|, always <= 0
    z = nb.exp(neg_abs_x)  # in (0, 1] even for large |x|
    # For positive values: 1 / (1 + exp(-x)) = 1 / (1 + z)
    positive_part = ones / (ones + z)
    # For negative values: exp(x) / (1 + exp(x)) = z / (1 + z)
    negative_part = z / (ones + z)
    # Combine using where-like operation
    positive_mask = x >= zeros
    positive_mask_float = positive_mask.astype(x.dtype)
    negative_mask_float = nb.ones_like(positive_mask_float) - positive_mask_float
    return positive_mask_float * positive_part + negative_mask_float * negative_part
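

# Illustrative sketch (not part of the nabla library; the helper name below is
# hypothetical): the -|x| trick above keeps the argument of exp non-positive, so
# neither branch can overflow even for extreme inputs. The same identity in
# plain NumPy for comparison:
def _stable_sigmoid_reference(x_np: np.ndarray) -> np.ndarray:
    z = np.exp(-np.abs(x_np))  # in (0, 1], never overflows
    return np.where(x_np >= 0, 1.0 / (1.0 + z), z / (1.0 + z))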


def tanh(x: nb.Array) -> nb.Array:
    """Hyperbolic tangent activation function.

    Args:
        x: Input array

    Returns:
        Array with tanh applied element-wise
    """
    return nb.tanh(x)


def gelu(x: nb.Array) -> nb.Array:
    """Gaussian Error Linear Unit activation function.

    GELU(x) = x * Φ(x) where Φ(x) is the CDF of the standard normal distribution.

    Approximation: GELU(x) ≈ 0.5 * x * (1 + tanh(√(2/π) * (x + 0.044715 * x^3)))

    Args:
        x: Input array

    Returns:
        Array with GELU applied element-wise
    """
    # Constants for GELU approximation
    sqrt_2_over_pi = np.sqrt(2.0 / np.pi)
    # GELU approximation
    x_cubed = x * x * x
    tanh_input = sqrt_2_over_pi * (x + 0.044715 * x_cubed)
    tanh_result = tanh(tanh_input)
    half = nb.full_like(x, 0.5)
    one = nb.ones_like(x)
    return half * x * (one + tanh_result)
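

# Illustrative sketch (not part of the nabla library; the helper name below is
# hypothetical): the tanh formula above approximates the exact GELU, x * Φ(x),
# with Φ expressed via the error function. A scalar comparison using only the
# standard library's math module:
def _gelu_tanh_vs_exact(v: float) -> tuple[float, float]:
    import math

    exact = 0.5 * v * (1.0 + math.erf(v / math.sqrt(2.0)))
    approx = 0.5 * v * (
        1.0 + math.tanh(math.sqrt(2.0 / math.pi) * (v + 0.044715 * v**3))
    )
    return exact, approx  # typically agree to within about 1e-3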


def swish(x: nb.Array, beta: float = 1.0) -> nb.Array:
    """Swish (SiLU) activation function.

    Swish(x) = x * sigmoid(β * x)

    When β = 1, this is SiLU (Sigmoid Linear Unit).

    Args:
        x: Input array
        beta: Scaling factor for sigmoid

    Returns:
        Array with Swish applied element-wise
    """
    scaled_x = x * beta if beta != 1.0 else x
    return x * sigmoid(scaled_x)


def silu(x: nb.Array) -> nb.Array:
    """Sigmoid Linear Unit (SiLU) activation function.

    SiLU(x) = x * sigmoid(x) = Swish(x, β=1)

    Args:
        x: Input array

    Returns:
        Array with SiLU applied element-wise
    """
    return swish(x, beta=1.0)


def softmax(x: nb.Array, axis: int = -1) -> nb.Array:
    """Softmax activation function.

    Args:
        x: Input array
        axis: Axis along which to compute softmax

    Returns:
        Array with softmax applied along specified axis
    """
    from ...ops.special import softmax as special_softmax

    return special_softmax(x, axis=axis)


def log_softmax(x: nb.Array, axis: int = -1) -> nb.Array:
    """Log-softmax activation function.

    Args:
        x: Input array
        axis: Axis along which to compute log-softmax

    Returns:
        Array with log-softmax applied along specified axis
    """
    from ...ops.special import logsumexp

    log_sum_exp = logsumexp(x, axis=axis, keep_dims=True)
    return x - log_sum_exp
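

# Illustrative sketch (not part of the nabla library; the helper name below is
# hypothetical): log_softmax(x) = x - logsumexp(x) is preferred over
# log(softmax(x)) because logsumexp can be computed stably by shifting by the
# per-axis maximum. The same identity in plain NumPy:
def _log_softmax_reference(x_np: np.ndarray, axis: int = -1) -> np.ndarray:
    m = np.max(x_np, axis=axis, keepdims=True)
    lse = m + np.log(np.sum(np.exp(x_np - m), axis=axis, keepdims=True))
    return x_np - lse  # exp(result) sums to 1 along axis, even for inputs like 1e3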


# Activation function registry for easy lookup
ACTIVATION_FUNCTIONS = {
    "relu": relu,
    "leaky_relu": leaky_relu,
    "sigmoid": sigmoid,
    "tanh": tanh,
    "gelu": gelu,
    "swish": swish,
    "silu": silu,
    "softmax": softmax,
    "log_softmax": log_softmax,
}


def get_activation(name: str):
    """Get activation function by name.

    Args:
        name: Name of the activation function

    Returns:
        Activation function

    Raises:
        ValueError: If activation function is not found
    """
    if name not in ACTIVATION_FUNCTIONS:
        available = ", ".join(ACTIVATION_FUNCTIONS.keys())
        raise ValueError(
            f"Unknown activation function '{name}'. Available: {available}"
        )
    return ACTIVATION_FUNCTIONS[name]
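

# Illustrative usage sketch (not part of the nabla library; the helper name
# below is hypothetical): looking up an activation by name and handling an
# unknown name via the ValueError raised above.
def _get_activation_usage_example() -> None:
    relu_fn = get_activation("relu")  # same callable as the module-level relu
    assert relu_fn is relu
    try:
        get_activation("not_an_activation")
    except ValueError as exc:
        print(exc)  # message lists the available activation names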


__all__ = [
    "relu",
    "leaky_relu",
    "sigmoid",
    "tanh",
    "gelu",
    "swish",
    "silu",
    "softmax",
    "log_softmax",
    "get_activation",
    "ACTIVATION_FUNCTIONS",
]