Tensors#
Tensor#
class Tensor(*, buffers: 'driver.Buffer | None' = None, value: 'graph.BufferValue | graph.TensorValue | None' = None, impl: 'TensorImpl | None' = None, is_traced: 'bool' = False):
Multi-dimensional array with eager execution and automatic compilation.
Methods#
abs#
def abs(self):
acos#
def acos(self):
argmax#
def argmax(self, axis: 'int | None' = None, keepdims: 'bool' = False):
argmin#
def argmin(self, axis: 'int | None' = None, keepdims: 'bool' = False):
atanh#
def atanh(self):
backward#
def backward(self, gradient: 'Tensor | None' = None, retain_graph: 'bool' = False, create_graph: 'bool' = False) -> 'None':
Compute gradients of this tensor w.r.t. graph leaves (PyTorch style).
Populates .grad on all tensors with requires_grad=True that this tensor depends on. All gradients are batch-realized for efficiency.
Parameters
gradient – Gradient w.r.t. this tensor. Required for non-scalar tensors.
retain_graph – Unused (maintained for PyTorch API compatibility).
create_graph – If True, the graph of the derivatives will be constructed, allowing higher-order derivatives to be computed.
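A minimal sketch of the autograd flow described above. The module name nabla and the ones() constructor are assumptions (not part of this reference); only requires_grad_, exp, sum, backward, and the populated .grad attribute come from the documented API.
import nabla as nb               # module name assumed
x = nb.ones((3,))                # hypothetical constructor, for illustration only
x = x.requires_grad_(True)       # mark x as a leaf that should receive a gradient
loss = x.exp().sum()             # scalar result, so backward() needs no explicit gradient
loss.backward()                  # populates x.grad with d(loss)/dx = exp(x)
print(x.grad)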
broadcast_to#
def broadcast_to(self, shape: 'ShapeLike') -> 'Tensor':
cast#
def cast(self, dtype: 'DType'):
cos#
def cos(self):
cpu#
def cpu(self) -> 'Tensor':
Move tensor to CPU, gathering shards if needed.
For sharded tensors, this first gathers all shards to a single device, then transfers to CPU. For unsharded tensors, it returns self if already on CPU, otherwise creates a new tensor on CPU.
Returns
– Tensor on CPU with all data gathered.
cuda#
def cuda(self, device: 'int | str' = 0) -> 'Tensor':
Move tensor to GPU (shortcut for PyTorch users).
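A short sketch of the cpu()/cuda() round trip, including the shard gathering described above. The module name and the ones() constructor are assumptions made for illustration.
import nabla as nb               # module name assumed
x = nb.ones((1024, 1024))        # hypothetical constructor, for illustration only
x_gpu = x.cuda(0)                # transfer to GPU 0 (PyTorch-style shortcut)
x_cpu = x_gpu.cpu()              # gather shards if sharded, then transfer back to CPU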
cumsum#
def cumsum(self, axis: 'int'):
detach#
def detach(self) -> 'Tensor':
Returns a new Tensor, detached from the current graph (PyTorch style).
dim#
def dim(self) -> 'int':
Alias for rank (PyTorch style).
erf#
def erf(self):
exp#
def exp(self):
expand#
def expand(self, *shape: 'int') -> 'Tensor':
Alias for broadcast_to (PyTorch style).
flatten#
def flatten(self, start_dim: 'int' = 0, end_dim: 'int' = -1) -> 'Tensor':
flip#
def flip(self, axis: 'int | tuple[int, ...]') -> 'Tensor':
floor#
def floor(self):
gather#
def gather(self) -> 'Tensor':
Gather shards into a single global tensor if needed (lazy).
gelu#
def gelu(self, approximate: 'str | bool' = 'none'):
hydrate#
def hydrate(self) -> 'Tensor':
Populate graph values from buffers for realized tensors.
If the tensor is already registered as a graph input, uses that. In EAGER_MAX_GRAPH mode, adds buffer data as a constant for intermediate tensors accessed during eager graph building.
is_inf#
def is_inf(self):
is_nan#
def is_nan(self):
item#
def item(self) -> 'float | int | bool':
log#
def log(self):
log1p#
def log1p(self):
logical_local_shape#
def logical_local_shape(self, shard_idx: 'int' = 0) -> 'graph.Shape | None':
logsoftmax#
def logsoftmax(self, axis: 'int' = -1):
max#
def max(self, axis: 'int | tuple[int, ...] | None' = None, keepdims: 'bool' = False):
mean#
def mean(self, axis: 'int | tuple[int, ...] | None' = None, keepdims: 'bool' = False):
min#
def min(self, axis: 'int | tuple[int, ...] | None' = None, keepdims: 'bool' = False):
new_empty#
def new_empty(self, shape: 'ShapeLike', *, dtype: 'DType | None' = None, device: 'Device | None' = None) -> 'Tensor':
Create a new uninitialized tensor (defaults to zeros in Nabla).
new_full#
def new_full(self, shape: 'ShapeLike', fill_value: 'Number', *, dtype: 'DType | None' = None, device: 'Device | None' = None) -> 'Tensor':
Create a new tensor filled with fill_value, with the same device/dtype as self by default.
new_ones#
def new_ones(self, shape: 'ShapeLike', *, dtype: 'DType | None' = None, device: 'Device | None' = None) -> 'Tensor':
Create a new tensor of ones with same device/dtype as self by default.
new_zeros#
def new_zeros(self, shape: 'ShapeLike', *, dtype: 'DType | None' = None, device: 'Device | None' = None) -> 'Tensor':
Create a new tensor of zeros with same device/dtype as self by default.
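A brief sketch of the new_* helpers, which inherit self's device and dtype unless overridden. The module name and initial ones() constructor are assumptions; the method calls follow the signatures above.
import nabla as nb               # module name assumed
x = nb.ones((4, 4))              # hypothetical constructor, for illustration only
z = x.new_zeros((2, 3))          # zeros, inheriting x's device and dtype
f = x.new_full((2, 3), 7.0)      # filled with 7.0, same device/dtype as x
e = x.new_empty((5,))            # "uninitialized" (defaults to zeros in Nabla, per the note above)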
num_elements#
def num_elements(self) -> 'int':
numel#
def numel(self) -> 'int':
Alias for num_elements() (PyTorch style).
numpy#
def numpy(self) -> 'np.ndarray':
Convert tensor to numpy array.
permute#
def permute(self, *order: 'int') -> 'Tensor':
physical_local_shape#
def physical_local_shape(self, shard_idx: 'int' = 0) -> 'graph.Shape | None':
physical_local_shape_ints#
def physical_local_shape_ints(self, shard_idx: 'int' = 0) -> 'tuple[int, ...] | None':
Int-tuple shape for a specific shard (avoids creating Shape/Dim objects).
realize#
def realize(self) -> 'Tensor':
Force immediate realization (blocking).
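A sketch of forcing realization. The constructor and module name are assumptions; realize(), exp(), and sum() are documented above.
import nabla as nb               # module name assumed
x = nb.ones((8, 8))              # hypothetical constructor, for illustration only
y = x.exp().sum()                # builds the computation; the value may still be pending
y = y.realize()                  # blocks until y's value has actually been computed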
relu#
def relu(self):
requires_grad_#
def requires_grad_(self, value: 'bool' = True) -> 'Tensor':
In-place alias for setting requires_grad (PyTorch style).
reshape#
def reshape(self, shape: 'ShapeLike') -> 'Tensor':
round#
def round(self):
rsqrt#
def rsqrt(self):
sigmoid#
def sigmoid(self):
silu#
def silu(self):
sin#
def sin(self):
size#
def size(self, dim: 'int | None' = None) -> 'graph.Shape | int':
Returns the shape or size of a specific dimension (PyTorch style).
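size() mirrors PyTorch: with no argument it returns the full shape, with a dim index it returns that dimension's length. The constructor and module name below are assumptions.
import nabla as nb               # module name assumed
x = nb.ones((2, 3, 4))           # hypothetical constructor, for illustration only
x.size()                         # full shape, e.g. (2, 3, 4)
x.size(1)                        # length of dimension 1, i.e. 3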
softmax#
def softmax(self, axis: 'int' = -1):
sqrt#
def sqrt(self):
squeeze#
def squeeze(self, axis: 'int | tuple[int, ...] | None' = None) -> 'Tensor':
sum#
def sum(self, axis: 'int | tuple[int, ...] | None' = None, keepdims: 'bool' = False):
swap_axes#
def swap_axes(self, axis1: 'int', axis2: 'int') -> 'Tensor':
tanh#
def tanh(self):
to#
def to(self, target: 'Device | str | DType') -> 'Tensor':
Move tensor to a device or cast to a dtype.
Parameters
target – Target Device object, device string (e.g. 'cpu', 'gpu:0'), or DType.
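A hedged sketch of the device-string form documented above; the module name and ones() constructor are assumptions.
import nabla as nb               # module name assumed
x = nb.ones((4,))                # hypothetical constructor, for illustration only
x = x.to('gpu:0')                # move to the first GPU via a device string
x = x.to('cpu')                  # and back to the CPU
# x.to(dtype) casts instead of moving when given a DType object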
to_numpy#
def to_numpy(self) -> 'np.ndarray':
Convert tensor to numpy array.
to_numpy_all#
def to_numpy_all(*tensors: 'Tensor') -> 'tuple[np.ndarray, ...]':
Convert multiple tensors to numpy arrays in a single batched compilation.
This is more efficient than calling .to_numpy() on each tensor individually,
as it combines all gather and realize operations into a single compilation.
Parameters
*tensors – Variable number of tensors to convert.
Returns
– Tuple of numpy arrays, one per input tensor.
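Because to_numpy_all batches the gather/realize work into one compilation, it is preferred over per-tensor to_numpy() calls when several results are needed at once. The module name, the ones() constructor, and the static-method call path shown here are assumptions.
import nabla as nb               # module name assumed
a = nb.ones((2, 2))              # hypothetical constructors, for illustration only
b = nb.ones((3,))
# Assumed to be callable as a static method on the Tensor class.
a_np, b_np = nb.Tensor.to_numpy_all(a, b)   # one compilation for both conversions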
tolist#
def tolist(self) -> 'list[Any]':
Convert tensor to a Python list (PyTorch style).
trace#
def trace(self) -> 'Tensor':
Enable tracing on this tensor for autograd.
transpose#
def transpose(self, axis1: 'int', axis2: 'int') -> 'Tensor':
trunc#
def trunc(self):
type_as#
def type_as(self, other: 'Tensor') -> 'Tensor':
Cast this tensor to the same dtype as other.
unsqueeze#
def unsqueeze(self, axis: 'int') -> 'Tensor':
view#
def view(self, *shape: 'int | ShapeLike') -> 'Tensor':
Alias for reshape() (PyTorch style).
with_sharding#
def with_sharding(self, mesh: 'DeviceMesh', dim_specs: 'list[ShardingSpec | str | list[str] | None]', replicated_axes: 'set[str] | None' = None) -> 'Tensor':
Apply sharding constraint, resharding if needed.
with_sharding_constraint#
def with_sharding_constraint(self, mesh: 'DeviceMesh', dim_specs: 'list[Any]', replicated_axes: 'set[str] | None' = None) -> 'Tensor':
Apply sharding constraint for global optimization; no immediate resharding.
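A very rough sketch of the sharding API above. The DeviceMesh construction is hypothetical (its real constructor is not part of this reference); only the with_sharding(mesh, dim_specs) call shape comes from the signature above.
import nabla as nb               # module name assumed
# Hypothetical 1-D mesh with one named axis "dp" over two devices;
# the real DeviceMesh constructor may look different.
mesh = nb.DeviceMesh(("dp",), (2,))
x = nb.ones((8, 4))              # hypothetical constructor, for illustration only
x = x.with_sharding(mesh, ["dp", None])   # shard dim 0 across "dp", replicate dim 1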
realize_all#
def realize_all(*tensors: 'Tensor') -> 'tuple[Tensor, ...]':
Realize multiple tensors in a single batched compilation.
This is more efficient than calling .realize() on each tensor individually,
as it combines all pending computations into a single graph compilation.
Parameters
*tensors – Variable number of tensors to realize.
Returns
– Tuple of realized tensors (same tensors, now with computed values).
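As with to_numpy_all, batching realization avoids one compilation per tensor. The module name, the ones() constructor, and the static-method call path are assumptions; realize_all, exp, and log1p are documented above.
import nabla as nb               # module name assumed
a = nb.ones((2, 2))              # hypothetical constructors, for illustration only
b = nb.ones((3,))
# Assumed to be callable as a static method on the Tensor class.
a, b = nb.Tensor.realize_all(a.exp(), b.log1p())   # one compilation for both graphs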
is_tensor#
def is_tensor(obj: 'Any') -> 'bool':
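A one-line sketch of the predicate, assuming it is re-exported at the package level and that the hypothetical ones() constructor from the earlier examples exists.
import nabla as nb               # module name assumed
nb.is_tensor(3.0)                # False: a plain Python float
nb.is_tensor(nb.ones((2,)))      # True for any nabla Tensor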