Tensors#

Tensor#

class Tensor(*, buffers: 'driver.Buffer | None' = None, value: 'graph.BufferValue | graph.TensorValue | None' = None, impl: 'TensorImpl | None' = None, is_traced: 'bool' = False):

Multi-dimensional array with eager execution and automatic compilation.

Methods#

abs#

def abs(self):

acos#

def acos(self):

argmax#

def argmax(self, axis: 'int | None' = None, keepdims: 'bool' = False):

argmin#

def argmin(self, axis: 'int | None' = None, keepdims: 'bool' = False):

atanh#

def atanh(self):

backward#

def backward(self, gradient: 'Tensor | None' = None, retain_graph: 'bool' = False, create_graph: 'bool' = False) -> 'None':

Compute gradients of this tensor w.r.t. graph leaves (PyTorch style).

Populates .grad on all tensors with requires_grad=True that this tensor depends on. All gradients are batch-realized for efficiency.

Parameters

  • gradient – Gradient w.r.t. this tensor. Required for non-scalar tensors.

  • retain_graph – Unused (maintained for PyTorch API compatibility).

  • create_graph – If True, the graph of the derivatives will be constructed, allowing higher-order derivatives to be computed.
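
A minimal usage sketch follows. The nb.array constructor and the arithmetic operators are assumed conveniences not documented in this section; requires_grad_(), sum(), backward(), and .grad are taken from this reference.

import nabla as nb

# nb.array is assumed here (not documented in this section).
x = nb.array([1.0, 2.0, 3.0]).requires_grad_()

# Scalar loss: no explicit `gradient` argument is needed.
loss = (x * x).sum()
loss.backward()

# .grad is populated on every requires_grad=True tensor the loss depends on.
print(x.grad)  # expected: 2 * x

# Non-scalar outputs require an explicit gradient of the same shape.
y = x * 2.0
y.backward(gradient=nb.array([1.0, 1.0, 1.0]))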

broadcast_to#

def broadcast_to(self, shape: 'ShapeLike') -> 'Tensor':

cast#

def cast(self, dtype: 'DType'):

cos#

def cos(self):

cpu#

def cpu(self) -> 'Tensor':

Move tensor to CPU, gathering shards if needed.

For sharded tensors, this first gathers all shards to a single device and then transfers to CPU. For unsharded tensors, it returns self if the tensor is already on CPU; otherwise it creates a new tensor on CPU.

Returns

– Tensor on CPU with all data gathered.

cuda#

def cuda(self, device: 'int | str' = 0) -> 'Tensor':

Move tensor to GPU (shortcut for PyTorch users).
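
A short device-movement sketch, assuming nb.array exists (it is not documented in this section) and that a GPU is available for the last two lines.

import nabla as nb

x = nb.array([[1.0, 2.0], [3.0, 4.0]])  # nb.array assumed, not documented here

x_cpu = x.cpu()        # returns self if x is already an unsharded CPU tensor;
                       # a sharded tensor is gathered first, then transferred

x_gpu = x.cuda(0)      # PyTorch-style shortcut; requires a GPU
x_gpu = x.to('gpu:0')  # equivalent device-string form accepted by to()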

cumsum#

def cumsum(self, axis: 'int'):

detach#

def detach(self) -> 'Tensor':

Returns a new Tensor, detached from the current graph (PyTorch style).
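
A sketch of how detach() cuts a value out of gradient tracking (PyTorch semantics, as the docstring indicates); nb.array and the arithmetic operators are assumed.

import nabla as nb

x = nb.array([1.0, 2.0]).requires_grad_()  # nb.array assumed, not documented here

y = x * 3.0      # tracked: gradients can flow back to x
z = y.detach()   # same values, but disconnected from the graph

loss = (y * y).sum() + (z * z).sum()
loss.backward()
# x.grad reflects only the y-branch; the detached z-branch contributes nothing.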

dim#

def dim(self) -> 'int':

Alias for rank (PyTorch style).

erf#

def erf(self):

exp#

def exp(self):

expand#

def expand(self, *shape: 'int') -> 'Tensor':

Alias for broadcast_to (PyTorch style).

flatten#

def flatten(self, start_dim: 'int' = 0, end_dim: 'int' = -1) -> 'Tensor':

flip#

def flip(self, axis: 'int | tuple[int, ...]') -> 'Tensor':

floor#

def floor(self):

gather#

def gather(self) -> 'Tensor':

Gather shards into a single global tensor if needed (lazy).

gelu#

def gelu(self, approximate: 'str | bool' = 'none'):

hydrate#

def hydrate(self) -> 'Tensor':

Populate graph values from buffers for realized tensors.

If the tensor is already registered as a graph input, that input is reused. In EAGER_MAX_GRAPH mode, buffer data is added as a constant for intermediate tensors accessed during eager graph building.

is_inf#

def is_inf(self):

is_nan#

def is_nan(self):

item#

def item(self) -> 'float | int | bool':

log#

def log(self):

log1p#

def log1p(self):

logical_local_shape#

def logical_local_shape(self, shard_idx: 'int' = 0) -> 'graph.Shape | None':

logsoftmax#

def logsoftmax(self, axis: 'int' = -1):

max#

def max(self, axis: 'int | tuple[int, ...] | None' = None, keepdims: 'bool' = False):

mean#

def mean(self, axis: 'int | tuple[int, ...] | None' = None, keepdims: 'bool' = False):

min#

def min(self, axis: 'int | tuple[int, ...] | None' = None, keepdims: 'bool' = False):

new_empty#

def new_empty(self, shape: 'ShapeLike', *, dtype: 'DType | None' = None, device: 'Device | None' = None) -> 'Tensor':

Create a new uninitialized tensor (defaults to zeros in Nabla).

new_full#

def new_full(self, shape: 'ShapeLike', fill_value: 'Number', *, dtype: 'DType | None' = None, device: 'Device | None' = None) -> 'Tensor':

Create a new tensor filled with fill_value, using the same device/dtype as self by default.

new_ones#

def new_ones(self, shape: 'ShapeLike', *, dtype: 'DType | None' = None, device: 'Device | None' = None) -> 'Tensor':

Create a new tensor of ones, using the same device/dtype as self by default.

new_zeros#

def new_zeros(self, shape: 'ShapeLike', *, dtype: 'DType | None' = None, device: 'Device | None' = None) -> 'Tensor':

Create a new tensor of zeros, using the same device/dtype as self by default.
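
A quick sketch of the new_* factories; they default to self's device and dtype unless overridden. nb.array is assumed for the starting tensor.

import nabla as nb

x = nb.array([[1.0, 2.0], [3.0, 4.0]])  # nb.array assumed, not documented here

a = x.new_zeros((2, 3))        # zeros, same device/dtype as x
b = x.new_ones((4,))           # ones, same device/dtype as x
c = x.new_full((2, 2), 7.0)    # every element set to 7.0
d = x.new_empty((3, 3))        # "uninitialized" (zeros in Nabla)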

num_elements#

def num_elements(self) -> 'int':

numel#

def numel(self) -> 'int':

Alias for num_elements() (PyTorch style).

numpy#

def numpy(self) -> 'np.ndarray':

Convert tensor to numpy array.

permute#

def permute(self, *order: 'int') -> 'Tensor':

physical_local_shape#

def physical_local_shape(self, shard_idx: 'int' = 0) -> 'graph.Shape | None':

physical_local_shape_ints#

def physical_local_shape_ints(self, shard_idx: 'int' = 0) -> 'tuple[int, ...] | None':

Int-tuple shape for a specific shard (avoids creating Shape/Dim objects).

realize#

def realize(self) -> 'Tensor':

Force immediate realization (blocking).

relu#

def relu(self):

requires_grad_#

def requires_grad_(self, value: 'bool' = True) -> 'Tensor':

In-place style alias for setting requires_grad (PyTorch style).

reshape#

def reshape(self, shape: 'ShapeLike') -> 'Tensor':

round#

def round(self):

rsqrt#

def rsqrt(self):

shard#

def shard(self, mesh: 'DeviceMesh', dim_specs: 'list[ShardingSpec | str | list[str] | None]', replicated_axes: 'set[str] | None' = None) -> 'Tensor':

Shard this tensor across a device mesh, handling resharding and vmap batch dims.

shard_shape#

def shard_shape(self, shard_idx: 'int' = 0) -> 'graph.Shape':

Returns the shape of a specific shard.

sigmoid#

def sigmoid(self):

silu#

def silu(self):

sin#

def sin(self):

size#

def size(self, dim: 'int | None' = None) -> 'graph.Shape | int':

Returns the full shape, or the size of dimension dim if dim is given (PyTorch style).

softmax#

def softmax(self, axis: 'int' = -1):

sqrt#

def sqrt(self):

squeeze#

def squeeze(self, axis: 'int | tuple[int, ...] | None' = None) -> 'Tensor':

sum#

def sum(self, axis: 'int | tuple[int, ...] | None' = None, keepdims: 'bool' = False):

swap_axes#

def swap_axes(self, axis1: 'int', axis2: 'int') -> 'Tensor':

tanh#

def tanh(self):

to#

def to(self, target: 'Device | str | DType') -> 'Tensor':

Move tensor to a device or cast to a dtype.

Parameters

  • target – Target Device object, device string (e.g. ‘cpu’, ‘gpu:0’), or DType.
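
A sketch of the accepted target forms. The device strings follow the parameter description above; the dtype spelling (nb.DType.float32) is an assumption about how the library exposes DType values.

import nabla as nb

x = nb.array([1, 2, 3])           # nb.array assumed, not documented here

x_cpu = x.to('cpu')               # device string
x_gpu = x.to('gpu:0')             # device string; requires a GPU
x_f32 = x.to(nb.DType.float32)    # DType target (exact dtype spelling is assumed)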

to_numpy#

def to_numpy(self) -> 'np.ndarray':

Convert tensor to numpy array.

to_numpy_all#

def to_numpy_all(*tensors: 'Tensor') -> 'tuple[np.ndarray, ...]':

Convert multiple tensors to numpy arrays in a single batched compilation.

This is more efficient than calling .to_numpy() on each tensor individually, as it combines all gather and realize operations into a single compilation.

Parameters

  • *tensors – Variable number of tensors to convert.

Returns

– Tuple of numpy arrays, one per input tensor.
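
A sketch of converting several tensors in one batched compilation. Since the signature takes only *tensors, it is shown here called on the class; nb.array and the nb.Tensor export path are assumptions.

import nabla as nb

a = nb.array([1.0, 2.0])     # nb.array assumed, not documented here
b = nb.array([3.0, 4.0])
c = a + b                    # still unrealized

# One batched gather/realize/compile instead of three separate to_numpy() calls.
a_np, b_np, c_np = nb.Tensor.to_numpy_all(a, b, c)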

tolist#

def tolist(self) -> 'list[Any]':

Convert tensor to a Python list (PyTorch style).

trace#

def trace(self) -> 'Tensor':

Enable tracing on this tensor for autograd.

transpose#

def transpose(self, axis1: 'int', axis2: 'int') -> 'Tensor':

trunc#

def trunc(self):

type_as#

def type_as(self, other: 'Tensor') -> 'Tensor':

Cast this tensor to the same dtype as other.

unsqueeze#

def unsqueeze(self, axis: 'int') -> 'Tensor':

view#

def view(self, *shape: 'int | ShapeLike') -> 'Tensor':

Alias for reshape() (PyTorch style).

with_sharding#

def with_sharding(self, mesh: 'DeviceMesh', dim_specs: 'list[ShardingSpec | str | list[str] | None]', replicated_axes: 'set[str] | None' = None) -> 'Tensor':

Apply sharding constraint, resharding if needed.

with_sharding_constraint#

def with_sharding_constraint(self, mesh: 'DeviceMesh', dim_specs: 'list[Any]', replicated_axes: 'set[str] | None' = None) -> 'Tensor':

Apply sharding constraint for global optimization; no immediate resharding.


realize_all#

def realize_all(*tensors: 'Tensor') -> 'tuple[Tensor, ...]':

Realize multiple tensors in a single batched compilation.

This is more efficient than calling .realize() on each tensor individually, as it combines all pending computations into a single graph compilation.

Parameters

  • *tensors – Variable number of tensors to realize.

Returns

– Tuple of realized tensors (same tensors, now with computed values).
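
A sketch of batched realization; nb.array and the package-level export of realize_all are assumptions, while realize()/realize_all() themselves are taken from this reference.

import nabla as nb

x = nb.array([1.0, 2.0]) * 3.0   # nb.array assumed; results stay lazy until realized
y = x.sum() + 1.0

# One compilation for both tensors, instead of x.realize(); y.realize()
x, y = nb.realize_all(x, y)      # assumed to be exposed at the package level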


is_tensor#

def is_tensor(obj: 'Any') -> 'bool':