diff --git a/doc/python/installation.rst b/doc/python/installation.rst index 60e5c5247..544769401 100644 --- a/doc/python/installation.rst +++ b/doc/python/installation.rst @@ -29,6 +29,30 @@ Using pip (recommended) 3. Test your installation, as described in the :ref:`next section `. + +Using Conda (reproducible, Linux with CUDA support only) +======================================================== + +Anaconda (and its alternatives such as Miniconda) provides a distribution of the Python and R languages for scientific programming. The package management utilities that come with it make it easier to install software packages with multiple dependencies. + +If you have Anaconda installed on your computer, you can set up a KeOps-capable environment in seconds. + +1. In a terminal, type: + + .. code-block:: bash + + wget https://gist.githubusercontent.com/hypnopump/9be7f345b621fdec44982a352f5da2c0/raw/aacf9a5e3f0047d88b77ed36745057ad5b197528/keops_151.yml + conda env create --file keops_151.yml --yes + rm keops_151.yml + + + Note that compiled shared objects (``*.so`` files) will be stored in the folder ``~/.cache/libkeops-$version``, where ``~`` is the path to your home folder and ``$version`` is the package version number. + +2. Test your installation, as described in the :ref:`next section `. + +3. Be sure to activate your conda environment by typing ``conda activate keops-151`` before running your Python scripts. + + On Google Colab =============== diff --git a/pykeops/torch/high_level/nn.py b/pykeops/torch/high_level/nn.py new file mode 100644 index 000000000..cdc8f25f4 --- /dev/null +++ b/pykeops/torch/high_level/nn.py @@ -0,0 +1,107 @@ +import torch +from pykeops.torch import LazyTensor + + +# define activations - not efficient. 
Will prob pass this to hpc code +ACTS = {} +ACTS["relu"] = lambda x: x.relu() +ACTS["sigmoid"] = lambda x: 1 / ( 1 + (-x).exp() ) +ACTS["tanh"] = lambda x: ( x.exp()-(-x).exp() ) / ( x.exp()+(-x).exp() ) +ACTS["bisigmoid"] = lambda x: -1 + 2 * ACTS["sigmoid"](x) +ACTS["silu"] = lambda x: x * ACTS["sigmoid"](x) +ACTS["gelu_fast"] = lambda x: x * ACTS["sigmoid"](1.7206*x) +# parametrized +ACTS["elu"] = lambda x: x.relu() + a * ( x.clamp(float("-inf"), 0).exp()-1 ) +ACTS["leaky_relu"] = lambda x,a=0.3: x.relu() + a * x.clamp(float("-inf"), 0) + + +def kernel_linear(x, linear=None, w=None, b=None, act=None): + """ Applies a PyTorch nn.Linear layer to a keops LazyTensor. + Inputs: + * x: (..., D) LazyTensor to apply the MLP on + * linear: torch.nn.Linear, or any class with "weight" and "bias" attrs + * w: (M, N): a weights matrix + * b: (N,): a bias vector + * act: func. a non-linearity. already prepared for LazyTensors + Output: (..., N) + """ + assert linear is not None or w is not None or b is not None, + "A torch.nn.Linear or weights and biases must be passed" + + # get params from linear if not passed directly + if linear is not None and w is None: + w = linear.weight + b = linear.bias + + # reshape to last dim + w_ = keops_torch.LazyTensor(w.view(1, 1, -1)) + b_ = keops_torch.LazyTensor(b.view(1, 1, -1)) if b is not None else 0 + + # computation + out = w_.matvecmult(x) + b_ + if act is not None: + out = act(out) + + return out + + +## ATTENTION CLASSES + +class Multihead_Attention(torch.nn.Module): + def __init__(self, dim, heads=1, dim_head = 64, bias=True, **kwargs): + """ Implements Mutihead attention. For self-attention, + use same inputs and context. + * dim: int. input dim. + * heads: int. head number. + * dim_head: int. head dimension for inner product + * bias: bool. 
whether to use bias in MLPs + """ + super().__init__() + inner_dim = heads * dim_head + self.heads = heads + self.scale = dim_head ** -0.5 + + self.to_q = torch.nn.Linear(dim, inner_dim, bias = bias) + self.to_k = torch.nn.Linear(dim, inner_dim, bias = bias) + self.to_v = torch.nn.Linear(dim, inner_dim, bias = bias) + self.to_out = torch.nn.Linear(inner_dim, dim) + + + def forward(self, x, context, mask = None, **kwargs): + """ Inputs: + * x: (..., N, dim). Generate queries from. + * context: (..., N, dim). Generate keys and values. + * mask: (..., N). for masked attention. + + """ + # rearrange( self.to_q(x), '... n (h d) -> ... h n () d', h=heads ) + q = self.to_q(x) * self.scale + q.reshape_(*q.shape[:-1], self.heads, -1) + q.transpose_(-2, -3) + q.unsqueeze_(-2) + # rearrange( self.to_k(context), '... n (h d) -> ... h () n d', h=heads ) + k = self.to_k(context) + k.reshape_(*k.shape[:-1], self.heads, -1) + k.transpose_(-2, -3) + k.unsqueeze_(-3) + # rearrange( self.to_v(context), '... n (h d) -> ... h () n d', h=heads ) + v = self.to_k(context) + v.reshape_(*v.shape[:-1], self.heads, -1) + v.transpose_(-2, -3) + v.unsqueeze_(-3) + + # kernelized inner prod + q,k,v = map( lambda t: keops_torch.LazyTensor(t), (q,k,v) ) + attn = (q*k).sum(dim=-1) # way faster than (q | k) # (B, H, N, N, 1) + + if mask is not None: + mask = LazyTensor( mask.unsqueeze(-1) ) + attn += mask + + out = attn.sumsoftmaxweight(v, dim=len(x.shape)) # (B H N N) · (B H N D) + out.transpose_(-3, -2) + out.reshape_(*out.shape[:-2], -1) + + return self.to_out( out ) + +