@torch.compile(
    dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline)
)
class Model(nn.Module):
    """Dense block in which each layer reads the block input concatenated
    with the outputs of every layer that ran before it."""

    def __init__(self, num_layers: int, num_input_features: int, growth_rate: int):
        """
        :param num_layers: The number of layers in the dense block
        :param num_input_features: The number of input feature maps
        :param growth_rate: The number of feature maps each layer contributes
        """
        super().__init__()

        # Layer ``idx`` receives the block input plus ``idx`` earlier outputs
        # of ``growth_rate`` channels each.
        self.layers = nn.ModuleList(
            [
                self._make_layer(num_input_features + idx * growth_rate, growth_rate)
                for idx in range(num_layers)
            ]
        )

    def _make_layer(self, in_features: int, growth_rate: int):
        """
        Builds one BatchNorm -> ReLU -> 3x3 Conv2D -> Dropout stage.

        :param in_features: Number of channels entering the stage
        :param growth_rate: Number of channels the convolution produces
        :return: The stage as an nn.Sequential
        """
        stages = [
            nn.BatchNorm2d(in_features),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_features, growth_rate, kernel_size=3, padding=1, bias=False),
            nn.Dropout(0.0),
        ]
        return nn.Sequential(*stages)

    def forward(self, x):
        """
        :param x: Input tensor of shape (batch_size, num_input_features, height, width)
        :return: Channel-wise concatenation of the input and every layer output
        """
        collected = [x]
        for stage in self.layers:
            collected.append(stage(x))
            # The next layer consumes everything produced so far.
            x = torch.cat(collected, dim=1)
        return x
class FireModule(nn.Module):
    """SqueezeNet fire module: a 1x1 squeeze convolution feeding parallel
    1x1 and 3x3 expand convolutions whose outputs are concatenated."""

    def __init__(
        self, in_channels, squeeze_channels, expand1x1_channels, expand3x3_channels
    ):
        """
        :param in_channels: Number of input channels
        :param squeeze_channels: Number of channels produced by the squeeze layer
        :param expand1x1_channels: Number of channels produced by the 1x1 expand layer
        :param expand3x3_channels: Number of channels produced by the 3x3 expand layer
        """
        super().__init__()

        # Squeeze stage.
        self.squeeze = nn.Conv2d(in_channels, squeeze_channels, kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)

        # 1x1 expand branch.
        self.expand1x1 = nn.Conv2d(squeeze_channels, expand1x1_channels, kernel_size=1)
        self.expand1x1_activation = nn.ReLU(inplace=True)

        # 3x3 expand branch; padding=1 keeps the spatial size equal to the
        # 1x1 branch so the two can be concatenated.
        self.expand3x3 = nn.Conv2d(
            squeeze_channels, expand3x3_channels, kernel_size=3, padding=1
        )
        self.expand3x3_activation = nn.ReLU(inplace=True)

    def forward(self, x):
        """
        :param x: Input tensor, shape (batch_size, in_channels, height, width)
        :return: Output tensor, shape (batch_size, expand1x1_channels + expand3x3_channels, height, width)
        """
        squeezed = self.squeeze_activation(self.squeeze(x))
        branch_1x1 = self.expand1x1_activation(self.expand1x1(squeezed))
        branch_3x3 = self.expand3x3_activation(self.expand3x3(squeezed))
        return torch.cat([branch_1x1, branch_3x3], 1)
@torch.compile(
    dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline)
)
class Model(nn.Module):
    """MBConv block: optional 1x1 expansion, depthwise convolution, 1x1
    projection, and a residual add when input and output shapes allow it."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, expand_ratio):
        """
        MBConv block implementation.

        :param in_channels: Number of input channels.
        :param out_channels: Number of output channels.
        :param kernel_size: Kernel size for the depthwise convolution.
        :param stride: Stride for the depthwise convolution.
        :param expand_ratio: Expansion ratio for the intermediate channels.
        """
        super().__init__()

        # The skip connection is only used when the block keeps both the
        # stride and the channel count unchanged.
        self.use_residual = stride == 1 and in_channels == out_channels
        expanded = in_channels * expand_ratio

        # With expand_ratio == 1 no expansion stage is created at all;
        # forward() detects that through the missing attribute.
        if expand_ratio != 1:
            expand_stages = [
                nn.Conv2d(
                    in_channels,
                    expanded,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    bias=False,
                ),
                nn.BatchNorm2d(expanded),
                nn.ReLU6(inplace=True),
            ]
            self.expand_conv = nn.Sequential(*expand_stages)

        # groups == channels makes this a depthwise convolution.
        depthwise_stages = [
            nn.Conv2d(
                expanded,
                expanded,
                kernel_size=kernel_size,
                stride=stride,
                padding=(kernel_size - 1) // 2,
                groups=expanded,
                bias=False,
            ),
            nn.BatchNorm2d(expanded),
            nn.ReLU6(inplace=True),
        ]
        self.depthwise_conv = nn.Sequential(*depthwise_stages)

        # Linear projection back to out_channels (no activation).
        project_stages = [
            nn.Conv2d(
                expanded, out_channels, kernel_size=1, stride=1, padding=0, bias=False
            ),
            nn.BatchNorm2d(out_channels),
        ]
        self.project_conv = nn.Sequential(*project_stages)

    def forward(self, x):
        """
        Forward pass of the MBConv block.

        :param x: The input tensor, shape (batch_size, in_channels, H, W)
        :return: The output tensor, shape (batch_size, out_channels, H', W')
        """
        shortcut = x

        out = self.expand_conv(x) if hasattr(self, "expand_conv") else x
        out = self.project_conv(self.depthwise_conv(out))

        if not self.use_residual:
            return out

        # In-place add on the projection output, as in the reference model.
        out += shortcut
        return out
class ChannelShuffle(nn.Module):
    """Interleaves channels across groups so that information can flow
    between the groups of consecutive group convolutions."""

    def __init__(self, groups):
        """
        Channel shuffle operation.

        :param groups: Number of groups for shuffling.
        """
        super().__init__()
        self.groups = groups

    def forward(self, x):
        """
        Forward pass for channel shuffle.

        :param x: Input tensor, shape (batch_size, channels, height, width)
        :return: Output tensor, shape (batch_size, channels, height, width)
        """
        n, c, h, w = x.shape
        per_group = c // self.groups

        # Split the channel axis into (groups, per_group), swap those two
        # axes, then merge them back: channel g * per_group + k moves to
        # position k * groups + g.
        grouped = x.view(n, self.groups, per_group, h, w)
        interleaved = grouped.transpose(1, 2).contiguous()
        return interleaved.view(n, -1, h, w)
@torch.compile(
    dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline)
)
class Model(nn.Module):
    """Vision Transformer: linear patch embedding, a prepended class token,
    a learned positional embedding, a transformer encoder and an MLP head."""

    def __init__(
        self,
        image_size,
        patch_size,
        num_classes,
        dim,
        depth,
        heads,
        mlp_dim,
        channels=3,
        dropout=0.1,
        emb_dropout=0.1,
    ):
        """
        Vision Transformer (ViT) model.

        :param image_size: The size of the input image (assumed to be square).
        :param patch_size: The size of each patch (assumed to be square).
        :param num_classes: The number of output classes.
        :param dim: The dimensionality of the embedding space.
        :param depth: The number of transformer layers.
        :param heads: The number of attention heads.
        :param mlp_dim: The dimensionality of the MLP (Multi-Layer Perceptron) in the transformer.
        :param channels: The number of channels in the input image (default is 3 for RGB).
        :param dropout: Dropout rate applied in the encoder layers and in the MLP head.
        :param emb_dropout: Dropout rate applied to the embedded patches.
        :raises AssertionError: If image_size is not a multiple of patch_size.
        """
        super(Model, self).__init__()

        assert image_size % patch_size == 0, (
            "Image dimensions must be divisible by the patch size."
        )
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * patch_size**2

        self.patch_size = patch_size
        # One positional slot per patch plus one for the class token.
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.patch_to_embedding = nn.Linear(patch_dim, dim)
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        # NOTE(review): the encoder layer is built without batch_first=True,
        # so by PyTorch's default it reads dim 0 as the sequence axis, while
        # forward() feeds it a tensor whose dim 0 is the batch. Confirm this
        # matches the reference model before changing it.
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=dim, nhead=heads, dim_feedforward=mlp_dim, dropout=dropout
            ),
            num_layers=depth,
        )

        self.to_cls_token = nn.Identity()
        self.mlp_head = nn.Sequential(
            nn.Linear(dim, mlp_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(mlp_dim, num_classes),
        )

    def forward(self, img):
        """
        Forward pass of the Vision Transformer.

        :param img: The input image tensor, shape (batch_size, channels, image_size, image_size).
        :return: The output tensor, shape (batch_size, num_classes).
        """
        p = self.patch_size

        # Two unfolds tile the height and width axes into p x p windows; the
        # reshape then flattens everything after the batch axis into rows of
        # length p * p * channels.
        # NOTE(review): there is no permute between unfold and reshape, so
        # the channel axis stays ahead of the patch-grid axes and a row may
        # not correspond to a single spatial patch — confirm this is the
        # intended reference behaviour.
        x = (
            img.unfold(2, p, p)
            .unfold(3, p, p)
            .reshape(img.shape[0], -1, p * p * img.shape[1])
        )
        x = self.patch_to_embedding(x)

        # Broadcast the single learned class token over the batch and put it
        # at index 0 of the token axis.
        cls_tokens = self.cls_token.expand(img.shape[0], -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        # In-place add on the freshly concatenated tensor; pos_embedding
        # broadcasts over the batch axis.
        x += self.pos_embedding
        x = self.dropout(x)

        x = self.transformer(x)

        # Only the class-token position (index 0 on dim 1) feeds the head.
        x = self.to_cls_token(x[:, 0])
        return self.mlp_head(x)
@torch.compile(
    dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline)
)
class Model(nn.Module):
    """Single-step vanilla RNN cell that keeps its hidden state on the
    module between calls."""

    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        """
        Initialize the Vanilla RNN model.

        :param input_size: The number of input features (int).
        :param hidden_size: The size of the hidden state (int).
        :param output_size: The number of output features (int).
        """
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # The hidden state is a plain tensor attribute (not a registered
        # parameter or buffer). Its leading dimension comes from the
        # module-level ``batch_size`` constant defined after this class, so
        # that constant must exist by the time the model is instantiated.
        self.hidden = torch.randn((batch_size, hidden_size))

        # Define the RNN cell components. There is no separate
        # hidden-to-hidden layer: i2h consumes the concatenation of the
        # input and the previous hidden state.
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)  # [input, hidden] to hidden
        self.h2o = nn.Linear(hidden_size, output_size)  # Hidden to output
        self.tanh = nn.Tanh()  # Activation function for hidden state

    def forward(self, x: torch.Tensor, initial_hidden=None) -> torch.Tensor:
        """
        Forward pass of the Vanilla RNN (one time step).

        :param x: Input tensor of shape (batch_size, input_size).
        :param initial_hidden: Optional tensor copied into the stored hidden
            state before the step; when None the stored state is used as is.
        :return: Output tensor of shape (batch_size, output_size). The new
            hidden state is not returned; it is stored on ``self.hidden``.
        """
        if initial_hidden is not None:
            # In-place copy into the stored hidden state.
            self.hidden.copy_(initial_hidden)
        # Follow the input's device, since the hidden state is a plain
        # attribute assigned in __init__.
        self.hidden = self.hidden.to(x.device)
        combined = torch.cat(
            (x, self.hidden), dim=1
        )  # Concatenate input and hidden state
        self.hidden = self.tanh(self.i2h(combined))  # Update hidden state
        output = self.h2o(self.hidden)  # Compute output
        return output


# Leading dimension of the hidden state created in Model.__init__.
batch_size = 256
@torch.compile(
    dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline)
)
class Model(nn.Module):
    """Unidirectional multi-layer LSTM followed by a linear layer applied
    to the last time step."""

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.0):
        """
        Initialize the LSTM model.

        :param input_size: The number of expected features in the input `x`
        :param hidden_size: The number of features in the hidden state `h`
        :param num_layers: Number of recurrent layers
        :param output_size: The number of output features
        :param dropout: If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer
        """
        super().__init__()

        # Kept so forward() can size default initial states.
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=False,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, h0=None, c0=None):
        """
        Forward pass through the LSTM model.

        :param x: The input tensor, shape (batch_size, sequence_length, input_size)
        :param h0: Optional initial hidden state (num_layers, batch_size, hidden_size)
        :param c0: Optional initial cell state (num_layers, batch_size, hidden_size)
        :return: The output tensor, shape (batch_size, output_size)
        """
        state_shape = (self.num_layers, x.size(0), self.hidden_size)

        # Missing initial states are drawn at random on the input's device
        # (hidden state first, then cell state).
        if h0 is None:
            h0 = torch.randn(*state_shape, device=x.device)
        if c0 is None:
            c0 = torch.randn(*state_shape, device=x.device)

        sequence_out, _ = self.lstm(x, (h0, c0))  # (batch_size, seq_length, hidden_size)
        last_step = sequence_out[:, -1, :]  # (batch_size, hidden_size)
        return self.fc(last_step)  # (batch_size, output_size)
@torch.compile(
    dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline)
)
class Model(nn.Module):
    """Unidirectional multi-layer LSTM whose forward pass returns the final
    cell state."""

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.0):
        """
        Initialize the LSTM model.

        :param input_size: The number of expected features in the input `x`
        :param hidden_size: The number of features in the hidden state `h`
        :param num_layers: Number of recurrent layers
        :param output_size: The number of output features of the linear layer
        :param dropout: If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to `dropout`
        """
        super(Model, self).__init__()
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=False,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, h0, c0):
        """
        Forward pass through the LSTM model.

        :param x: The input tensor, shape (batch_size, sequence_length, input_size)
        :param h0: Initial hidden state, shape (num_layers, batch_size, hidden_size)
        :param c0: Initial cell state, shape (num_layers, batch_size, hidden_size)
        :return: The final cell state `c_n`, shape (num_layers, batch_size, hidden_size)
        """
        # Forward propagate LSTM
        seq_out, (_, c_n) = self.lstm(
            x, (h0, c0)
        )  # seq_out: tensor of shape (batch_size, seq_length, hidden_size)

        # The linear layer is still applied to the last time step, but its
        # result is not part of the return value.
        # NOTE(review): kept so the executed workload is unchanged; confirm
        # whether the benchmark really wants this discarded computation.
        _ = self.fc(seq_out[:, -1, :])  # tensor of shape (batch_size, output_size)

        return c_n
+ + :param input_size: The number of expected features in the input `x` + :param hidden_size: The number of features in the hidden state `h` + :param num_layers: Number of recurrent layers + :param output_size: The number of output features + :param dropout: If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to `dropout` + """ + super(Model, self).__init__() + # Initialize hidden state with random values + self.lstm = nn.LSTM( + input_size, + hidden_size, + num_layers, + batch_first=True, + dropout=dropout, + bidirectional=True, + ) + self.fc = nn.Linear(hidden_size * 2, output_size) + + def forward(self, x, h0, c0): + """ + Forward pass through the LSTM model. + + :param x: The input tensor, shape (batch_size, sequence_length, input_size) + :return: The output tensor, shape (batch_size, sequence_length, output_size) + """ + # Forward propagate LSTM + out, hn = self.lstm( + x, (h0, c0) + ) # out: tensor of shape (batch_size, seq_length, hidden_size) + + # Decode the hidden state of the last time step + out = self.fc(out[:, -1, :]) # out: tensor of shape (batch_size, output_size) + + return out diff --git a/backends/mlir/cpu/KernelBench/level3/39_GRU.py b/backends/mlir/cpu/KernelBench/level3/39_GRU.py new file mode 100644 index 0000000..8631126 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/39_GRU.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__( + self, input_size, hidden_size, num_layers=3, bias=True, batch_first=False + ): + """ + :param input_size: The number of expected features in the input x + :param hidden_size: The number of features in the hidden state h + :param num_layers: Number of recurrent layers (default: 1) + :param bias: If False, then the layer does not use bias weights b_ih and b_hh 
(default: True) + :param batch_first: If True, then the input and output tensors are provided as (batch, seq, feature) (default: False) + """ + super(Model, self).__init__() + + self.gru = nn.GRU( + input_size, + hidden_size, + num_layers, + bias, + batch_first, + dropout=0, + bidirectional=False, + ) + + def forward(self, x, h0): + """ + :param x: The input tensor, shape (seq_len, batch_size, input_size) if batch_first=False, otherwise (batch_size, seq_len, input_size) + :param h0: The initial hidden state for the input sequence, shape (num_layers * num_directions, batch_size, hidden_size) (default: None) + :return: output, h_n + - output: The output features (h_t) from the last layer of the GRU, for each t, shape (seq_len, batch_size, num_directions * hidden_size) if batch_first=False, otherwise (batch_size, seq_len, num_directions * hidden_size) + - h_n: The hidden state for t = seq_len, shape (num_layers * num_directions, batch_size, hidden_size) + """ + output, h_n = self.gru(x, h0) + return output diff --git a/backends/mlir/cpu/KernelBench/level3/3_DeepNarrowMLP.py b/backends/mlir/cpu/KernelBench/level3/3_DeepNarrowMLP.py new file mode 100644 index 0000000..32a34d1 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/3_DeepNarrowMLP.py @@ -0,0 +1,36 @@ +import torch +import torch.nn as nn + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__(self, input_size, hidden_layer_sizes, output_size): + """ + :param input_size: The number of input features + :param hidden_layer_sizes: A list of ints containing the sizes of each hidden layer + :param output_size: The number of output features + """ + super(Model, self).__init__() + + layers = [] + current_input_size = input_size + + for hidden_size in hidden_layer_sizes: + layers.append(nn.Linear(current_input_size, hidden_size)) + layers.append(nn.ReLU()) + current_input_size = hidden_size + + 
layers.append(nn.Linear(current_input_size, output_size)) + + self.network = nn.Sequential(*layers) + + def forward(self, x): + """ + :param x: The input tensor, shape (batch_size, input_size) + :return: The output tensor, shape (batch_size, output_size) + """ + return self.network(x) diff --git a/backends/mlir/cpu/KernelBench/level3/40_GRUHidden.py b/backends/mlir/cpu/KernelBench/level3/40_GRUHidden.py new file mode 100644 index 0000000..34aae11 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/40_GRUHidden.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__( + self, input_size, hidden_size, num_layers=3, bias=True, batch_first=False + ): + """ + :param input_size: The number of expected features in the input x + :param hidden_size: The number of features in the hidden state h + :param num_layers: Number of recurrent layers (default: 3) + :param bias: If False, then the layer does not use bias weights b_ih and b_hh (default: True) + :param batch_first: If True, then the input and output tensors are provided as (batch, seq, feature) (default: False) + """ + super(Model, self).__init__() + + self.gru = nn.GRU( + input_size, + hidden_size, + num_layers, + bias, + batch_first, + dropout=0, + bidirectional=False, + ) + + def forward(self, x, h0): + """ + :param x: The input tensor, shape (seq_len, batch_size, input_size) if batch_first=False, otherwise (batch_size, seq_len, input_size) + :param h0: The initial hidden state for the input sequence, shape (num_layers * num_directions, batch_size, hidden_size) (default: None) + :return: output, h_n + - output: The output features (h_t) from the last layer of the GRU, for each t, shape (seq_len, batch_size, num_directions * hidden_size) if batch_first=False, otherwise (batch_size, seq_len, num_directions * hidden_size) + - h_n: The hidden state 
for t = seq_len, shape (num_layers * num_directions, batch_size, hidden_size) + """ + output, h_n = self.gru(x, h0) + return h_n diff --git a/backends/mlir/cpu/KernelBench/level3/41_GRUBidirectional.py b/backends/mlir/cpu/KernelBench/level3/41_GRUBidirectional.py new file mode 100644 index 0000000..cf17962 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/41_GRUBidirectional.py @@ -0,0 +1,46 @@ +import torch +import torch.nn as nn + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__( + self, input_size, hidden_size, num_layers=3, bias=True, batch_first=False + ): + """ + :param input_size: The number of expected features in the input x + :param hidden_size: The number of features in the hidden state h + :param num_layers: Number of recurrent layers (default: 3) + :param bias: If False, then the layer does not use bias weights b_ih and b_hh (default: True) + :param batch_first: If True, then the input and output tensors are provided as (batch, seq, feature) (default: False) + """ + super(Model, self).__init__() + + self.gru = nn.GRU( + input_size, + hidden_size, + num_layers, + bias, + batch_first, + dropout=0, + bidirectional=True, + ) + self.h0 = torch.randn((num_layers * 2, batch_size, hidden_size)) + + def forward(self, x, h0): + """ + :param x: The input tensor, shape (seq_len, batch_size, input_size) if batch_first=False, otherwise (batch_size, seq_len, input_size) + :param h0: The initial hidden state for the input sequence, shape (num_layers * num_directions, batch_size, hidden_size) (default: None) + :return: output, h_n + - output: The output features (h_t) from the last layer of the GRU, for each t, shape (seq_len, batch_size, num_directions * hidden_size) if batch_first=False, otherwise (batch_size, seq_len, num_directions * hidden_size) + - h_n: The hidden state for t = seq_len, shape (num_layers * num_directions, batch_size, 
hidden_size) + """ + output, h_n = self.gru(x, h0) + return output + + +batch_size = 10 diff --git a/backends/mlir/cpu/KernelBench/level3/42_GRUBidirectionalHidden.py b/backends/mlir/cpu/KernelBench/level3/42_GRUBidirectionalHidden.py new file mode 100644 index 0000000..6e0b0b9 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/42_GRUBidirectionalHidden.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__( + self, input_size, hidden_size, num_layers=3, bias=True, batch_first=False + ): + """ + :param input_size: The number of expected features in the input x + :param hidden_size: The number of features in the hidden state h + :param num_layers: Number of recurrent layers (default: 3) + :param bias: If False, then the layer does not use bias weights b_ih and b_hh (default: True) + :param batch_first: If True, then the input and output tensors are provided as (batch, seq, feature) (default: False) + """ + super(Model, self).__init__() + + self.gru = nn.GRU( + input_size, + hidden_size, + num_layers, + bias, + batch_first, + dropout=0, + bidirectional=True, + ) + + def forward(self, x, h0): + """ + :param x: The input tensor, shape (seq_len, batch_size, input_size) if batch_first=False, otherwise (batch_size, seq_len, input_size) + :param h0: The initial hidden state for the input sequence, shape (num_layers * num_directions, batch_size, hidden_size) (default: None) + :return: output, h_n + - output: The output features (h_t) from the last layer of the GRU, for each t, shape (seq_len, batch_size, num_directions * hidden_size) if batch_first=False, otherwise (batch_size, seq_len, num_directions * hidden_size) + - h_n: The hidden state for t = seq_len, shape (num_layers * num_directions, batch_size, hidden_size) + """ + output, h_n = self.gru(x, h0) + return h_n diff --git 
a/backends/mlir/cpu/KernelBench/level3/43_MinGPTCausalAttention.py b/backends/mlir/cpu/KernelBench/level3/43_MinGPTCausalAttention.py new file mode 100644 index 0000000..61f89a3 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/43_MinGPTCausalAttention.py @@ -0,0 +1,70 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +# From https://github.com/karpathy/minGPT/blob/master/mingpt/model.py +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + """ + A vanilla multi-head masked self-attention layer with a projection at the end. + It is possible to use torch.nn.MultiheadAttention here but I am including an + explicit implementation here to show that there is nothing too scary here. + """ + + def __init__(self, n_embd, n_head, attn_pdrop, resid_pdrop, max_seqlen): + super().__init__() + assert n_embd % n_head == 0 + # key, query, value projections for all heads, but in a batch + self.c_attn = nn.Linear(n_embd, 3 * n_embd) + # output projection + self.c_proj = nn.Linear(n_embd, n_embd) + # regularization + self.attn_dropout = nn.Dropout(attn_pdrop) + self.resid_dropout = nn.Dropout(resid_pdrop) + # causal mask to ensure that attention is only applied to the left in the input sequence + self.register_buffer( + "bias", + torch.tril(torch.ones(max_seqlen, max_seqlen)).view( + 1, 1, max_seqlen, max_seqlen + ), + ) + self.n_head = n_head + self.n_embd = n_embd + + def forward(self, x): + B, T, C = ( + x.size() + ) # batch size, sequence length, embedding dimensionality (n_embd) + + # calculate query, key, values for all heads in batch and move head forward to be the batch dim + q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + k = k.view(B, T, self.n_head, C // self.n_head).transpose( + 1, 2 + ) # (B, nh, T, hs) + q = q.view(B, T, self.n_head, C // self.n_head).transpose( + 1, 2 + ) # (B, nh, T, hs) + v = v.view(B, T, self.n_head, 
C // self.n_head).transpose( + 1, 2 + ) # (B, nh, T, hs) + + # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + att = att.masked_fill(self.bias[:, :, :T, :T] == 0, float("-inf")) + att = F.softmax(att, dim=-1) + att = self.attn_dropout(att) + y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs) + y = ( + y.transpose(1, 2).contiguous().view(B, T, C) + ) # re-assemble all head outputs side by side + + # output projection + y = self.resid_dropout(self.c_proj(y)) + return y diff --git a/backends/mlir/cpu/KernelBench/level3/44_MiniGPTBlock.py b/backends/mlir/cpu/KernelBench/level3/44_MiniGPTBlock.py new file mode 100644 index 0000000..6effe48 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/44_MiniGPTBlock.py @@ -0,0 +1,120 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import ai_bench.mlir + +# From https://github.com/karpathy/minGPT/blob/master/mingpt/model.py + + +class NewGELU(nn.Module): + """ + Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). + Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415 + """ + + def __init__(self): + super(NewGELU, self).__init__() + + def forward(self, x): + return ( + 0.5 + * x + * ( + 1.0 + + torch.tanh( + math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)) + ) + ) + ) + + +class CausalSelfAttention(nn.Module): + """ + A vanilla multi-head masked self-attention layer with a projection at the end. + It is possible to use torch.nn.MultiheadAttention here but I am including an + explicit implementation here to show that there is nothing too scary here. 
+ """ + + def __init__(self, n_embd, n_head, attn_pdrop, resid_pdrop, max_seqlen): + super().__init__() + assert n_embd % n_head == 0 + # key, query, value projections for all heads, but in a batch + self.c_attn = nn.Linear(n_embd, 3 * n_embd) + # output projection + self.c_proj = nn.Linear(n_embd, n_embd) + # regularization + self.attn_dropout = nn.Dropout(attn_pdrop) + self.resid_dropout = nn.Dropout(resid_pdrop) + # causal mask to ensure that attention is only applied to the left in the input sequence + self.register_buffer( + "bias", + torch.tril(torch.ones(max_seqlen, max_seqlen)).view( + 1, 1, max_seqlen, max_seqlen + ), + ) + self.n_head = n_head + self.n_embd = n_embd + + def forward(self, x): + B, T, C = ( + x.size() + ) # batch size, sequence length, embedding dimensionality (n_embd) + + # calculate query, key, values for all heads in batch and move head forward to be the batch dim + q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + k = k.view(B, T, self.n_head, C // self.n_head).transpose( + 1, 2 + ) # (B, nh, T, hs) + q = q.view(B, T, self.n_head, C // self.n_head).transpose( + 1, 2 + ) # (B, nh, T, hs) + v = v.view(B, T, self.n_head, C // self.n_head).transpose( + 1, 2 + ) # (B, nh, T, hs) + + # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + att = att.masked_fill(self.bias[:, :, :T, :T] == 0, float("-inf")) + att = F.softmax(att, dim=-1) + att = self.attn_dropout(att) + y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs) + y = ( + y.transpose(1, 2).contiguous().view(B, T, C) + ) # re-assemble all head outputs side by side + + # output projection + y = self.resid_dropout(self.c_proj(y)) + return y + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + """an unassuming Transformer block""" + + def __init__(self, n_embd, n_head, attn_pdrop, resid_pdrop, 
max_seqlen): + super().__init__() + self.ln_1 = nn.LayerNorm(n_embd) + self.attn = CausalSelfAttention( + n_embd, n_head, attn_pdrop, resid_pdrop, max_seqlen + ) + self.ln_2 = nn.LayerNorm(n_embd) + self.mlp = nn.ModuleDict( + dict( + c_fc=nn.Linear(n_embd, 4 * n_embd), + c_proj=nn.Linear(4 * n_embd, n_embd), + act=NewGELU(), + dropout=nn.Dropout(resid_pdrop), + ) + ) + m = self.mlp + self.mlpf = lambda x: m.dropout(m.c_proj(m.act(m.c_fc(x)))) # MLP forward + + def forward(self, x): + x = x + self.attn(self.ln_1(x)) + x = x + self.mlpf(self.ln_2(x)) + return x diff --git a/backends/mlir/cpu/KernelBench/level3/46_NetVladWithGhostClusters.py b/backends/mlir/cpu/KernelBench/level3/46_NetVladWithGhostClusters.py new file mode 100644 index 0000000..6ab2b14 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/46_NetVladWithGhostClusters.py @@ -0,0 +1,91 @@ +# Copyright 2018 Antoine Miech All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Code modified from here +https://github.com/albanie/collaborative-experts/blob/master/model/net_vlad.py +""" + +import math + +import torch +import torch as th +import torch.nn as nn +import torch.nn.functional as F + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__(self, cluster_size, feature_size, ghost_clusters): + super(Model, self).__init__() + + self.feature_size = feature_size + self.cluster_size = cluster_size + self.ghost_clusters = ghost_clusters + + init_sc = 1 / math.sqrt(feature_size) + clusters = cluster_size + ghost_clusters + + # The `clusters` weights are the `(w,b)` in the paper + self.clusters = nn.Parameter(init_sc * th.randn(feature_size, clusters)) + self.batch_norm = nn.BatchNorm1d(clusters) + # The `clusters2` weights are the visual words `c_k` in the paper + self.clusters2 = nn.Parameter(init_sc * th.randn(1, feature_size, cluster_size)) + self.out_dim = self.cluster_size * feature_size + + def forward(self, x, mask=None): + """Aggregates feature maps into a fixed size representation. In the following + notation, B = batch_size, N = num_features, K = num_clusters, D = feature_size. 
+ + Args: + x (th.Tensor): B x N x D + + Returns: + (th.Tensor): B x DK + """ + max_sample = x.size()[1] + x = x.view(-1, self.feature_size) # B x N x D -> BN x D + + if x.device != self.clusters.device: + msg = f"x.device {x.device} != cluster.device {self.clusters.device}" + raise ValueError(msg) + + assignment = th.matmul(x, self.clusters) # (BN x D) x (D x (K+G)) -> BN x (K+G) + assignment = self.batch_norm(assignment) + + assignment = F.softmax(assignment, dim=1) # BN x (K+G) -> BN x (K+G) + # remove ghost assignments + assignment = assignment[:, : self.cluster_size] + assignment = assignment.view(-1, max_sample, self.cluster_size) # -> B x N x K + a_sum = th.sum(assignment, dim=1, keepdim=True) # B x N x K -> B x 1 x K + a = a_sum * self.clusters2 + + assignment = assignment.transpose(1, 2) # B x N x K -> B x K x N + + x = x.view(-1, max_sample, self.feature_size) # BN x D -> B x N x D + vlad = th.matmul(assignment, x) # (B x K x N) x (B x N x D) -> B x K x D + vlad = vlad.transpose(1, 2) # -> B x D x K + vlad = vlad - a + + # L2 intra norm + vlad = F.normalize(vlad) + + # flattening + L2 norm + vlad = vlad.reshape(-1, self.cluster_size * self.feature_size) # -> B x DK + vlad = F.normalize(vlad) + return vlad # B x DK diff --git a/backends/mlir/cpu/KernelBench/level3/47_NetVladNoGhostClusters.py b/backends/mlir/cpu/KernelBench/level3/47_NetVladNoGhostClusters.py new file mode 100644 index 0000000..6ab2b14 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/47_NetVladNoGhostClusters.py @@ -0,0 +1,91 @@ +# Copyright 2018 Antoine Miech All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Code modified from here +https://github.com/albanie/collaborative-experts/blob/master/model/net_vlad.py +""" + +import math + +import torch +import torch as th +import torch.nn as nn +import torch.nn.functional as F + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__(self, cluster_size, feature_size, ghost_clusters): + super(Model, self).__init__() + + self.feature_size = feature_size + self.cluster_size = cluster_size + self.ghost_clusters = ghost_clusters + + init_sc = 1 / math.sqrt(feature_size) + clusters = cluster_size + ghost_clusters + + # The `clusters` weights are the `(w,b)` in the paper + self.clusters = nn.Parameter(init_sc * th.randn(feature_size, clusters)) + self.batch_norm = nn.BatchNorm1d(clusters) + # The `clusters2` weights are the visual words `c_k` in the paper + self.clusters2 = nn.Parameter(init_sc * th.randn(1, feature_size, cluster_size)) + self.out_dim = self.cluster_size * feature_size + + def forward(self, x, mask=None): + """Aggregates feature maps into a fixed size representation. In the following + notation, B = batch_size, N = num_features, K = num_clusters, D = feature_size. 
+ + Args: + x (th.Tensor): B x N x D + + Returns: + (th.Tensor): B x DK + """ + max_sample = x.size()[1] + x = x.view(-1, self.feature_size) # B x N x D -> BN x D + + if x.device != self.clusters.device: + msg = f"x.device {x.device} != cluster.device {self.clusters.device}" + raise ValueError(msg) + + assignment = th.matmul(x, self.clusters) # (BN x D) x (D x (K+G)) -> BN x (K+G) + assignment = self.batch_norm(assignment) + + assignment = F.softmax(assignment, dim=1) # BN x (K+G) -> BN x (K+G) + # remove ghost assignments + assignment = assignment[:, : self.cluster_size] + assignment = assignment.view(-1, max_sample, self.cluster_size) # -> B x N x K + a_sum = th.sum(assignment, dim=1, keepdim=True) # B x N x K -> B x 1 x K + a = a_sum * self.clusters2 + + assignment = assignment.transpose(1, 2) # B x N x K -> B x K x N + + x = x.view(-1, max_sample, self.feature_size) # BN x D -> B x N x D + vlad = th.matmul(assignment, x) # (B x K x N) x (B x N x D) -> B x K x D + vlad = vlad.transpose(1, 2) # -> B x D x K + vlad = vlad - a + + # L2 intra norm + vlad = F.normalize(vlad) + + # flattening + L2 norm + vlad = vlad.reshape(-1, self.cluster_size * self.feature_size) # -> B x DK + vlad = F.normalize(vlad) + return vlad # B x DK diff --git a/backends/mlir/cpu/KernelBench/level3/4_LeNet5.py b/backends/mlir/cpu/KernelBench/level3/4_LeNet5.py new file mode 100644 index 0000000..0125db0 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/4_LeNet5.py @@ -0,0 +1,56 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__(self, num_classes): + """ + LeNet-5 architecture implementation in PyTorch. + + :param num_classes: The number of output classes. 
+ """ + super(Model, self).__init__() + + # Convolutional layers + self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1) + self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1) + + # Fully connected layers + self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120) + self.fc2 = nn.Linear(in_features=120, out_features=84) + self.fc3 = nn.Linear(in_features=84, out_features=num_classes) + + def forward(self, x): + """ + Forward pass of the LeNet-5 model. + + :param x: The input tensor, shape (batch_size, 1, 32, 32) + :return: The output tensor, shape (batch_size, num_classes) + """ + # First convolutional layer with ReLU activation and max pooling + x = F.relu(self.conv1(x)) + x = F.max_pool2d(x, kernel_size=2, stride=2) + + # Second convolutional layer with ReLU activation and max pooling + x = F.relu(self.conv2(x)) + x = F.max_pool2d(x, kernel_size=2, stride=2) + + # Flatten the output for the fully connected layers + x = x.view(-1, 16 * 5 * 5) + + # First fully connected layer with ReLU activation + x = F.relu(self.fc1(x)) + + # Second fully connected layer with ReLU activation + x = F.relu(self.fc2(x)) + + # Final fully connected layer + x = self.fc3(x) + + return x diff --git a/backends/mlir/cpu/KernelBench/level3/50_ReLUSelfAttention.py b/backends/mlir/cpu/KernelBench/level3/50_ReLUSelfAttention.py new file mode 100644 index 0000000..4789b1b --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/50_ReLUSelfAttention.py @@ -0,0 +1,88 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import ai_bench.mlir + +# From https://github.com/karpathy/minGPT/blob/master/mingpt/model.py + + +class NewGELU(nn.Module): + """ + Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). 
+ Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415 + """ + + def __init__(self): + super(NewGELU, self).__init__() + + def forward(self, x): + return ( + 0.5 + * x + * ( + 1.0 + + torch.tanh( + math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)) + ) + ) + ) + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + """ + A multi-head masked self-attention layer with a projection at the end that uses ReLU instead of Softmax. + It is possible to use torch.nn.MultiheadAttention here but I am including an + explicit implementation here to show that there is nothing too scary here. + """ + + def __init__(self, n_embd, n_head, max_seqlen): + super().__init__() + assert n_embd % n_head == 0 + # key, query, value projections for all heads, but in a batch + self.c_attn = nn.Linear(n_embd, 3 * n_embd) + # output projection + self.c_proj = nn.Linear(n_embd, n_embd) + # causal mask to ensure that attention is only applied to the left in the input sequence + self.register_buffer( + "bias", + torch.tril(torch.ones(max_seqlen, max_seqlen)).view( + 1, 1, max_seqlen, max_seqlen + ), + ) + self.n_head = n_head + self.n_embd = n_embd + + def forward(self, x): + B, T, C = ( + x.size() + ) # batch size, sequence length, embedding dimensionality (n_embd) + + # calculate query, key, values for all heads in batch and move head forward to be the batch dim + q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + k = k.view(B, T, self.n_head, C // self.n_head).transpose( + 1, 2 + ) # (B, nh, T, hs) + q = q.view(B, T, self.n_head, C // self.n_head).transpose( + 1, 2 + ) # (B, nh, T, hs) + v = v.view(B, T, self.n_head, C // self.n_head).transpose( + 1, 2 + ) # (B, nh, T, hs) + + # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + att = att.masked_fill(self.bias[:, :, :T, :T] == 
0, float("-inf")) + att = F.relu(att) + + y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs) + y = ( + y.transpose(1, 2).contiguous().view(B, T, C) + ) # re-assemble all head outputs side by side + + return y diff --git a/backends/mlir/cpu/KernelBench/level3/5_AlexNet.py b/backends/mlir/cpu/KernelBench/level3/5_AlexNet.py new file mode 100644 index 0000000..fe92f94 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/5_AlexNet.py @@ -0,0 +1,96 @@ +import torch +import torch.nn as nn + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__(self, num_classes=1000): + """ + :param num_classes: The number of output classes (default is 1000 for ImageNet) + """ + super(Model, self).__init__() + + # First convolutional layer + self.conv1 = nn.Conv2d( + in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2 + ) + self.relu1 = nn.ReLU(inplace=True) + self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2) + + # Second convolutional layer + self.conv2 = nn.Conv2d( + in_channels=96, out_channels=256, kernel_size=5, padding=2 + ) + self.relu2 = nn.ReLU(inplace=True) + self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2) + + # Third convolutional layer + self.conv3 = nn.Conv2d( + in_channels=256, out_channels=384, kernel_size=3, padding=1 + ) + self.relu3 = nn.ReLU(inplace=True) + + # Fourth convolutional layer + self.conv4 = nn.Conv2d( + in_channels=384, out_channels=384, kernel_size=3, padding=1 + ) + self.relu4 = nn.ReLU(inplace=True) + + # Fifth convolutional layer + self.conv5 = nn.Conv2d( + in_channels=384, out_channels=256, kernel_size=3, padding=1 + ) + self.relu5 = nn.ReLU(inplace=True) + self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2) + + # Fully connected layers + self.fc1 = nn.Linear(in_features=256 * 6 * 6, out_features=4096) + self.relu6 = nn.ReLU(inplace=True) + self.dropout1 = nn.Dropout(p=0.0) + + self.fc2 = 
nn.Linear(in_features=4096, out_features=4096) + self.relu7 = nn.ReLU(inplace=True) + self.dropout2 = nn.Dropout(p=0.0) + + self.fc3 = nn.Linear(in_features=4096, out_features=num_classes) + + def forward(self, x): + """ + :param x: The input tensor, shape (batch_size, 3, 224, 224) + :return: The output tensor, shape (batch_size, num_classes) + """ + x = self.conv1(x) + x = self.relu1(x) + x = self.maxpool1(x) + + x = self.conv2(x) + x = self.relu2(x) + x = self.maxpool2(x) + + x = self.conv3(x) + x = self.relu3(x) + + x = self.conv4(x) + x = self.relu4(x) + + x = self.conv5(x) + x = self.relu5(x) + x = self.maxpool3(x) + + x = torch.flatten(x, 1) + + x = self.fc1(x) + x = self.relu6(x) + x = self.dropout1(x) + + x = self.fc2(x) + x = self.relu7(x) + x = self.dropout2(x) + + x = self.fc3(x) + + return x diff --git a/backends/mlir/cpu/KernelBench/level3/6_GoogleNetInceptionModule.py b/backends/mlir/cpu/KernelBench/level3/6_GoogleNetInceptionModule.py new file mode 100644 index 0000000..8068586 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/6_GoogleNetInceptionModule.py @@ -0,0 +1,57 @@ +import torch +import torch.nn as nn + +import ai_bench.mlir + + +@torch.compile( + dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline) +) +class Model(nn.Module): + def __init__( + self, in_channels, out_1x1, reduce_3x3, out_3x3, reduce_5x5, out_5x5, pool_proj + ): + """ + :param in_channels: Number of input channels + :param out_1x1: Number of output channels for the 1x1 convolution + :param reduce_3x3: Number of output channels for the 1x1 reduction before 3x3 convolution + :param out_3x3: Number of output channels for the 3x3 convolution + :param reduce_5x5: Number of output channels for the 1x1 reduction before 5x5 convolution + :param out_5x5: Number of output channels for the 5x5 convolution + :param pool_proj: Number of output channels for the pooling projection + """ + super(Model, self).__init__() + + # 1x1 convolution branch + 
self.branch1x1 = nn.Conv2d(in_channels, out_1x1, kernel_size=1) + + # 3x3 convolution branch + self.branch3x3 = nn.Sequential( + nn.Conv2d(in_channels, reduce_3x3, kernel_size=1), + nn.Conv2d(reduce_3x3, out_3x3, kernel_size=3, padding=1), + ) + + # 5x5 convolution branch + self.branch5x5 = nn.Sequential( + nn.Conv2d(in_channels, reduce_5x5, kernel_size=1), + nn.Conv2d(reduce_5x5, out_5x5, kernel_size=5, padding=2), + ) + + # Max pooling branch + self.branch_pool = nn.Sequential( + nn.MaxPool2d(kernel_size=3, stride=1, padding=1), + nn.Conv2d(in_channels, pool_proj, kernel_size=1), + ) + + def forward(self, x): + """ + :param x: Input tensor, shape (batch_size, in_channels, height, width) + :return: Output tensor, shape (batch_size, out_channels, height, width) + """ + branch1x1 = self.branch1x1(x) + branch3x3 = self.branch3x3(x) + branch5x5 = self.branch5x5(x) + branch_pool = self.branch_pool(x) + + outputs = [branch1x1, branch3x3, branch5x5, branch_pool] + return torch.cat(outputs, 1) diff --git a/backends/mlir/cpu/KernelBench/level3/7_GoogleNetInceptionV1.py b/backends/mlir/cpu/KernelBench/level3/7_GoogleNetInceptionV1.py new file mode 100644 index 0000000..39f6e90 --- /dev/null +++ b/backends/mlir/cpu/KernelBench/level3/7_GoogleNetInceptionV1.py @@ -0,0 +1,120 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import ai_bench.mlir + + +class InceptionModule(nn.Module): + def __init__( + self, in_channels, out_1x1, reduce_3x3, out_3x3, reduce_5x5, out_5x5, pool_proj + ): + """ + :param in_channels: Number of input channels + :param out_1x1: Number of output channels for the 1x1 convolution + :param reduce_3x3: Number of output channels for the 1x1 reduction before 3x3 convolution + :param out_3x3: Number of output channels for the 3x3 convolution + :param reduce_5x5: Number of output channels for the 1x1 reduction before 5x5 convolution + :param out_5x5: Number of output channels for the 5x5 convolution + :param pool_proj: Number of 
@torch.compile(
    dynamic=False, backend=ai_bench.mlir.cpu_backend(ai_bench.mlir.cpu_pipeline)
)
class Model(nn.Module):
    """Inception-style image classifier wrapped with ``torch.compile``.

    A convolutional stem feeds nine ``InceptionModule`` blocks arranged in
    three groups with max pooling between the groups, followed by adaptive
    average pooling, dropout and a linear classification layer.
    """

    def __init__(self, num_classes=1000):
        """
        :param num_classes: Number of output classes
        """
        super(Model, self).__init__()

        # Stem: three convolutions with two max-pooling layers.
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3
        )
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1)
        self.conv3 = nn.Conv2d(
            in_channels=64, out_channels=192, kernel_size=3, padding=1
        )
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # First inception group.
        self.inception3a = InceptionModule(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = InceptionModule(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Second inception group.
        self.inception4a = InceptionModule(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = InceptionModule(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = InceptionModule(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = InceptionModule(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = InceptionModule(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Third inception group.
        self.inception5a = InceptionModule(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = InceptionModule(832, 384, 192, 384, 48, 128, 128)

        # Classification head; the dropout probability is fixed at 0.0.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.dropout = nn.Dropout(p=0.0)
        self.fc = nn.Linear(in_features=1024, out_features=num_classes)

    def forward(self, x):
        """
        :param x: Input tensor, shape (batch_size, 3, height, width)
        :return: Output tensor, shape (batch_size, num_classes)
        """
        # Stem: every convolution is followed by ReLU; pooling comes after
        # conv1 and after conv3.
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.maxpool2(x)

        # Inception groups and the pooling layers between them, in
        # application order.
        stages = (
            self.inception3a,
            self.inception3b,
            self.maxpool3,
            self.inception4a,
            self.inception4b,
            self.inception4c,
            self.inception4d,
            self.inception4e,
            self.maxpool4,
            self.inception5a,
            self.inception5b,
        )
        for stage in stages:
            x = stage(x)

        # Head: pool to 1x1, flatten everything after the batch dimension,
        # then dropout and the linear classifier.
        pooled = self.avgpool(x)
        features = torch.flatten(pooled, 1)
        return self.fc(self.dropout(features))
self.conv1 = nn.Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + ) + self.bn1 = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d( + out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(out_channels) + self.downsample = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels * self.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(out_channels * self.expansion), + ) + self.stride = stride + + def forward(self, x): + """ + :param x: Input tensor, shape (batch_size, in_channels, height, width) + :return: Output tensor, shape (batch_size, out_channels, height, width) + """ + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out