// Instantiate the function template for the different head dimensions.
// For now, only half_t is supported. TF32 is WIP.
if (kHeadSize == 64) {
testFmhaForward<cutlass::half_t, 64>(seqLength, seqLength, numHeads,
batchSize, iterations, refCheck,
printValues, nStreams);