
Commit f373926

q10 authored and avbokovoy committed
Re-organize SLL ops, pt 1 (pytorch#3642)
Summary:
Pull Request resolved: pytorch#3642

X-link: https://github.com/facebookresearch/FBGEMM/pull/718

- Re-organize SLL ops, pt 1

Reviewed By: sryap

Differential Revision: D68915217

fbshipit-source-id: 6208ef53c1740c5dbf89534cc2301c10242b82ea
1 parent 4739458 commit f373926

File tree

6 files changed: +795 -743 lines changed


fbgemm_gpu/fbgemm_gpu/sll/__init__.py

Lines changed: 5 additions & 5 deletions
@@ -46,7 +46,6 @@
     jagged_jagged_bmm,
     jagged_jagged_bmm_jagged_out,
     jagged_softmax,
-    multi_head_jagged_flash_attention,
     triton_jagged_self_substraction_jagged_out,
 )
@@ -326,15 +325,16 @@
         "CUDA": jagged_dense_flash_attention,
         "AutogradCUDA": jagged_dense_flash_attention,
     },
-    "sll_multi_head_jagged_flash_attention": {
-        "CUDA": multi_head_jagged_flash_attention,
-        "AutogradCUDA": multi_head_jagged_flash_attention,
-    },
 }
 
 for op_name, dispatches in sll_cpu_registrations.items():
     lib.register(op_name, dispatches)
 
 if torch.cuda.is_available():
+    from fbgemm_gpu.sll.triton import op_registrations
+
+    for op_name, dispatches in op_registrations.items():
+        lib.register(op_name, dispatches)
+
     for op_name, dispatches in sll_gpu_registrations.items():
         lib.register(op_name, dispatches)
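
The registration dicts consumed by lib.register above map a dispatch key (e.g. "CUDA", "AutogradCUDA") to an implementation. As a rough illustration of that pattern, here is a minimal, self-contained sketch using PyTorch's torch.library API directly; the "toy_sll" namespace, the toy_scale op, and its schema are assumptions made for the example, not part of FBGEMM.

import torch

# Hypothetical namespace and op, used only to illustrate the
# {op_name: {dispatch_key: impl}} registration pattern.
lib = torch.library.Library("toy_sll", "DEF")
lib.define("toy_scale(Tensor x, float alpha) -> Tensor")

def toy_scale_impl(x: torch.Tensor, alpha: float) -> torch.Tensor:
    return x * alpha

op_registrations = {
    "toy_scale": {
        "CPU": toy_scale_impl,
        "CUDA": toy_scale_impl,
    },
}

for op_name, dispatches in op_registrations.items():
    for dispatch_key, fn in dispatches.items():
        lib.impl(op_name, fn, dispatch_key)

# The op is then callable through the dispatcher:
# y = torch.ops.toy_sll.toy_scale(torch.ones(3), 2.0)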
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+
+from fbgemm_gpu.sll.triton.multi_head_jagged_flash_attention import (  # noqa F401
+    multi_head_jagged_flash_attention,
+    MultiHeadJaggedFlashAttention,
+)
+
+op_registrations = {
+    "sll_multi_head_jagged_flash_attention": {
+        "CUDA": multi_head_jagged_flash_attention,
+        "AutogradCUDA": multi_head_jagged_flash_attention,
+    },
+}
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import torch
+
+
+def expect_contiguous(x: torch.Tensor) -> torch.Tensor:
+    if not x.is_contiguous():
+        return x.contiguous()
+    else:
+        return x
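
expect_contiguous is a small guard that copies a tensor only when its layout is not already contiguous. A hypothetical usage sketch follows; the dense_row_sums wrapper is illustrative only and not part of this commit, but it mirrors how a kernel wrapper might normalize layout before relying on row-major strides.

import torch

def expect_contiguous(x: torch.Tensor) -> torch.Tensor:
    if not x.is_contiguous():
        return x.contiguous()
    else:
        return x

def dense_row_sums(values: torch.Tensor) -> torch.Tensor:
    # Kernels that do pointer arithmetic from .stride() generally assume
    # a contiguous buffer, so copy only if the input is a strided view.
    values = expect_contiguous(values)
    return values.sum(dim=-1)

# A transposed view is non-contiguous and gets copied before use.
x = torch.arange(12, dtype=torch.float32).reshape(3, 4).t()
print(dense_row_sums(x))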
