This repository was archived by the owner on Oct 16, 2023. It is now read-only.

Link TensorRT as backend for single device execution #82

Merged 1 commit on May 27, 2022
3 changes: 2 additions & 1 deletion energonai/context/config.py
@@ -27,7 +27,8 @@
     'backend':"nccl",
     'rm_padding': False,
     'seed' : 1024,
-    'verbose' : True
+    'verbose' : True,
+    'trt_sample' : None
 }


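For context, the new `trt_sample` key defaults to None, which leaves TensorRT conversion disabled. A user config opts in by supplying the example inputs that torch2trt will trace the model with — a minimal sketch, assuming a CUDA device and a purely illustrative input shape:

import torch

# Sketch of a user-side config value (hypothetical shape): torch2trt feeds
# these tensors through the model while building the TensorRT engine.
# Leaving trt_sample at its default None skips conversion entirely.
trt_sample = [torch.randn(1, 3, 224, 224, device='cuda')]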
31 changes: 20 additions & 11 deletions energonai/engine/rpc_worker.py
@@ -1,19 +1,14 @@
 import os
 import time
 import torch
-import inspect
 import torch.distributed.rpc as rpc
-import sys
 
 from colossalai.core import global_context as gpc
 from colossalai.context import ParallelMode
 from colossalai.logging import get_dist_logger
 
 from .rpc_utils import remote_cls_method, sync_cls_method, async_cls_method
 from .pipeline_wrapper import PipelineCommWrapper
 from .vit_pipeline_wrapper import ViTPipelineCommWrapper
-
-# from torch2trt import torch2trt
 from energonai.context import mcfg
-
 logger = get_dist_logger('energonai')

@@ -39,9 +34,12 @@ def top(self, key):
         return output
 
 
+
+
 class RPCWorker:
 
     def __init__(self, model_class, model_config, model_type, dtype, max_batch_size: int = 1) -> None:
+
         self.model_class = model_class
         self.model_config = model_config
         self.dtype = dtype
@@ -55,7 +53,7 @@ def __init__(self, model_class, model_config, model_type, dtype, max_batch_size:
 
         # self.trt_sample = None
         self._init_self()
-        self.return_dict = ReturnDict()
+        self.return_dict = ReturnDict()
 
     def _init_self(self):
         logger.info("Init model in rank {}".format(self.rank))
@@ -67,10 +65,21 @@ def _init_self(self):
 
         self.model.eval()
 
-        # if trt_sample is not None and gpc.get_world_size(ParallelMode.MODEL) > 1:
-        #     logger.error("Tensor Parallelism does not support TensorRT convert")
-        # elif trt_sample is not None and gpc.get_world_size(ParallelMode.MODEL) == 1:
-        #     model = torch2trt(model, [self.trt_sample])
+        if mcfg['trt_sample'] is not None:
+            try:
+                logger.info('Import Torch2Trt')
+                from torch2trt import torch2trt
+                from energonai.engine import trt_converter
+            except:
+                logger.error("Installation Required, \n \
+                    follow https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html \
+                    and https://github.com/NVIDIA-AI-IOT/torch2trt")
+
+        if mcfg['trt_sample'] is not None and gpc.get_world_size(ParallelMode.MODEL) > 1:
+            logger.error("Tensor Parallelism does not support TensorRT convert")
+        elif mcfg['trt_sample'] is not None and gpc.get_world_size(ParallelMode.MODEL) == 1:
+            self.model = torch2trt(self.model, mcfg['trt_sample'])
+            logger.info("TensorRT convert complete.")
 
         try:
             self.model = pipe_wrapper[self.model_type](model=self.model, max_batch_size=self.max_batch_size, dtype=self.dtype)
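The worker above converts only when the model-parallel world size is 1, since torch2trt cannot trace a tensor-parallel model. The call itself is the standard torch2trt entry point; below is a self-contained sketch of the same flow, with a hypothetical stand-in model and illustrative shapes:

import torch
from torch2trt import torch2trt  # requires TensorRT and torch2trt to be installed

class TinyNet(torch.nn.Module):  # hypothetical stand-in for the served model
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(16, 16)

    def forward(self, x):
        return self.fc(x)

model = TinyNet().cuda().eval()
sample = [torch.randn(1, 16, device='cuda')]  # plays the role of mcfg['trt_sample']

# torch2trt traces the model on the sample inputs and returns a TRTModule
# that is called exactly like the original module, as rpc_worker does above.
model_trt = torch2trt(model, sample)
with torch.no_grad():
    print(model_trt(sample[0]).shape)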
10 changes: 10 additions & 0 deletions energonai/engine/trt_converter.py
@@ -0,0 +1,10 @@
+from torch2trt.torch2trt import *
+
+@tensorrt_converter('torch.matmul')
+def convert_mul(ctx):
+    input_a = ctx.method_args[0]
+    input_b = ctx.method_args[1]
+    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
+    output = ctx.method_return
+    layer = ctx.network.add_matrix_multiply(input_a_trt, trt.MatrixOperation.NONE, input_b_trt, trt.MatrixOperation.NONE)
+    output._trt = layer.get_output(0)
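torch2trt dispatches converters by the qualified name of the intercepted call, so merely importing this module registers the `torch.matmul` handler — the `from energonai.engine import trt_converter` in rpc_worker.py exists for exactly that side effect. A small sketch of how the converter could be exercised in isolation; the toy module and shapes are illustrative:

import torch
from torch2trt import torch2trt
import energonai.engine.trt_converter  # noqa: F401 -- import registers the matmul converter

class MatMul(torch.nn.Module):
    def forward(self, a, b):
        return torch.matmul(a, b)

a = torch.randn(1, 8, 16, device='cuda')
b = torch.randn(1, 16, 32, device='cuda')
model_trt = torch2trt(MatMul().cuda().eval(), [a, b])

# Compare the TensorRT engine's output against eager PyTorch.
print(torch.max(torch.abs(model_trt(a, b) - torch.matmul(a, b))))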