-
-
Notifications
You must be signed in to change notification settings - Fork 598
Expand file tree
/
Copy pathrun_profile.py
More file actions
110 lines (90 loc) · 3.25 KB
/
run_profile.py
File metadata and controls
110 lines (90 loc) · 3.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
"""
import math
import os
import sys
import torch
import torch.nn as nn
from torch import Tensor
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
from typing import Any, Dict, List, Optional
from src.core import YAMLConfig, yaml_utils
__all__ = ["profile_stats"]
def _auto_scale_flops(flops: float):
"""Copied from torch.profiler.profile"""
flop_headers = [
"",
"K",
"M",
"G",
"T",
"P",
]
assert flops > 0
log_flops = max(0, min(math.log10(flops) / 3, float(len(flop_headers) - 1)))
assert log_flops >= 0 and log_flops < len(flop_headers)
return (pow(10, (math.floor(log_flops) * -3.0)), flop_headers[int(log_flops)])
def profile_stats(
    model: nn.Module,
    data: Optional[Tensor] = None,
    shape: Optional[List[int]] = None,
    verbose: bool = False,
) -> Dict[str, Any]:
    """Profile a model's trainable-parameter count and forward-pass FLOPs.

    Runs ``model`` in eval mode through ``torch.profiler`` (one warmup step,
    one measured step) and aggregates the per-op FLOPs it reports. The
    model's original train/eval mode is restored before returning.

    Args:
        model: Module to profile.
        data: Optional input tensor. When ``None``, a random tensor of
            ``shape`` is created with the model's parameter dtype/device.
        shape: Input shape used when ``data`` is None.
            Defaults to ``[1, 3, 640, 640]``.
        verbose: If True, print the profiler table and a summary.

    Returns:
        Dict with ``'n_parameters'`` (trainable parameter count),
        ``'n_flops'`` (FLOPs per measured step) and ``'info'`` (the
        profiler table as a string).
    """
    # Avoid a mutable default argument: build the default shape per call.
    if shape is None:
        shape = [1, 3, 640, 640]

    is_training = model.training

    # requires_grad does not depend on train/eval mode, so count directly.
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    model.eval()

    if data is None:
        dtype = next(model.parameters()).dtype
        device = next(model.parameters()).device
        data = torch.rand(*shape, dtype=dtype, device=device)

    # Schedule: no wait/skip, 1 warmup + 1 active step, single cycle.
    wait = 0
    warmup = 1
    active = 1
    repeat = 1
    skip_first = 0
    with torch.profiler.profile(
        activities=[
            torch.profiler.ProfilerActivity.CPU,
            torch.profiler.ProfilerActivity.CUDA,
        ],
        schedule=torch.profiler.schedule(
            wait=wait,
            warmup=warmup,
            active=active,
            repeat=repeat,
            skip_first=skip_first,
        ),
        with_flops=True,
    ) as p:
        # Drive exactly enough forward passes to complete the schedule.
        n_step = skip_first + (wait + warmup + active) * repeat
        for _ in range(n_step):
            _ = model(data)
            p.step()

    # Restore the caller's mode.
    if is_training:
        model.train()

    statistics = p.key_averages()
    info = statistics.table(sort_by='self_cuda_time_total', row_limit=-1)
    # Average over the measured (active) steps; events with flops<=0 are noise.
    num_flops = sum(event.flops for event in statistics if event.flops > 0) / active
    (flops_scale, flops_header) = _auto_scale_flops(num_flops)
    if verbose:
        print(info)
        print(f'Total number of trainable parameters: {num_params}')
        print(f'Total number of flops: {num_flops * flops_scale:.3f}{flops_header} with {shape}')
    return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info}
if __name__ == "__main__":
    import argparse

    # CLI: config path is mandatory; device and yaml overrides are optional.
    cli = argparse.ArgumentParser()
    cli.add_argument('-c', '--config', type=str, required=True)
    cli.add_argument('-d', '--device', type=str, default='cuda:0', help='device',)
    cli.add_argument('-u', '--update', nargs='+', help='Update yaml config from command line.')
    args = cli.parse_args()

    # Start from any '-u key=value' overrides, then layer the remaining CLI
    # options on top (skipping 'update' itself and options left unset).
    overrides = yaml_utils.parse_cli(args.update) if args.update else {}
    for key, value in vars(args).items():
        if key != 'update' and value is not None:
            overrides[key] = value

    cfg = YAMLConfig(args.config, **overrides)
    model = cfg.model.to(args.device)
    profile_stats(model, verbose=True)