@@ -54,28 +54,33 @@ static struct trace_array *hwlat_trace;
 #define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
 #define DEFAULT_LAT_THRESHOLD	10			/* 10us */

-/* sampling thread*/
-static struct task_struct *hwlat_kthread;
-
 static struct dentry *hwlat_sample_width;	/* sample width us */
 static struct dentry *hwlat_sample_window;	/* sample window us */
 static struct dentry *hwlat_thread_mode;	/* hwlat thread mode */

 enum {
 	MODE_NONE = 0,
 	MODE_ROUND_ROBIN,
+	MODE_PER_CPU,
 	MODE_MAX
 };
-static char *thread_mode_str[] = { "none", "round-robin" };
+static char *thread_mode_str[] = { "none", "round-robin", "per-cpu" };

 /* Save the previous tracing_thresh value */
 static unsigned long save_tracing_thresh;

-/* NMI timestamp counters */
-static u64 nmi_ts_start;
-static u64 nmi_total_ts;
-static int nmi_count;
-static int nmi_cpu;
+/* runtime kthread data */
+struct hwlat_kthread_data {
+	struct task_struct	*kthread;
+	/* NMI timestamp counters */
+	u64			nmi_ts_start;
+	u64			nmi_total_ts;
+	int			nmi_count;
+	int			nmi_cpu;
+};
+
+struct hwlat_kthread_data hwlat_single_cpu_data;
+DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data);

 /* Tells NMIs to call back to the hwlat tracer to record timestamps */
 bool trace_hwlat_callback_enabled;
@@ -112,6 +117,14 @@ static struct hwlat_data {
 	.thread_mode		= MODE_ROUND_ROBIN
 };

+static struct hwlat_kthread_data *get_cpu_data(void)
+{
+	if (hwlat_data.thread_mode == MODE_PER_CPU)
+		return this_cpu_ptr(&hwlat_per_cpu_data);
+	else
+		return &hwlat_single_cpu_data;
+}
+
 static bool hwlat_busy;

 static void trace_hwlat_sample(struct hwlat_sample *sample)
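
Not part of the patch: the hunk above leans on the kernel's per-CPU variable API, where DEFINE_PER_CPU() creates one instance of the structure per possible CPU, this_cpu_ptr() resolves the instance of the CPU the caller is running on, and per_cpu() addresses the instance of an explicit CPU number. A minimal, hypothetical module sketch of that pattern (demo_data and the counter are illustrative only):

#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/cpumask.h>

/* Hypothetical per-CPU structure: one copy per possible CPU. */
struct demo_data {
	int count;
};

static DEFINE_PER_CPU(struct demo_data, demo_per_cpu_data);

static int __init demo_init(void)
{
	unsigned int cpu;

	/* this_cpu_ptr(): the instance belonging to the CPU we run on. */
	cpu = get_cpu();			/* pin to the current CPU */
	this_cpu_ptr(&demo_per_cpu_data)->count++;
	put_cpu();

	/* per_cpu(): the instance of an arbitrary CPU, addressed by number. */
	for_each_online_cpu(cpu)
		pr_info("cpu %u: count %d\n", cpu,
			per_cpu(demo_per_cpu_data, cpu).count);

	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");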
@@ -149,7 +162,9 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)

 void trace_hwlat_callback(bool enter)
 {
-	if (smp_processor_id() != nmi_cpu)
+	struct hwlat_kthread_data *kdata = get_cpu_data();
+
+	if (!kdata->kthread)
 		return;

 	/*
@@ -158,13 +173,13 @@ void trace_hwlat_callback(bool enter)
 	 */
 	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
 		if (enter)
-			nmi_ts_start = time_get();
+			kdata->nmi_ts_start = time_get();
 		else
-			nmi_total_ts += time_get() - nmi_ts_start;
+			kdata->nmi_total_ts += time_get() - kdata->nmi_ts_start;
 	}

 	if (enter)
-		nmi_count++;
+		kdata->nmi_count++;
 }

 /**
@@ -176,6 +191,7 @@ void trace_hwlat_callback(bool enter)
  */
 static int get_sample(void)
 {
+	struct hwlat_kthread_data *kdata = get_cpu_data();
 	struct trace_array *tr = hwlat_trace;
 	struct hwlat_sample s;
 	time_type start, t1, t2, last_t2;
@@ -188,9 +204,8 @@ static int get_sample(void)

 	do_div(thresh, NSEC_PER_USEC); /* modifies interval value */

-	nmi_cpu = smp_processor_id();
-	nmi_total_ts = 0;
-	nmi_count = 0;
+	kdata->nmi_total_ts = 0;
+	kdata->nmi_count = 0;
 	/* Make sure NMIs see this first */
 	barrier();

@@ -260,15 +275,15 @@ static int get_sample(void)
 		ret = 1;

 		/* We read in microseconds */
-		if (nmi_total_ts)
-			do_div(nmi_total_ts, NSEC_PER_USEC);
+		if (kdata->nmi_total_ts)
+			do_div(kdata->nmi_total_ts, NSEC_PER_USEC);

 		hwlat_data.count++;
 		s.seqnum = hwlat_data.count;
 		s.duration = sample;
 		s.outer_duration = outer_sample;
-		s.nmi_total_ts = nmi_total_ts;
-		s.nmi_count = nmi_count;
+		s.nmi_total_ts = kdata->nmi_total_ts;
+		s.nmi_count = kdata->nmi_count;
 		s.count = count;
 		trace_hwlat_sample(&s);

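
Not part of the patch, just a note on the do_div() calls above: the macro divides a 64-bit dividend in place (the first argument ends up holding the quotient) and evaluates to the remainder, which is why kdata->nmi_total_ts can be reported directly after the conversion from nanoseconds to microseconds. A minimal sketch of that convention:

#include <linux/kernel.h>
#include <linux/time64.h>
#include <asm/div64.h>

/* Convert a nanosecond count to microseconds using the do_div() convention. */
static inline u64 ns_to_usecs_example(u64 ns)
{
	u64 usecs = ns;

	/* do_div() modifies 'usecs' in place and returns the remainder. */
	do_div(usecs, NSEC_PER_USEC);

	return usecs;
}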
@@ -364,21 +379,40 @@ static int kthread_fn(void *data)
 }

 /*
- * start_kthread - Kick off the hardware latency sampling/detector kthread
+ * stop_single_kthread - Inform the hardware latency sampling/detector kthread to stop
+ *
+ * This kicks the running hardware latency sampling/detector kernel thread and
+ * tells it to stop sampling now. Use this on unload and at system shutdown.
+ */
+static void stop_single_kthread(void)
+{
+	struct hwlat_kthread_data *kdata = get_cpu_data();
+	struct task_struct *kthread = kdata->kthread;
+
+	if (!kthread)
+		return;
+
+	kthread_stop(kthread);
+	kdata->kthread = NULL;
+}
+
+
+/*
+ * start_single_kthread - Kick off the hardware latency sampling/detector kthread
  *
  * This starts the kernel thread that will sit and sample the CPU timestamp
  * counter (TSC or similar) and look for potential hardware latencies.
  */
-static int start_kthread(struct trace_array *tr)
+static int start_single_kthread(struct trace_array *tr)
 {
+	struct hwlat_kthread_data *kdata = get_cpu_data();
 	struct cpumask *current_mask = &save_cpumask;
 	struct task_struct *kthread;
 	int next_cpu;

-	if (hwlat_kthread)
+	if (kdata->kthread)
 		return 0;

-
 	kthread = kthread_create(kthread_fn, NULL, "hwlatd");
 	if (IS_ERR(kthread)) {
 		pr_err(BANNER "could not start sampling thread\n");
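
Not part of the patch: the start/stop helpers above follow the usual kthread lifecycle, where kthread_stop() wakes the thread and blocks until its function returns, so the thread function has to poll kthread_should_stop() and exit once it turns true. A minimal, hypothetical sketch of that pattern (sampler_fn and the 100ms period are illustrative only):

#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *sampler;

/* Thread function: loop until kthread_stop() asks us to terminate. */
static int sampler_fn(void *data)
{
	while (!kthread_should_stop()) {
		/* take one sample here */
		msleep(100);		/* hypothetical sampling period */
	}
	return 0;
}

static int sampler_start(void)
{
	/* kthread_run() creates the thread and immediately wakes it up. */
	sampler = kthread_run(sampler_fn, NULL, "sampler-demo");
	if (IS_ERR(sampler)) {
		int err = PTR_ERR(sampler);

		sampler = NULL;
		return err;
	}
	return 0;
}

static void sampler_stop(void)
{
	if (sampler) {
		kthread_stop(sampler);	/* wakes the thread and waits for it to exit */
		sampler = NULL;
	}
}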
@@ -400,24 +434,97 @@ static int start_kthread(struct trace_array *tr)

 	sched_setaffinity(kthread->pid, current_mask);

-	hwlat_kthread = kthread;
+	kdata->kthread = kthread;
 	wake_up_process(kthread);

 	return 0;
 }

 /*
- * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
+ * stop_cpu_kthread - Stop a hwlat cpu kthread
+ */
+static void stop_cpu_kthread(unsigned int cpu)
+{
+	struct task_struct *kthread;
+
+	kthread = per_cpu(hwlat_per_cpu_data, cpu).kthread;
+	if (kthread)
+		kthread_stop(kthread);
+}
+
+/*
+ * stop_per_cpu_kthreads - Inform the hardware latency sampling/detector kthread to stop
  *
- * This kicks the running hardware latency sampling/detector kernel thread and
+ * This kicks the running hardware latency sampling/detector kernel threads and
  * tells it to stop sampling now. Use this on unload and at system shutdown.
  */
-static void stop_kthread(void)
+static void stop_per_cpu_kthreads(void)
 {
-	if (!hwlat_kthread)
-		return;
-	kthread_stop(hwlat_kthread);
-	hwlat_kthread = NULL;
+	unsigned int cpu;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		stop_cpu_kthread(cpu);
+	put_online_cpus();
+}
+
+/*
+ * start_cpu_kthread - Start a hwlat cpu kthread
+ */
+static int start_cpu_kthread(unsigned int cpu)
+{
+	struct task_struct *kthread;
+	char comm[24];
+
+	snprintf(comm, 24, "hwlatd/%d", cpu);
+
+	kthread = kthread_create_on_cpu(kthread_fn, NULL, cpu, comm);
+	if (IS_ERR(kthread)) {
+		pr_err(BANNER "could not start sampling thread\n");
+		return -ENOMEM;
+	}
+
+	per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;
+	wake_up_process(kthread);
+
+	return 0;
+}
+
+/*
+ * start_per_cpu_kthreads - Kick off the hardware latency sampling/detector kthreads
+ *
+ * This starts the kernel threads that will sit on potentially all cpus and
+ * sample the CPU timestamp counter (TSC or similar) and look for potential
+ * hardware latencies.
+ */
+static int start_per_cpu_kthreads(struct trace_array *tr)
+{
+	struct cpumask *current_mask = &save_cpumask;
+	unsigned int cpu;
+	int retval;
+
+	get_online_cpus();
+	/*
+	 * Run only on CPUs in which hwlat is allowed to run.
+	 */
+	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
+
+	for_each_online_cpu(cpu)
+		per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;
+
+	for_each_cpu(cpu, current_mask) {
+		retval = start_cpu_kthread(cpu);
+		if (retval)
+			goto out_error;
+	}
+	put_online_cpus();
+
+	return 0;
+
+out_error:
+	put_online_cpus();
+	stop_per_cpu_kthreads();
+	return retval;
 }

 /*
@@ -600,7 +707,8 @@ static void hwlat_tracer_stop(struct trace_array *tr);
  * The "none" sets the allowed cpumask for a single hwlatd thread at the
  * startup and lets the scheduler handle the migration. The default mode is
  * the "round-robin" one, in which a single hwlatd thread runs, migrating
- * among the allowed CPUs in a round-robin fashion.
+ * among the allowed CPUs in a round-robin fashion. The "per-cpu" mode
+ * creates one hwlatd thread per allowed CPU.
  */
 static ssize_t hwlat_mode_write(struct file *filp, const char __user *ubuf,
 				 size_t cnt, loff_t *ppos)
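
Not part of the patch: from userspace, the mode is picked through the tracefs interface before the tracer is enabled. A hedged sketch, assuming tracefs is mounted at /sys/kernel/tracing and that the mode file lives in the hwlat_detector directory introduced earlier in this series:

#include <stdio.h>
#include <stdlib.h>

/* Write a single string value to a tracefs file, aborting on error. */
static void write_file(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(EXIT_FAILURE);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	/* Assumed paths: adjust if tracefs is mounted elsewhere. */
	write_file("/sys/kernel/tracing/hwlat_detector/mode", "per-cpu");
	write_file("/sys/kernel/tracing/current_tracer", "hwlat");
	return 0;
}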
@@ -724,14 +832,20 @@ static void hwlat_tracer_start(struct trace_array *tr)
 {
 	int err;

-	err = start_kthread(tr);
+	if (hwlat_data.thread_mode == MODE_PER_CPU)
+		err = start_per_cpu_kthreads(tr);
+	else
+		err = start_single_kthread(tr);
 	if (err)
 		pr_err(BANNER "Cannot start hwlat kthread\n");
 }

 static void hwlat_tracer_stop(struct trace_array *tr)
 {
-	stop_kthread();
+	if (hwlat_data.thread_mode == MODE_PER_CPU)
+		stop_per_cpu_kthreads();
+	else
+		stop_single_kthread();
 }

 static int hwlat_tracer_init(struct trace_array *tr)
@@ -760,7 +874,7 @@ static int hwlat_tracer_init(struct trace_array *tr)

 static void hwlat_tracer_reset(struct trace_array *tr)
 {
-	stop_kthread();
+	hwlat_tracer_stop(tr);

 	/* the tracing threshold is static between runs */
 	last_tracing_thresh = tracing_thresh;