
Commit f46b165

Daniel Bristot de Oliveira authored and rostedt committed
trace/hwlat: Implement the per-cpu mode
Implements the per-cpu mode, in which a sampling thread is created for each CPU in the "cpus" file (and tracing_cpumask). The per-cpu mode has the potential to speed up hwlat detection by running on multiple CPUs at the same time, at the cost of higher CPU usage with IRQs disabled. Use with care.

[ Changed get_cpu_data() to static.
  Reported-by: kernel test robot <[email protected]> ]

Link: https://lkml.kernel.org/r/ec06d0ab340e8460d293772faba19ad8a5c371aa.1624372313.git.bristot@redhat.com

Cc: Phil Auld <[email protected]>
Cc: Sebastian Andrzej Siewior <[email protected]>
Cc: Kate Carcia <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Alexandre Chartre <[email protected]>
Cc: Clark Williams <[email protected]>
Cc: John Kacur <[email protected]>
Cc: Juri Lelli <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
Signed-off-by: Steven Rostedt (VMware) <[email protected]>
1 parent 7bb7d80 commit f46b165

File tree

2 files changed: +152 additions, -37 deletions

Documentation/trace/hwlat_detector.rst

Lines changed: 2 additions & 1 deletion
@@ -78,10 +78,11 @@ in /sys/kernel/tracing:
  - hwlat_detector/window - amount of time between (width) runs (usecs)
  - hwlat_detector/mode   - the thread mode
 
-By default, the hwlat detector's kernel thread will migrate across each CPU
+By default, one hwlat detector's kernel thread will migrate across each CPU
 specified in cpumask at the beginning of a new window, in a round-robin
 fashion. This behavior can be changed by changing the thread mode,
 the available options are:
 
  - none:        do not force migration
  - round-robin: migrate across each CPU specified in cpumask [default]
+ - per-cpu:     create one thread for each cpu in tracing_cpumask
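As a usage illustration (not part of this commit): a minimal user-space sketch that drives the new mode through the tracefs files documented above. It assumes tracefs mounted at /sys/kernel/tracing, a kernel carrying this patch, and root privileges; the write_file() helper and the example cpumask are hypothetical.

/* hwlat_per_cpu.c - sketch: enable hwlat's per-cpu mode via tracefs. */
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical helper: write one value into a tracefs file. */
static void write_file(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                exit(EXIT_FAILURE);
        }
        fprintf(f, "%s\n", val);
        fclose(f);
}

int main(void)
{
        /* Limit the sampling threads to CPUs 0-3 (hex mask "f"). */
        write_file("/sys/kernel/tracing/tracing_cpumask", "f");
        /* Pick one of the modes listed above: none, round-robin, per-cpu. */
        write_file("/sys/kernel/tracing/hwlat_detector/mode", "per-cpu");
        /* Starting the tracer spawns one hwlatd/N thread per allowed CPU. */
        write_file("/sys/kernel/tracing/current_tracer", "hwlat");
        return 0;
}

Note the ordering: the mode is selected before the tracer is enabled, since hwlat rejects mode changes while it is busy.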

kernel/trace/trace_hwlat.c

Lines changed: 150 additions & 36 deletions
@@ -54,28 +54,33 @@ static struct trace_array *hwlat_trace;
 #define DEFAULT_SAMPLE_WIDTH    500000  /* 0.5s */
 #define DEFAULT_LAT_THRESHOLD   10      /* 10us */
 
-/* sampling thread*/
-static struct task_struct *hwlat_kthread;
-
 static struct dentry *hwlat_sample_width;   /* sample width us */
 static struct dentry *hwlat_sample_window;  /* sample window us */
 static struct dentry *hwlat_thread_mode;    /* hwlat thread mode */
 
 enum {
         MODE_NONE = 0,
         MODE_ROUND_ROBIN,
+        MODE_PER_CPU,
         MODE_MAX
 };
-static char *thread_mode_str[] = { "none", "round-robin" };
+static char *thread_mode_str[] = { "none", "round-robin", "per-cpu" };
 
 /* Save the previous tracing_thresh value */
 static unsigned long save_tracing_thresh;
 
-/* NMI timestamp counters */
-static u64 nmi_ts_start;
-static u64 nmi_total_ts;
-static int nmi_count;
-static int nmi_cpu;
+/* runtime kthread data */
+struct hwlat_kthread_data {
+        struct task_struct      *kthread;
+        /* NMI timestamp counters */
+        u64                     nmi_ts_start;
+        u64                     nmi_total_ts;
+        int                     nmi_count;
+        int                     nmi_cpu;
+};
+
+struct hwlat_kthread_data hwlat_single_cpu_data;
+DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data);
 
 /* Tells NMIs to call back to the hwlat tracer to record timestamps */
 bool trace_hwlat_callback_enabled;
@@ -112,6 +117,14 @@ static struct hwlat_data {
         .thread_mode            = MODE_ROUND_ROBIN
 };
 
+static struct hwlat_kthread_data *get_cpu_data(void)
+{
+        if (hwlat_data.thread_mode == MODE_PER_CPU)
+                return this_cpu_ptr(&hwlat_per_cpu_data);
+        else
+                return &hwlat_single_cpu_data;
+}
+
 static bool hwlat_busy;
 
 static void trace_hwlat_sample(struct hwlat_sample *sample)
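To make the dispatch above concrete, here is a minimal user-space analogy (explicitly not kernel code) of what get_cpu_data() does: per-cpu mode selects the slot of the CPU the caller runs on, any other mode falls back to one shared instance. sched_getcpu() stands in for this_cpu_ptr(), and all names are hypothetical.

/* get_cpu_data() analogy: per-CPU slot vs. single shared slot. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

#define MAX_CPUS 64

struct kthread_data {
        long nmi_count;
};

static struct kthread_data single_data;            /* the single-thread slot */
static struct kthread_data per_cpu_data[MAX_CPUS]; /* one slot per CPU */
static int per_cpu_mode = 1;                       /* toggle to compare paths */

static struct kthread_data *get_data(void)
{
        if (per_cpu_mode)
                return &per_cpu_data[sched_getcpu()]; /* like this_cpu_ptr() */
        return &single_data;
}

int main(void)
{
        get_data()->nmi_count++;
        printf("CPU %d, count %ld\n", sched_getcpu(), get_data()->nmi_count);
        return 0;
}

Unlike the kernel code, this analogy ignores preemption: a thread may migrate between two get_data() calls, which the kthread/NMI contexts using the real helper do not have to worry about.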
@@ -149,7 +162,9 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)
 
 void trace_hwlat_callback(bool enter)
 {
-        if (smp_processor_id() != nmi_cpu)
+        struct hwlat_kthread_data *kdata = get_cpu_data();
+
+        if (!kdata->kthread)
                 return;
 
         /*
@@ -158,13 +173,13 @@
          */
         if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
                 if (enter)
-                        nmi_ts_start = time_get();
+                        kdata->nmi_ts_start = time_get();
                 else
-                        nmi_total_ts += time_get() - nmi_ts_start;
+                        kdata->nmi_total_ts += time_get() - kdata->nmi_ts_start;
         }
 
         if (enter)
-                nmi_count++;
+                kdata->nmi_count++;
 }
 
 /**
@@ -176,6 +191,7 @@ void trace_hwlat_callback(bool enter)
  */
 static int get_sample(void)
 {
+        struct hwlat_kthread_data *kdata = get_cpu_data();
         struct trace_array *tr = hwlat_trace;
         struct hwlat_sample s;
         time_type start, t1, t2, last_t2;
@@ -188,9 +204,8 @@ static int get_sample(void)
 
         do_div(thresh, NSEC_PER_USEC); /* modifies interval value */
 
-        nmi_cpu = smp_processor_id();
-        nmi_total_ts = 0;
-        nmi_count = 0;
+        kdata->nmi_total_ts = 0;
+        kdata->nmi_count = 0;
         /* Make sure NMIs see this first */
         barrier();
 
@@ -260,15 +275,15 @@ static int get_sample(void)
                 ret = 1;
 
                 /* We read in microseconds */
-                if (nmi_total_ts)
-                        do_div(nmi_total_ts, NSEC_PER_USEC);
+                if (kdata->nmi_total_ts)
+                        do_div(kdata->nmi_total_ts, NSEC_PER_USEC);
 
                 hwlat_data.count++;
                 s.seqnum = hwlat_data.count;
                 s.duration = sample;
                 s.outer_duration = outer_sample;
-                s.nmi_total_ts = nmi_total_ts;
-                s.nmi_count = nmi_count;
+                s.nmi_total_ts = kdata->nmi_total_ts;
+                s.nmi_count = kdata->nmi_count;
                 s.count = count;
                 trace_hwlat_sample(&s);
 
@@ -364,21 +379,40 @@ static int kthread_fn(void *data)
 }
 
 /*
- * start_kthread - Kick off the hardware latency sampling/detector kthread
+ * stop_single_kthread - Inform the hardware latency sampling/detector kthread to stop
+ *
+ * This kicks the running hardware latency sampling/detector kernel thread and
+ * tells it to stop sampling now. Use this on unload and at system shutdown.
+ */
+static void stop_single_kthread(void)
+{
+        struct hwlat_kthread_data *kdata = get_cpu_data();
+        struct task_struct *kthread = kdata->kthread;
+
+        if (!kthread)
+                return;
+
+        kthread_stop(kthread);
+        kdata->kthread = NULL;
+}
+
+
+/*
+ * start_single_kthread - Kick off the hardware latency sampling/detector kthread
  *
  * This starts the kernel thread that will sit and sample the CPU timestamp
  * counter (TSC or similar) and look for potential hardware latencies.
  */
-static int start_kthread(struct trace_array *tr)
+static int start_single_kthread(struct trace_array *tr)
 {
+        struct hwlat_kthread_data *kdata = get_cpu_data();
         struct cpumask *current_mask = &save_cpumask;
         struct task_struct *kthread;
         int next_cpu;
 
-        if (hwlat_kthread)
+        if (kdata->kthread)
                 return 0;
 
-
         kthread = kthread_create(kthread_fn, NULL, "hwlatd");
         if (IS_ERR(kthread)) {
                 pr_err(BANNER "could not start sampling thread\n");
@@ -400,24 +434,97 @@ static int start_kthread(struct trace_array *tr)
 
         sched_setaffinity(kthread->pid, current_mask);
 
-        hwlat_kthread = kthread;
+        kdata->kthread = kthread;
         wake_up_process(kthread);
 
         return 0;
 }
 
 /*
- * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
+ * stop_cpu_kthread - Stop a hwlat cpu kthread
+ */
+static void stop_cpu_kthread(unsigned int cpu)
+{
+        struct task_struct *kthread;
+
+        kthread = per_cpu(hwlat_per_cpu_data, cpu).kthread;
+        if (kthread)
+                kthread_stop(kthread);
+}
+
+/*
+ * stop_per_cpu_kthreads - Inform the hardware latency sampling/detector kthreads to stop
  *
- * This kicks the running hardware latency sampling/detector kernel thread and
+ * This kicks the running hardware latency sampling/detector kernel threads and
  * tells it to stop sampling now. Use this on unload and at system shutdown.
  */
-static void stop_kthread(void)
+static void stop_per_cpu_kthreads(void)
 {
-        if (!hwlat_kthread)
-                return;
-        kthread_stop(hwlat_kthread);
-        hwlat_kthread = NULL;
+        unsigned int cpu;
+
+        get_online_cpus();
+        for_each_online_cpu(cpu)
+                stop_cpu_kthread(cpu);
+        put_online_cpus();
+}
+
+/*
+ * start_cpu_kthread - Start a hwlat cpu kthread
+ */
+static int start_cpu_kthread(unsigned int cpu)
+{
+        struct task_struct *kthread;
+        char comm[24];
+
+        snprintf(comm, 24, "hwlatd/%d", cpu);
+
+        kthread = kthread_create_on_cpu(kthread_fn, NULL, cpu, comm);
+        if (IS_ERR(kthread)) {
+                pr_err(BANNER "could not start sampling thread\n");
+                return -ENOMEM;
+        }
+
+        per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;
+        wake_up_process(kthread);
+
+        return 0;
+}
+
+/*
+ * start_per_cpu_kthreads - Kick off the hardware latency sampling/detector kthreads
+ *
+ * This starts the kernel threads that will sit on potentially all cpus and
+ * sample the CPU timestamp counter (TSC or similar) and look for potential
+ * hardware latencies.
+ */
+static int start_per_cpu_kthreads(struct trace_array *tr)
+{
+        struct cpumask *current_mask = &save_cpumask;
+        unsigned int cpu;
+        int retval;
+
+        get_online_cpus();
+        /*
+         * Run only on CPUs in which hwlat is allowed to run.
+         */
+        cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
+
+        for_each_online_cpu(cpu)
+                per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;
+
+        for_each_cpu(cpu, current_mask) {
+                retval = start_cpu_kthread(cpu);
+                if (retval)
+                        goto out_error;
+        }
+        put_online_cpus();
+
+        return 0;
+
+out_error:
+        put_online_cpus();
+        stop_per_cpu_kthreads();
+        return retval;
 }
 
 /*
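One way to observe the effect of start_per_cpu_kthreads() from user space is to scan thread names: in per-cpu mode one "hwlatd/N" thread per allowed CPU should appear (the comm built by snprintf() above), versus a single "hwlatd" in the other modes. A minimal sketch, assuming procfs:

/* List hwlatd kthreads by scanning /proc/<pid>/comm. */
#include <ctype.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        DIR *proc = opendir("/proc");
        struct dirent *de;

        if (!proc) {
                perror("/proc");
                return 1;
        }
        while ((de = readdir(proc))) {
                char path[280], comm[64];
                FILE *f;

                if (!isdigit((unsigned char)de->d_name[0]))
                        continue; /* only numeric PID entries */
                snprintf(path, sizeof(path), "/proc/%s/comm", de->d_name);
                f = fopen(path, "r");
                if (!f)
                        continue;
                if (fgets(comm, sizeof(comm), f) && !strncmp(comm, "hwlatd", 6))
                        printf("%s\t%s", de->d_name, comm); /* comm ends in \n */
                fclose(f);
        }
        closedir(proc);
        return 0;
}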
@@ -600,7 +707,8 @@ static void hwlat_tracer_stop(struct trace_array *tr);
  * The "none" sets the allowed cpumask for a single hwlatd thread at the
  * startup and lets the scheduler handle the migration. The default mode is
  * the "round-robin" one, in which a single hwlatd thread runs, migrating
- * among the allowed CPUs in a round-robin fashion.
+ * among the allowed CPUs in a round-robin fashion. The "per-cpu" mode
+ * creates one hwlatd thread per allowed CPU.
  */
 static ssize_t hwlat_mode_write(struct file *filp, const char __user *ubuf,
                                  size_t cnt, loff_t *ppos)
@@ -724,14 +832,20 @@ static void hwlat_tracer_start(struct trace_array *tr)
 {
         int err;
 
-        err = start_kthread(tr);
+        if (hwlat_data.thread_mode == MODE_PER_CPU)
+                err = start_per_cpu_kthreads(tr);
+        else
+                err = start_single_kthread(tr);
         if (err)
                 pr_err(BANNER "Cannot start hwlat kthread\n");
 }
 
 static void hwlat_tracer_stop(struct trace_array *tr)
 {
-        stop_kthread();
+        if (hwlat_data.thread_mode == MODE_PER_CPU)
+                stop_per_cpu_kthreads();
+        else
+                stop_single_kthread();
 }
 
 static int hwlat_tracer_init(struct trace_array *tr)
@@ -760,7 +874,7 @@ static int hwlat_tracer_init(struct trace_array *tr)
 
 static void hwlat_tracer_reset(struct trace_array *tr)
 {
-        stop_kthread();
+        hwlat_tracer_stop(tr);
 
         /* the tracing threshold is static between runs */
         last_tracing_thresh = tracing_thresh;
