blob: 9bf41de47e9a0dcb20bb2877dd66d71798f3d9a6 [file] [log] [blame]
Minchan Kim6e12c5b2021-03-18 09:56:10 -07001// SPDX-License-Identifier: GPL-2.0
2#include <linux/debugfs.h>
3#include <linux/mm.h>
4#include <linux/slab.h>
5#include <linux/uaccess.h>
6#include <linux/memblock.h>
7#include <linux/stacktrace.h>
8#include <linux/page_pinner.h>
9#include <linux/jump_label.h>
10#include <linux/migrate.h>
11#include <linux/stackdepot.h>
12#include <linux/seq_file.h>
13#include <linux/sched/clock.h>
14
15#include "internal.h"
16
/* Max stack depth captured into stackdepot for a pin event */
#define PAGE_PINNER_STACK_DEPTH 16
/* Ring-buffer capacity of lt_pinner/acf_pinner ("LONTERM" spelling is historical) */
#define LONTERM_PIN_BUCKETS 4096
19
/*
 * Per-page pin state, stored in the page's page_ext area
 * (see page_pinner_ops / get_page_pinner()).
 */
struct page_pinner {
	depot_stack_handle_t handle;	/* stack trace of the last pin */
	s64 ts_usec;			/* boottime timestamp of the last pin, usec */
	atomic_t count;			/* number of outstanding pins */
};
25
/*
 * Snapshot of one interesting pin event, copied into a ring buffer
 * (lt_pinner or acf_pinner) so the record survives page reuse.
 */
struct captured_pinner {
	depot_stack_handle_t handle;	/* pin stack trace */
	s64 ts_usec;	/* pin duration (lt_pinner) or event timestamp (acf_pinner) */
	int page_mt;			/* pageblock migratetype at capture */
	unsigned long page_flags;	/* page->flags at capture */
	unsigned long pfn;
};
33
/*
 * Fixed-size overwrite ring of captured pin records, protected by @lock.
 */
struct longterm_pinner {
	spinlock_t lock;
	unsigned int index;	/* next slot to write; wraps at LONTERM_PIN_BUCKETS */
	struct captured_pinner pinner[LONTERM_PIN_BUCKETS];
};
39
/* records pins held longer than threshold_usec */
static struct longterm_pinner lt_pinner = {
	.lock = __SPIN_LOCK_UNLOCKED(lt_pinner.lock),
};

/* pins held longer than this (usec) are captured; tunable via debugfs "threshold" */
static s64 threshold_usec = 300000;
45
/* alloc_contig failed pinner */
static struct longterm_pinner acf_pinner = {
	.lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
};

/* set by the "page_pinner" early param; consumed by need/init callbacks */
static bool page_pinner_enabled;
DEFINE_STATIC_KEY_FALSE(page_pinner_inited);

/* runtime switch for migration-failure tracking (debugfs "failure_tracking") */
DEFINE_STATIC_KEY_TRUE(failure_tracking);
EXPORT_SYMBOL(failure_tracking);

/* sentinel stack recorded when stack_depot_save() fails in save_stack() */
static depot_stack_handle_t failure_handle;
58
/*
 * Enable page_pinner via "page_pinner" on the kernel command line.
 * The parameter value (@buf) is ignored; mere presence enables it.
 */
static int __init early_page_pinner_param(char *buf)
{
	page_pinner_enabled = true;
	return 0;
}
early_param("page_pinner", early_page_pinner_param);
65
/* page_ext callback: reserve page_pinner space only when enabled at boot */
static bool need_page_pinner(void)
{
	return page_pinner_enabled;
}
70
71static noinline void register_failure_stack(void)
72{
73 unsigned long entries[4];
74 unsigned int nr_entries;
75
76 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
77 failure_handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
78}
79
80static void init_page_pinner(void)
81{
82 if (!page_pinner_enabled)
83 return;
84
85 register_failure_stack();
86 static_branch_enable(&page_pinner_inited);
87}
88
/* Hooks page_pinner storage and init into the page_ext framework */
struct page_ext_operations page_pinner_ops = {
	.size = sizeof(struct page_pinner),
	.need = need_page_pinner,
	.init = init_page_pinner,
};
94
/* Locate this page's page_pinner record inside its page_ext blob */
static inline struct page_pinner *get_page_pinner(struct page_ext *page_ext)
{
	return (void *)page_ext + page_pinner_ops.offset;
}
99
100static noinline depot_stack_handle_t save_stack(gfp_t flags)
101{
102 unsigned long entries[PAGE_PINNER_STACK_DEPTH];
103 depot_stack_handle_t handle;
104 unsigned int nr_entries;
105
106 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
107 handle = stack_depot_save(entries, nr_entries, flags);
108 if (!handle)
109 handle = failure_handle;
110
111 return handle;
112}
113
114static void check_lonterm_pin(struct page_pinner *page_pinner,
115 struct page *page)
116{
117 s64 now, delta = 0;
118 unsigned long flags;
119 unsigned int idx;
120
121 now = ktime_to_us(ktime_get_boottime());
122
123 /* get/put_page can be raced. Ignore that case */
124 if (page_pinner->ts_usec < now)
125 delta = now - page_pinner->ts_usec;
126
127 if (delta <= threshold_usec)
128 return;
129
130 spin_lock_irqsave(&lt_pinner.lock, flags);
131 idx = lt_pinner.index++;
132 lt_pinner.index %= LONTERM_PIN_BUCKETS;
133
134 lt_pinner.pinner[idx].handle = page_pinner->handle;
135 lt_pinner.pinner[idx].ts_usec = delta;
136 lt_pinner.pinner[idx].page_flags = page->flags;
137 lt_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
138 lt_pinner.pinner[idx].pfn = page_to_pfn(page);
139 spin_unlock_irqrestore(&lt_pinner.lock, flags);
140
141}
142
143void __reset_page_pinner(struct page *page, unsigned int order, bool free)
144{
145 struct page_pinner *page_pinner;
146 struct page_ext *page_ext;
147 int i;
148
149 page_ext = lookup_page_ext(page);
150 if (unlikely(!page_ext))
151 return;
152
153 for (i = 0; i < (1 << order); i++) {
154 if (!test_bit(PAGE_EXT_GET, &page_ext->flags))
155 continue;
156
157 page_pinner = get_page_pinner(page_ext);
158 if (free) {
159 WARN_ON_ONCE(atomic_read(&page_pinner->count));
160 atomic_set(&page_pinner->count, 0);
Minchan Kimddc4a482021-03-29 16:48:47 -0700161 __clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700162 } else {
163 WARN_ON_ONCE(atomic_dec_if_positive(
164 &page_pinner->count) < 0);
165 check_lonterm_pin(page_pinner, page);
166 }
167 clear_bit(PAGE_EXT_GET, &page_ext->flags);
168 page_ext = page_ext_next(page_ext);
169 }
170}
171
172static inline void __set_page_pinner_handle(struct page *page,
173 struct page_ext *page_ext, depot_stack_handle_t handle,
174 unsigned int order)
175{
176 struct page_pinner *page_pinner;
177 int i;
178 s64 usec = ktime_to_us(ktime_get_boottime());
179
180 for (i = 0; i < (1 << order); i++) {
181 page_pinner = get_page_pinner(page_ext);
182 page_pinner->handle = handle;
183 page_pinner->ts_usec = usec;
184 set_bit(PAGE_EXT_GET, &page_ext->flags);
185 atomic_inc(&page_pinner->count);
186 page_ext = page_ext_next(page_ext);
187 }
188}
189
190noinline void __set_page_pinner(struct page *page, unsigned int order)
191{
192 struct page_ext *page_ext = lookup_page_ext(page);
193 depot_stack_handle_t handle;
194
195 if (unlikely(!page_ext))
196 return;
197
198 handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
199 __set_page_pinner_handle(page, page_ext, handle, order);
200}
201
/*
 * Format one captured pin record into the userspace buffer @buf.
 *
 * Returns the number of bytes copied, -EFAULT if copy_to_user() fails,
 * or -ENOMEM on kmalloc failure. NOTE(review): output truncation also
 * returns -ENOMEM (the err label is shared) — callers see a generic
 * failure rather than a short read.
 */
static ssize_t
print_page_pinner(char __user *buf, size_t count, unsigned long pfn,
		  int pageblock_mt, unsigned long page_flags, s64 ts_usec,
		  depot_stack_handle_t handle, int shared_count)
{
	int ret;
	unsigned long *entries;
	unsigned int nr_entries;
	char *kbuf;

	/* at most one page of text per record */
	count = min_t(size_t, count, PAGE_SIZE);
	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = snprintf(kbuf, count,
		       "Page pinned ts %lld us count %d\n",
		       ts_usec, shared_count);

	if (ret >= count)
		goto err;

	/* Print information relevant to grouping pages by mobility */
	ret += snprintf(kbuf + ret, count - ret,
			"PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
			pfn,
			pfn >> pageblock_order,
			migratetype_names[pageblock_mt],
			page_flags, &page_flags);

	if (ret >= count)
		goto err;

	/* decode the depot handle and append the pin-time stack trace */
	nr_entries = stack_depot_fetch(handle, &entries);
	ret += stack_trace_snprint(kbuf + ret, count - ret, entries,
				   nr_entries, 0);
	if (ret >= count)
		goto err;

	ret += snprintf(kbuf + ret, count - ret, "\n");
	if (ret >= count)
		goto err;

	if (copy_to_user(buf, kbuf, ret))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;

err:
	kfree(kbuf);
	return -ENOMEM;
}
255
/*
 * Dump the current pin state of @page to the kernel log via pr_alert.
 * Debugging aid; reads the live page_pinner record without locking.
 */
void __dump_page_pinner(struct page *page)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	struct page_pinner *page_pinner;
	depot_stack_handle_t handle;
	unsigned long *entries;
	unsigned int nr_entries;
	int pageblock_mt;
	unsigned long pfn;
	int count;

	if (unlikely(!page_ext)) {
		pr_alert("There is not page extension available.\n");
		return;
	}

	page_pinner = get_page_pinner(page_ext);

	/* nothing to report if the page has no outstanding pin */
	count = atomic_read(&page_pinner->count);
	if (!count) {
		pr_alert("page_pinner info is not present (never set?)\n");
		return;
	}

	pfn = page_to_pfn(page);
	pr_alert("page last pinned ts %lld count %d\n",
		 page_pinner->ts_usec,
		 count);

	pageblock_mt = get_pageblock_migratetype(page);
	pr_alert("PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
		 pfn,
		 pfn >> pageblock_order,
		 migratetype_names[pageblock_mt],
		 page->flags, &page->flags);

	handle = READ_ONCE(page_pinner->handle);
	if (!handle) {
		pr_alert("page_pinner allocation stack trace missing\n");
	} else {
		nr_entries = stack_depot_fetch(handle, &entries);
		stack_trace_print(entries, nr_entries, 0);
	}
}
300
Minchan Kimddc4a482021-03-29 16:48:47 -0700301void __page_pinner_migration_failed(struct page *page)
302{
303 struct page_ext *page_ext = lookup_page_ext(page);
304 struct page_pinner *page_pinner;
305 depot_stack_handle_t handle;
306 unsigned long flags;
307 unsigned int idx;
308
309 if (unlikely(!page_ext))
310 return;
311
312 page_pinner = get_page_pinner(page_ext);
313 if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
314 return;
315
316 handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
317
318 spin_lock_irqsave(&acf_pinner.lock, flags);
319 idx = acf_pinner.index++;
320 acf_pinner.index %= LONTERM_PIN_BUCKETS;
321
322 acf_pinner.pinner[idx].handle = handle;
323 acf_pinner.pinner[idx].ts_usec = ktime_to_us(ktime_get_boottime());
324 acf_pinner.pinner[idx].page_flags = page->flags;
325 acf_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
326 acf_pinner.pinner[idx].pfn = page_to_pfn(page);
327 spin_unlock_irqrestore(&acf_pinner.lock, flags);
328}
329EXPORT_SYMBOL(__page_pinner_migration_failed);
330
331void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
332{
333 struct page *page;
334 struct page_ext *page_ext;
335
336 list_for_each_entry(page, page_list, lru) {
337 page_ext = lookup_page_ext(page);
338 if (unlikely(!page_ext))
339 continue;
340 __set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
341 }
342}
343
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700344static ssize_t
345read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
346 loff_t *ppos)
347{
348 loff_t i, idx;
349 struct captured_pinner record;
350 unsigned long flags;
351
352 if (!static_branch_unlikely(&page_pinner_inited))
353 return -EINVAL;
354
355 if (*ppos >= LONTERM_PIN_BUCKETS)
356 return 0;
357
358 i = *ppos;
359 *ppos = i + 1;
360
361 /*
362 * reading the records in the reverse order with newest one
363 * being read first followed by older ones
364 */
365 idx = (lt_pinner.index - 1 - i + LONTERM_PIN_BUCKETS) %
366 LONTERM_PIN_BUCKETS;
367 spin_lock_irqsave(&lt_pinner.lock, flags);
368 record = lt_pinner.pinner[idx];
369 spin_unlock_irqrestore(&lt_pinner.lock, flags);
370 if (!record.handle)
371 return 0;
372
373 return print_page_pinner(buf, count, record.pfn, record.page_mt,
374 record.page_flags, record.ts_usec,
375 record.handle, 0);
376}
377
/* debugfs "longterm_pinner": one record per read() call */
static const struct file_operations proc_longterm_pinner_operations = {
	.read = read_longterm_page_pinner,
};
381
Minchan Kimddc4a482021-03-29 16:48:47 -0700382static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
383 size_t count, loff_t *ppos)
384{
385 loff_t i, idx;
386 struct captured_pinner record;
387 unsigned long flags;
388
389 if (!static_branch_unlikely(&failure_tracking))
390 return -EINVAL;
391
392 if (*ppos >= LONTERM_PIN_BUCKETS)
393 return 0;
394
395 i = *ppos;
396 *ppos = i + 1;
397
398 /*
399 * reading the records in the reverse order with newest one
400 * being read first followed by older ones
401 */
402 idx = (acf_pinner.index - 1 - i + LONTERM_PIN_BUCKETS) %
403 LONTERM_PIN_BUCKETS;
404
405 spin_lock_irqsave(&acf_pinner.lock, flags);
406 record = acf_pinner.pinner[idx];
407 spin_unlock_irqrestore(&acf_pinner.lock, flags);
408 if (!record.handle)
409 return 0;
410
411 return print_page_pinner(buf, count, record.pfn, record.page_mt,
412 record.page_flags, record.ts_usec,
413 record.handle, 0);
414}
415
/* debugfs "alloc_contig_failed": one record per read() call */
static const struct file_operations proc_alloc_contig_failed_operations = {
	.read = read_alloc_contig_failed,
};
419
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700420static int pp_threshold_set(void *data, unsigned long long val)
421{
422 unsigned long flags;
423
424 threshold_usec = (s64)val;
425
426 spin_lock_irqsave(&lt_pinner.lock, flags);
427 memset(lt_pinner.pinner, 0,
428 sizeof(struct captured_pinner) * LONTERM_PIN_BUCKETS);
429 lt_pinner.index = 0;
430 spin_unlock_irqrestore(&lt_pinner.lock, flags);
431 return 0;
432}
433
/* debugfs getter for "threshold": report the current threshold in usec */
static int pp_threshold_get(void *data, unsigned long long *val)
{
	*val = (unsigned long long)threshold_usec;

	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
			 pp_threshold_set, "%lld\n");
442
Minchan Kimddc4a482021-03-29 16:48:47 -0700443static int failure_tracking_set(void *data, u64 val)
444{
445 bool on;
446
447 on = (bool)val;
448 if (on)
449 static_branch_enable(&failure_tracking);
450 else
451 static_branch_disable(&failure_tracking);
452 return 0;
453}
454
/* debugfs getter for "failure_tracking": report the static-key state */
static int failure_tracking_get(void *data, u64 *val)
{
	*val = static_branch_unlikely(&failure_tracking);
	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
			 failure_tracking_get,
			 failure_tracking_set, "%llu\n");
463
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700464static int __init page_pinner_init(void)
465{
466 struct dentry *pp_debugfs_root;
467
468 if (!static_branch_unlikely(&page_pinner_inited))
469 return 0;
470
471 pr_info("page_pinner enabled\n");
Minchan Kimddc4a482021-03-29 16:48:47 -0700472
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700473 pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
474
475 debugfs_create_file("longterm_pinner", 0400, pp_debugfs_root, NULL,
476 &proc_longterm_pinner_operations);
477
478 debugfs_create_file("threshold", 0444, pp_debugfs_root, NULL,
479 &pp_threshold_fops);
Minchan Kimddc4a482021-03-29 16:48:47 -0700480
481 debugfs_create_file("alloc_contig_failed", 0400,
482 pp_debugfs_root, NULL,
483 &proc_alloc_contig_failed_operations);
484
485 debugfs_create_file("failure_tracking", 0444,
486 pp_debugfs_root, NULL,
487 &failure_tracking_fops);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700488 return 0;
489}
490late_initcall(page_pinner_init)