// SPDX-License-Identifier: GPL-2.0
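/*
 * page_pinner: records, via page_ext, the stack trace and timestamp of
 * page pin events.  Two ring buffers of captured records are exposed
 * through debugfs: pins held longer than a configurable threshold
 * ("longterm_pinner") and pages that failed migration while failure
 * tracking is enabled ("alloc_contig_failed").
 */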
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_pinner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
#include <linux/sched/clock.h>

#include "internal.h"

#define PAGE_PINNER_STACK_DEPTH 16
#define LONGTERM_PIN_BUCKETS 4096

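/* per-base-page pin state kept in the page_ext area */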
struct page_pinner {
	depot_stack_handle_t handle;
	s64 ts_usec;
	atomic_t count;
};

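/*
 * Snapshot of a pin event copied out of page_pinner/struct page at record
 * time: @elapsed is used for long-term pin records, @ts_usec for
 * migration-failure records.
 */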
struct captured_pinner {
	depot_stack_handle_t handle;
	union {
		s64 ts_usec;
		s64 elapsed;
	};

	/* struct page fields */
	unsigned long pfn;
	int count;
	int mapcount;
	struct address_space *mapping;
	unsigned long flags;
};

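/* ring buffer of captured records; @index is the next slot to overwrite */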
struct longterm_pinner {
	spinlock_t lock;
	unsigned int index;
	struct captured_pinner pinner[LONGTERM_PIN_BUCKETS];
};

static struct longterm_pinner lt_pinner = {
	.lock = __SPIN_LOCK_UNLOCKED(lt_pinner.lock),
};

static s64 threshold_usec = 300000;

/* alloc_contig failed pinner */
static struct longterm_pinner acf_pinner = {
	.lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
};

static bool page_pinner_enabled;
DEFINE_STATIC_KEY_FALSE(page_pinner_inited);

DEFINE_STATIC_KEY_TRUE(failure_tracking);
EXPORT_SYMBOL(failure_tracking);

static depot_stack_handle_t failure_handle;

static int __init early_page_pinner_param(char *buf)
{
	page_pinner_enabled = true;
	return 0;
}
early_param("page_pinner", early_page_pinner_param);

static bool need_page_pinner(void)
{
	return page_pinner_enabled;
}

static noinline void register_failure_stack(void)
{
	unsigned long entries[4];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	failure_handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
}

static void init_page_pinner(void)
{
	if (!page_pinner_enabled)
		return;

	register_failure_stack();
	static_branch_enable(&page_pinner_inited);
}

struct page_ext_operations page_pinner_ops = {
	.size = sizeof(struct page_pinner),
	.need = need_page_pinner,
	.init = init_page_pinner,
};

static inline struct page_pinner *get_page_pinner(struct page_ext *page_ext)
{
	return (void *)page_ext + page_pinner_ops.offset;
}

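/*
 * Save the current stack in the stack depot, falling back to the
 * pre-registered failure stack when the depot cannot allocate.
 */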
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
	unsigned long entries[PAGE_PINNER_STACK_DEPTH];
	depot_stack_handle_t handle;
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
	handle = stack_depot_save(entries, nr_entries, flags);
	if (!handle)
		handle = failure_handle;

	return handle;
}

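/* copy the struct page fields reported later by print_page_pinner() */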
static void capture_page_state(struct page *page,
			       struct captured_pinner *record)
{
	record->flags = page->flags;
	record->mapping = page_mapping(page);
	record->pfn = page_to_pfn(page);
	record->count = page_count(page);
	record->mapcount = page_mapcount(page);
}

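/*
 * If the page was pinned for longer than threshold_usec, capture its
 * state and the pinning stack into the lt_pinner ring buffer.
 */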
static void check_longterm_pin(struct page_pinner *page_pinner,
			       struct page *page)
{
	s64 now, delta = 0;
	unsigned long flags;
	unsigned int idx;
	struct captured_pinner record;

	now = ktime_to_us(ktime_get_boottime());

	/* get/put_page can race; ignore that case */
	if (page_pinner->ts_usec < now)
		delta = now - page_pinner->ts_usec;

	if (delta <= threshold_usec)
		return;

	record.handle = page_pinner->handle;
	record.elapsed = delta;
	capture_page_state(page, &record);

	spin_lock_irqsave(&lt_pinner.lock, flags);
	idx = lt_pinner.index++;
	lt_pinner.index %= LONGTERM_PIN_BUCKETS;
	lt_pinner.pinner[idx] = record;
	spin_unlock_irqrestore(&lt_pinner.lock, flags);
}

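/*
 * Clear the pin state for each base page of the allocation.  On free,
 * also record the free path for migration-failed pages and clear the
 * failure flag; otherwise check whether the pin exceeded the long-term
 * threshold.
 */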
void __reset_page_pinner(struct page *page, unsigned int order, bool free)
{
	struct page_pinner *page_pinner;
	struct page_ext *page_ext;
	int i;

	page_ext = lookup_page_ext(page);
	if (unlikely(!page_ext))
		return;

	for (i = 0; i < (1 << order); i++) {
		if (!test_bit(PAGE_EXT_GET, &page_ext->flags) &&
		    !test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED,
			      &page_ext->flags))
			continue;

		page_pinner = get_page_pinner(page_ext);
		if (free) {
			/* record page free call path */
			__page_pinner_migration_failed(page);
			atomic_set(&page_pinner->count, 0);
			__clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
		} else {
			check_longterm_pin(page_pinner, page);
		}
		clear_bit(PAGE_EXT_GET, &page_ext->flags);
		page_ext = page_ext_next(page_ext);
	}
}

static inline void __set_page_pinner_handle(struct page *page,
		struct page_ext *page_ext, depot_stack_handle_t handle,
		unsigned int order)
{
	struct page_pinner *page_pinner;
	int i;
	s64 usec = ktime_to_us(ktime_get_boottime());

	for (i = 0; i < (1 << order); i++) {
		page_pinner = get_page_pinner(page_ext);
		page_pinner->handle = handle;
		page_pinner->ts_usec = usec;
		set_bit(PAGE_EXT_GET, &page_ext->flags);
		atomic_inc(&page_pinner->count);
		page_ext = page_ext_next(page_ext);
	}
}

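/* record the pinning stack and timestamp for each base page */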
noinline void __set_page_pinner(struct page *page, unsigned int order)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	depot_stack_handle_t handle;

	if (unlikely(!page_ext))
		return;

	handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
	__set_page_pinner_handle(page, page_ext, handle, order);
}

static ssize_t
print_page_pinner(bool longterm, char __user *buf, size_t count, struct captured_pinner *record)
{
	int ret;
	unsigned long *entries;
	unsigned int nr_entries;
	char *kbuf;

	count = min_t(size_t, count, PAGE_SIZE);
	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	if (longterm) {
		ret = snprintf(kbuf, count, "Page pinned for %lld us\n",
			       record->elapsed);
	} else {
		s64 ts_usec = record->ts_usec;
		unsigned long rem_usec = do_div(ts_usec, 1000000);

		ret = snprintf(kbuf, count,
			       "Page pinned ts [%5lu.%06lu]\n",
			       (unsigned long)ts_usec, rem_usec);
	}

	if (ret >= count)
		goto err;

	/* Print information relevant to grouping pages by mobility */
	ret += snprintf(kbuf + ret, count - ret,
			"PFN 0x%lx Block %lu count %d mapcount %d mapping %pS Flags %#lx(%pGp)\n",
			record->pfn,
			record->pfn >> pageblock_order,
			record->count, record->mapcount,
			record->mapping,
			record->flags, &record->flags);

	if (ret >= count)
		goto err;

	nr_entries = stack_depot_fetch(record->handle, &entries);
	ret += stack_trace_snprint(kbuf + ret, count - ret, entries,
				   nr_entries, 0);
	if (ret >= count)
		goto err;

	ret += snprintf(kbuf + ret, count - ret, "\n");
	if (ret >= count)
		goto err;

	if (copy_to_user(buf, kbuf, ret))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;

err:
	kfree(kbuf);
	return -ENOMEM;
}

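/* print the recorded pin state and stack trace for @page to the kernel log */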
void __dump_page_pinner(struct page *page)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	struct page_pinner *page_pinner;
	depot_stack_handle_t handle;
	unsigned long *entries;
	unsigned int nr_entries;
	int pageblock_mt;
	unsigned long pfn;
	int count;
	unsigned long rem_usec;
	s64 ts_usec;

	if (unlikely(!page_ext)) {
		pr_alert("There is no page extension available.\n");
		return;
	}

	page_pinner = get_page_pinner(page_ext);

	count = atomic_read(&page_pinner->count);
	if (!count) {
		pr_alert("page_pinner info is not present (never set?)\n");
		return;
	}

	pfn = page_to_pfn(page);
	ts_usec = page_pinner->ts_usec;
	rem_usec = do_div(ts_usec, 1000000);
	pr_alert("page last pinned [%5lu.%06lu] count %d\n",
		 (unsigned long)ts_usec, rem_usec, count);

	pageblock_mt = get_pageblock_migratetype(page);
	pr_alert("PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
		 pfn,
		 pfn >> pageblock_order,
		 migratetype_names[pageblock_mt],
		 page->flags, &page->flags);

	handle = READ_ONCE(page_pinner->handle);
	if (!handle) {
		pr_alert("page_pinner allocation stack trace missing\n");
	} else {
		nr_entries = stack_depot_fetch(handle, &entries);
		stack_trace_print(entries, nr_entries, 0);
	}
}

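/*
 * Record the current stack, timestamp and page state into the acf_pinner
 * ring buffer for a page already marked as a migration failure.
 */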
void __page_pinner_migration_failed(struct page *page)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	struct page_pinner *page_pinner;
	struct captured_pinner record;
	unsigned long flags;
	unsigned int idx;

	if (unlikely(!page_ext))
		return;

	page_pinner = get_page_pinner(page_ext);
	if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
		return;

	record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
	record.ts_usec = ktime_to_us(ktime_get_boottime());
	capture_page_state(page, &record);

	spin_lock_irqsave(&acf_pinner.lock, flags);
	idx = acf_pinner.index++;
	acf_pinner.index %= LONGTERM_PIN_BUCKETS;
	acf_pinner.pinner[idx] = record;
	spin_unlock_irqrestore(&acf_pinner.lock, flags);
}
EXPORT_SYMBOL(__page_pinner_migration_failed);

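/*
 * Mark each page on @page_list as a migration failure and record its
 * current state; pages with a single reference are skipped since
 * putback_movable_pages() will free them.
 */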
void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
{
	struct page *page;
	struct page_ext *page_ext;

	list_for_each_entry(page, page_list, lru) {
		/* The page will be freed by putback_movable_pages soon */
		if (page_count(page) == 1)
			continue;
		page_ext = lookup_page_ext(page);
		if (unlikely(!page_ext))
			continue;
		__set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
		__page_pinner_migration_failed(page);
	}
}

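/*
 * debugfs read handler: returns one captured record per call, newest
 * first, using *ppos as the offset back from the latest entry.
 */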
static ssize_t
read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
			  loff_t *ppos)
{
	loff_t i, idx;
	struct captured_pinner record;
	unsigned long flags;

	if (!static_branch_unlikely(&page_pinner_inited))
		return -EINVAL;

	if (*ppos >= LONGTERM_PIN_BUCKETS)
		return 0;

	i = *ppos;
	*ppos = i + 1;

	/*
	 * Read the records in reverse order so the newest one is
	 * returned first, followed by older ones.
	 */
	idx = (lt_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
	      LONGTERM_PIN_BUCKETS;
	spin_lock_irqsave(&lt_pinner.lock, flags);
	record = lt_pinner.pinner[idx];
	spin_unlock_irqrestore(&lt_pinner.lock, flags);
	if (!record.handle)
		return 0;

	return print_page_pinner(true, buf, count, &record);
}

static const struct file_operations proc_longterm_pinner_operations = {
	.read = read_longterm_page_pinner,
};

static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
					size_t count, loff_t *ppos)
{
	loff_t i, idx;
	struct captured_pinner record;
	unsigned long flags;

	if (!static_branch_unlikely(&failure_tracking))
		return -EINVAL;

	if (*ppos >= LONGTERM_PIN_BUCKETS)
		return 0;

	i = *ppos;
	*ppos = i + 1;

	/*
	 * Read the records in reverse order so the newest one is
	 * returned first, followed by older ones.
	 */
	idx = (acf_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
	      LONGTERM_PIN_BUCKETS;

	spin_lock_irqsave(&acf_pinner.lock, flags);
	record = acf_pinner.pinner[idx];
	spin_unlock_irqrestore(&acf_pinner.lock, flags);
	if (!record.handle)
		return 0;

	return print_page_pinner(false, buf, count, &record);
}

static const struct file_operations proc_alloc_contig_failed_operations = {
	.read = read_alloc_contig_failed,
};

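/* updating the threshold also clears the recorded long-term pin history */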
static int pp_threshold_set(void *data, unsigned long long val)
{
	unsigned long flags;

	threshold_usec = (s64)val;

	spin_lock_irqsave(&lt_pinner.lock, flags);
	memset(lt_pinner.pinner, 0,
	       sizeof(struct captured_pinner) * LONGTERM_PIN_BUCKETS);
	lt_pinner.index = 0;
	spin_unlock_irqrestore(&lt_pinner.lock, flags);
	return 0;
}

static int pp_threshold_get(void *data, unsigned long long *val)
{
	*val = (unsigned long long)threshold_usec;

	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
			 pp_threshold_set, "%lld\n");

static int failure_tracking_set(void *data, u64 val)
{
	bool on;

	on = (bool)val;
	if (on)
		static_branch_enable(&failure_tracking);
	else
		static_branch_disable(&failure_tracking);
	return 0;
}

static int failure_tracking_get(void *data, u64 *val)
{
	*val = static_branch_unlikely(&failure_tracking);
	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
			 failure_tracking_get,
			 failure_tracking_set, "%llu\n");

static int __init page_pinner_init(void)
{
	struct dentry *pp_debugfs_root;

	if (!static_branch_unlikely(&page_pinner_inited))
		return 0;

	pr_info("page_pinner enabled\n");

	pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);

	debugfs_create_file("longterm_pinner", 0444, pp_debugfs_root, NULL,
			    &proc_longterm_pinner_operations);

	debugfs_create_file("threshold", 0644, pp_debugfs_root, NULL,
			    &pp_threshold_fops);

	debugfs_create_file("alloc_contig_failed", 0444,
			    pp_debugfs_root, NULL,
			    &proc_alloc_contig_failed_operations);

	debugfs_create_file("failure_tracking", 0644,
			    pp_debugfs_root, NULL,
			    &failure_tracking_fops);
	return 0;
}
late_initcall(page_pinner_init)