blob: 0245881aadeb733387b9b8b1f7f4b1c6cc52dc62 [file] [log] [blame]
Minchan Kim6e12c5b2021-03-18 09:56:10 -07001// SPDX-License-Identifier: GPL-2.0
2#include <linux/debugfs.h>
3#include <linux/mm.h>
4#include <linux/slab.h>
5#include <linux/uaccess.h>
6#include <linux/memblock.h>
7#include <linux/stacktrace.h>
8#include <linux/page_pinner.h>
9#include <linux/jump_label.h>
10#include <linux/migrate.h>
11#include <linux/stackdepot.h>
12#include <linux/seq_file.h>
13#include <linux/sched/clock.h>
14
15#include "internal.h"
16
/* Upper bound on the stack frames captured for one pin call site. */
#define PAGE_PINNER_STACK_DEPTH	16
/* Number of slots in each captured_pinner ring buffer. */
#define LONGTERM_PIN_BUCKETS	4096
Minchan Kim6e12c5b2021-03-18 09:56:10 -070019
20struct page_pinner {
21 depot_stack_handle_t handle;
22 s64 ts_usec;
23 atomic_t count;
24};
25
26struct captured_pinner {
27 depot_stack_handle_t handle;
Minchan Kim0445b672021-07-07 13:46:58 -070028 union {
29 s64 ts_usec;
30 s64 elapsed;
31 };
Minchan Kim6e12c5b2021-03-18 09:56:10 -070032 int page_mt;
33 unsigned long page_flags;
34 unsigned long pfn;
35};
36
37struct longterm_pinner {
38 spinlock_t lock;
39 unsigned int index;
Minchan Kim9a453102021-07-07 13:10:59 -070040 struct captured_pinner pinner[LONGTERM_PIN_BUCKETS];
Minchan Kim6e12c5b2021-03-18 09:56:10 -070041};
42
43static struct longterm_pinner lt_pinner = {
44 .lock = __SPIN_LOCK_UNLOCKED(lt_pinner.lock),
45};
46
47static s64 threshold_usec = 300000;
48
Minchan Kimddc4a482021-03-29 16:48:47 -070049/* alloc_contig failed pinner */
50static struct longterm_pinner acf_pinner = {
51 .lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
52};
53
Minchan Kim6e12c5b2021-03-18 09:56:10 -070054static bool page_pinner_enabled;
55DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
56
Minchan Kimddc4a482021-03-29 16:48:47 -070057DEFINE_STATIC_KEY_TRUE(failure_tracking);
58EXPORT_SYMBOL(failure_tracking);
59
Minchan Kim6e12c5b2021-03-18 09:56:10 -070060static depot_stack_handle_t failure_handle;
61
62static int __init early_page_pinner_param(char *buf)
63{
64 page_pinner_enabled = true;
65 return 0;
66}
67early_param("page_pinner", early_page_pinner_param);
68
69static bool need_page_pinner(void)
70{
71 return page_pinner_enabled;
72}
73
74static noinline void register_failure_stack(void)
75{
76 unsigned long entries[4];
77 unsigned int nr_entries;
78
79 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
80 failure_handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
81}
82
83static void init_page_pinner(void)
84{
85 if (!page_pinner_enabled)
86 return;
87
88 register_failure_stack();
89 static_branch_enable(&page_pinner_inited);
90}
91
92struct page_ext_operations page_pinner_ops = {
93 .size = sizeof(struct page_pinner),
94 .need = need_page_pinner,
95 .init = init_page_pinner,
96};
97
98static inline struct page_pinner *get_page_pinner(struct page_ext *page_ext)
99{
100 return (void *)page_ext + page_pinner_ops.offset;
101}
102
103static noinline depot_stack_handle_t save_stack(gfp_t flags)
104{
105 unsigned long entries[PAGE_PINNER_STACK_DEPTH];
106 depot_stack_handle_t handle;
107 unsigned int nr_entries;
108
109 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
110 handle = stack_depot_save(entries, nr_entries, flags);
111 if (!handle)
112 handle = failure_handle;
113
114 return handle;
115}
116
Minchan Kim71da0672021-07-07 13:27:30 -0700117static void capture_page_state(struct page *page,
118 struct captured_pinner *record)
119{
120 record->page_flags = page->flags;
121 record->page_mt = get_pageblock_migratetype(page);
122 record->pfn = page_to_pfn(page);
123}
124
Minchan Kim9a453102021-07-07 13:10:59 -0700125static void check_longterm_pin(struct page_pinner *page_pinner,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700126 struct page *page)
127{
128 s64 now, delta = 0;
129 unsigned long flags;
130 unsigned int idx;
Minchan Kim71da0672021-07-07 13:27:30 -0700131 struct captured_pinner record;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700132
133 now = ktime_to_us(ktime_get_boottime());
134
135 /* get/put_page can be raced. Ignore that case */
136 if (page_pinner->ts_usec < now)
137 delta = now - page_pinner->ts_usec;
138
139 if (delta <= threshold_usec)
140 return;
141
Minchan Kim71da0672021-07-07 13:27:30 -0700142 record.handle = page_pinner->handle;
Minchan Kim0445b672021-07-07 13:46:58 -0700143 record.elapsed = delta;
Minchan Kim71da0672021-07-07 13:27:30 -0700144 capture_page_state(page, &record);
145
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700146 spin_lock_irqsave(&lt_pinner.lock, flags);
147 idx = lt_pinner.index++;
Minchan Kim9a453102021-07-07 13:10:59 -0700148 lt_pinner.index %= LONGTERM_PIN_BUCKETS;
Minchan Kim71da0672021-07-07 13:27:30 -0700149 lt_pinner.pinner[idx] = record;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700150 spin_unlock_irqrestore(&lt_pinner.lock, flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700151}
152
153void __reset_page_pinner(struct page *page, unsigned int order, bool free)
154{
155 struct page_pinner *page_pinner;
156 struct page_ext *page_ext;
157 int i;
158
159 page_ext = lookup_page_ext(page);
160 if (unlikely(!page_ext))
161 return;
162
163 for (i = 0; i < (1 << order); i++) {
Minchan Kimd0127832021-07-08 11:12:59 -0700164 if (!test_bit(PAGE_EXT_GET, &page_ext->flags) &&
165 !test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED,
166 &page_ext->flags))
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700167 continue;
168
169 page_pinner = get_page_pinner(page_ext);
170 if (free) {
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700171 atomic_set(&page_pinner->count, 0);
Minchan Kimddc4a482021-03-29 16:48:47 -0700172 __clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700173 } else {
Minchan Kim9a453102021-07-07 13:10:59 -0700174 check_longterm_pin(page_pinner, page);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700175 }
176 clear_bit(PAGE_EXT_GET, &page_ext->flags);
177 page_ext = page_ext_next(page_ext);
178 }
179}
180
181static inline void __set_page_pinner_handle(struct page *page,
182 struct page_ext *page_ext, depot_stack_handle_t handle,
183 unsigned int order)
184{
185 struct page_pinner *page_pinner;
186 int i;
187 s64 usec = ktime_to_us(ktime_get_boottime());
188
189 for (i = 0; i < (1 << order); i++) {
190 page_pinner = get_page_pinner(page_ext);
191 page_pinner->handle = handle;
192 page_pinner->ts_usec = usec;
193 set_bit(PAGE_EXT_GET, &page_ext->flags);
194 atomic_inc(&page_pinner->count);
195 page_ext = page_ext_next(page_ext);
196 }
197}
198
199noinline void __set_page_pinner(struct page *page, unsigned int order)
200{
201 struct page_ext *page_ext = lookup_page_ext(page);
202 depot_stack_handle_t handle;
203
204 if (unlikely(!page_ext))
205 return;
206
207 handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
208 __set_page_pinner_handle(page, page_ext, handle, order);
209}
210
211static ssize_t
Minchan Kim0445b672021-07-07 13:46:58 -0700212print_page_pinner(bool longterm, char __user *buf, size_t count, struct captured_pinner *record)
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700213{
214 int ret;
215 unsigned long *entries;
216 unsigned int nr_entries;
217 char *kbuf;
218
219 count = min_t(size_t, count, PAGE_SIZE);
220 kbuf = kmalloc(count, GFP_KERNEL);
221 if (!kbuf)
222 return -ENOMEM;
223
Minchan Kim0445b672021-07-07 13:46:58 -0700224 if (longterm) {
225 ret = snprintf(kbuf, count, "Page pinned for %lld us\n",
226 record->elapsed);
227 } else {
228 s64 ts_usec = record->ts_usec;
229 unsigned long rem_usec = do_div(ts_usec, 1000000);
230
231 ret = snprintf(kbuf, count,
232 "Page pinned ts [%5lu.%06lu]\n",
233 (unsigned long)ts_usec, rem_usec);
234 }
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700235
236 if (ret >= count)
237 goto err;
238
239 /* Print information relevant to grouping pages by mobility */
240 ret += snprintf(kbuf + ret, count - ret,
241 "PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
Minchan Kim71da0672021-07-07 13:27:30 -0700242 record->pfn,
243 record->pfn >> pageblock_order,
244 migratetype_names[record->page_mt],
245 record->page_flags, &record->page_flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700246
247 if (ret >= count)
248 goto err;
249
Minchan Kim71da0672021-07-07 13:27:30 -0700250 nr_entries = stack_depot_fetch(record->handle, &entries);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700251 ret += stack_trace_snprint(kbuf + ret, count - ret, entries,
252 nr_entries, 0);
253 if (ret >= count)
254 goto err;
255
256 ret += snprintf(kbuf + ret, count - ret, "\n");
257 if (ret >= count)
258 goto err;
259
260 if (copy_to_user(buf, kbuf, ret))
261 ret = -EFAULT;
262
263 kfree(kbuf);
264 return ret;
265
266err:
267 kfree(kbuf);
268 return -ENOMEM;
269}
270
271void __dump_page_pinner(struct page *page)
272{
273 struct page_ext *page_ext = lookup_page_ext(page);
274 struct page_pinner *page_pinner;
275 depot_stack_handle_t handle;
276 unsigned long *entries;
277 unsigned int nr_entries;
278 int pageblock_mt;
279 unsigned long pfn;
280 int count;
Minchan Kim0445b672021-07-07 13:46:58 -0700281 unsigned long rem_usec;
282 s64 ts_usec;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700283
284 if (unlikely(!page_ext)) {
285 pr_alert("There is not page extension available.\n");
286 return;
287 }
288
289 page_pinner = get_page_pinner(page_ext);
290
291 count = atomic_read(&page_pinner->count);
292 if (!count) {
293 pr_alert("page_pinner info is not present (never set?)\n");
294 return;
295 }
296
297 pfn = page_to_pfn(page);
Minchan Kim0445b672021-07-07 13:46:58 -0700298 ts_usec = page_pinner->ts_usec;
299 rem_usec = do_div(ts_usec, 1000000);
300 pr_alert("page last pinned %5lu.%06lu] count %d\n",
301 (unsigned long)ts_usec, rem_usec, count);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700302
303 pageblock_mt = get_pageblock_migratetype(page);
304 pr_alert("PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
305 pfn,
306 pfn >> pageblock_order,
307 migratetype_names[pageblock_mt],
308 page->flags, &page->flags);
309
310 handle = READ_ONCE(page_pinner->handle);
311 if (!handle) {
312 pr_alert("page_pinner allocation stack trace missing\n");
313 } else {
314 nr_entries = stack_depot_fetch(handle, &entries);
315 stack_trace_print(entries, nr_entries, 0);
316 }
317}
318
Minchan Kimddc4a482021-03-29 16:48:47 -0700319void __page_pinner_migration_failed(struct page *page)
320{
321 struct page_ext *page_ext = lookup_page_ext(page);
322 struct page_pinner *page_pinner;
Minchan Kim71da0672021-07-07 13:27:30 -0700323 struct captured_pinner record;
Minchan Kimddc4a482021-03-29 16:48:47 -0700324 unsigned long flags;
325 unsigned int idx;
326
327 if (unlikely(!page_ext))
328 return;
329
330 page_pinner = get_page_pinner(page_ext);
331 if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
332 return;
333
Minchan Kim71da0672021-07-07 13:27:30 -0700334 record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
335 record.ts_usec = ktime_to_us(ktime_get_boottime());
336 capture_page_state(page, &record);
Minchan Kimddc4a482021-03-29 16:48:47 -0700337
338 spin_lock_irqsave(&acf_pinner.lock, flags);
339 idx = acf_pinner.index++;
Minchan Kim9a453102021-07-07 13:10:59 -0700340 acf_pinner.index %= LONGTERM_PIN_BUCKETS;
Minchan Kim71da0672021-07-07 13:27:30 -0700341 acf_pinner.pinner[idx] = record;
Minchan Kimddc4a482021-03-29 16:48:47 -0700342 spin_unlock_irqrestore(&acf_pinner.lock, flags);
343}
344EXPORT_SYMBOL(__page_pinner_migration_failed);
345
346void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
347{
348 struct page *page;
349 struct page_ext *page_ext;
350
351 list_for_each_entry(page, page_list, lru) {
Minchan Kim3a71ca12021-05-21 11:43:54 -0700352 /* The page will be freed by putback_movable_pages soon */
353 if (page_count(page) == 1)
354 continue;
Minchan Kimddc4a482021-03-29 16:48:47 -0700355 page_ext = lookup_page_ext(page);
356 if (unlikely(!page_ext))
357 continue;
358 __set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
359 }
360}
361
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700362static ssize_t
363read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
364 loff_t *ppos)
365{
366 loff_t i, idx;
367 struct captured_pinner record;
368 unsigned long flags;
369
370 if (!static_branch_unlikely(&page_pinner_inited))
371 return -EINVAL;
372
Minchan Kim9a453102021-07-07 13:10:59 -0700373 if (*ppos >= LONGTERM_PIN_BUCKETS)
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700374 return 0;
375
376 i = *ppos;
377 *ppos = i + 1;
378
379 /*
380 * reading the records in the reverse order with newest one
381 * being read first followed by older ones
382 */
Minchan Kim9a453102021-07-07 13:10:59 -0700383 idx = (lt_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
384 LONGTERM_PIN_BUCKETS;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700385 spin_lock_irqsave(&lt_pinner.lock, flags);
386 record = lt_pinner.pinner[idx];
387 spin_unlock_irqrestore(&lt_pinner.lock, flags);
388 if (!record.handle)
389 return 0;
390
Minchan Kim0445b672021-07-07 13:46:58 -0700391 return print_page_pinner(true, buf, count, &record);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700392}
393
394static const struct file_operations proc_longterm_pinner_operations = {
395 .read = read_longterm_page_pinner,
396};
397
Minchan Kimddc4a482021-03-29 16:48:47 -0700398static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
399 size_t count, loff_t *ppos)
400{
401 loff_t i, idx;
402 struct captured_pinner record;
403 unsigned long flags;
404
405 if (!static_branch_unlikely(&failure_tracking))
406 return -EINVAL;
407
Minchan Kim9a453102021-07-07 13:10:59 -0700408 if (*ppos >= LONGTERM_PIN_BUCKETS)
Minchan Kimddc4a482021-03-29 16:48:47 -0700409 return 0;
410
411 i = *ppos;
412 *ppos = i + 1;
413
414 /*
415 * reading the records in the reverse order with newest one
416 * being read first followed by older ones
417 */
Minchan Kim9a453102021-07-07 13:10:59 -0700418 idx = (acf_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
419 LONGTERM_PIN_BUCKETS;
Minchan Kimddc4a482021-03-29 16:48:47 -0700420
421 spin_lock_irqsave(&acf_pinner.lock, flags);
422 record = acf_pinner.pinner[idx];
423 spin_unlock_irqrestore(&acf_pinner.lock, flags);
424 if (!record.handle)
425 return 0;
426
Minchan Kim0445b672021-07-07 13:46:58 -0700427 return print_page_pinner(false, buf, count, &record);
Minchan Kimddc4a482021-03-29 16:48:47 -0700428}
429
430static const struct file_operations proc_alloc_contig_failed_operations = {
431 .read = read_alloc_contig_failed,
432};
433
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700434static int pp_threshold_set(void *data, unsigned long long val)
435{
436 unsigned long flags;
437
438 threshold_usec = (s64)val;
439
440 spin_lock_irqsave(&lt_pinner.lock, flags);
441 memset(lt_pinner.pinner, 0,
Minchan Kim9a453102021-07-07 13:10:59 -0700442 sizeof(struct captured_pinner) * LONGTERM_PIN_BUCKETS);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700443 lt_pinner.index = 0;
444 spin_unlock_irqrestore(&lt_pinner.lock, flags);
445 return 0;
446}
447
448static int pp_threshold_get(void *data, unsigned long long *val)
449{
450 *val = (unsigned long long)threshold_usec;
451
452 return 0;
453}
454DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
455 pp_threshold_set, "%lld\n");
456
Minchan Kimddc4a482021-03-29 16:48:47 -0700457static int failure_tracking_set(void *data, u64 val)
458{
459 bool on;
460
461 on = (bool)val;
462 if (on)
463 static_branch_enable(&failure_tracking);
464 else
465 static_branch_disable(&failure_tracking);
466 return 0;
467}
468
469static int failure_tracking_get(void *data, u64 *val)
470{
471 *val = static_branch_unlikely(&failure_tracking);
472 return 0;
473}
474DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
475 failure_tracking_get,
476 failure_tracking_set, "%llu\n");
477
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700478static int __init page_pinner_init(void)
479{
480 struct dentry *pp_debugfs_root;
481
482 if (!static_branch_unlikely(&page_pinner_inited))
483 return 0;
484
485 pr_info("page_pinner enabled\n");
Minchan Kimddc4a482021-03-29 16:48:47 -0700486
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700487 pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
488
Minchan Kim7d3618b2021-06-22 19:49:51 -0700489 debugfs_create_file("longterm_pinner", 0444, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700490 &proc_longterm_pinner_operations);
491
Minchan Kim7d3618b2021-06-22 19:49:51 -0700492 debugfs_create_file("threshold", 0644, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700493 &pp_threshold_fops);
Minchan Kimddc4a482021-03-29 16:48:47 -0700494
Minchan Kim7d3618b2021-06-22 19:49:51 -0700495 debugfs_create_file("alloc_contig_failed", 0444,
Minchan Kimddc4a482021-03-29 16:48:47 -0700496 pp_debugfs_root, NULL,
497 &proc_alloc_contig_failed_operations);
498
Minchan Kim7d3618b2021-06-22 19:49:51 -0700499 debugfs_create_file("failure_tracking", 0644,
Minchan Kimddc4a482021-03-29 16:48:47 -0700500 pp_debugfs_root, NULL,
501 &failure_tracking_fops);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700502 return 0;
503}
504late_initcall(page_pinner_init)