blob: 54697332f77e43e6e16ce3b93a5afdfe91a2183a [file] [log] [blame]
Minchan Kim6e12c5b2021-03-18 09:56:10 -07001// SPDX-License-Identifier: GPL-2.0
2#include <linux/debugfs.h>
3#include <linux/mm.h>
4#include <linux/slab.h>
5#include <linux/uaccess.h>
6#include <linux/memblock.h>
7#include <linux/stacktrace.h>
8#include <linux/page_pinner.h>
9#include <linux/jump_label.h>
10#include <linux/migrate.h>
11#include <linux/stackdepot.h>
12#include <linux/seq_file.h>
13#include <linux/sched/clock.h>
14
15#include "internal.h"
16
/* Maximum number of stack frames captured by save_stack(). */
#define PAGE_PINNER_STACK_DEPTH		16
/* Number of record slots in each struct longterm_pinner ring. */
#define LONGTERM_PIN_BUCKETS		4096
Minchan Kim6e12c5b2021-03-18 09:56:10 -070019
20struct page_pinner {
21 depot_stack_handle_t handle;
22 s64 ts_usec;
23 atomic_t count;
24};
25
26struct captured_pinner {
27 depot_stack_handle_t handle;
28 s64 ts_usec;
29 int page_mt;
30 unsigned long page_flags;
31 unsigned long pfn;
32};
33
34struct longterm_pinner {
35 spinlock_t lock;
36 unsigned int index;
Minchan Kim9a453102021-07-07 13:10:59 -070037 struct captured_pinner pinner[LONGTERM_PIN_BUCKETS];
Minchan Kim6e12c5b2021-03-18 09:56:10 -070038};
39
40static struct longterm_pinner lt_pinner = {
41 .lock = __SPIN_LOCK_UNLOCKED(lt_pinner.lock),
42};
43
44static s64 threshold_usec = 300000;
45
Minchan Kimddc4a482021-03-29 16:48:47 -070046/* alloc_contig failed pinner */
47static struct longterm_pinner acf_pinner = {
48 .lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
49};
50
Minchan Kim6e12c5b2021-03-18 09:56:10 -070051static bool page_pinner_enabled;
52DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
53
Minchan Kimddc4a482021-03-29 16:48:47 -070054DEFINE_STATIC_KEY_TRUE(failure_tracking);
55EXPORT_SYMBOL(failure_tracking);
56
Minchan Kim6e12c5b2021-03-18 09:56:10 -070057static depot_stack_handle_t failure_handle;
58
59static int __init early_page_pinner_param(char *buf)
60{
61 page_pinner_enabled = true;
62 return 0;
63}
64early_param("page_pinner", early_page_pinner_param);
65
66static bool need_page_pinner(void)
67{
68 return page_pinner_enabled;
69}
70
71static noinline void register_failure_stack(void)
72{
73 unsigned long entries[4];
74 unsigned int nr_entries;
75
76 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
77 failure_handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
78}
79
80static void init_page_pinner(void)
81{
82 if (!page_pinner_enabled)
83 return;
84
85 register_failure_stack();
86 static_branch_enable(&page_pinner_inited);
87}
88
89struct page_ext_operations page_pinner_ops = {
90 .size = sizeof(struct page_pinner),
91 .need = need_page_pinner,
92 .init = init_page_pinner,
93};
94
95static inline struct page_pinner *get_page_pinner(struct page_ext *page_ext)
96{
97 return (void *)page_ext + page_pinner_ops.offset;
98}
99
100static noinline depot_stack_handle_t save_stack(gfp_t flags)
101{
102 unsigned long entries[PAGE_PINNER_STACK_DEPTH];
103 depot_stack_handle_t handle;
104 unsigned int nr_entries;
105
106 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
107 handle = stack_depot_save(entries, nr_entries, flags);
108 if (!handle)
109 handle = failure_handle;
110
111 return handle;
112}
113
Minchan Kim71da0672021-07-07 13:27:30 -0700114static void capture_page_state(struct page *page,
115 struct captured_pinner *record)
116{
117 record->page_flags = page->flags;
118 record->page_mt = get_pageblock_migratetype(page);
119 record->pfn = page_to_pfn(page);
120}
121
Minchan Kim9a453102021-07-07 13:10:59 -0700122static void check_longterm_pin(struct page_pinner *page_pinner,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700123 struct page *page)
124{
125 s64 now, delta = 0;
126 unsigned long flags;
127 unsigned int idx;
Minchan Kim71da0672021-07-07 13:27:30 -0700128 struct captured_pinner record;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700129
130 now = ktime_to_us(ktime_get_boottime());
131
132 /* get/put_page can be raced. Ignore that case */
133 if (page_pinner->ts_usec < now)
134 delta = now - page_pinner->ts_usec;
135
136 if (delta <= threshold_usec)
137 return;
138
Minchan Kim71da0672021-07-07 13:27:30 -0700139 record.handle = page_pinner->handle;
140 record.ts_usec = delta;
141 capture_page_state(page, &record);
142
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700143 spin_lock_irqsave(&lt_pinner.lock, flags);
144 idx = lt_pinner.index++;
Minchan Kim9a453102021-07-07 13:10:59 -0700145 lt_pinner.index %= LONGTERM_PIN_BUCKETS;
Minchan Kim71da0672021-07-07 13:27:30 -0700146 lt_pinner.pinner[idx] = record;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700147 spin_unlock_irqrestore(&lt_pinner.lock, flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700148}
149
150void __reset_page_pinner(struct page *page, unsigned int order, bool free)
151{
152 struct page_pinner *page_pinner;
153 struct page_ext *page_ext;
154 int i;
155
156 page_ext = lookup_page_ext(page);
157 if (unlikely(!page_ext))
158 return;
159
160 for (i = 0; i < (1 << order); i++) {
Minchan Kimd0127832021-07-08 11:12:59 -0700161 if (!test_bit(PAGE_EXT_GET, &page_ext->flags) &&
162 !test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED,
163 &page_ext->flags))
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700164 continue;
165
166 page_pinner = get_page_pinner(page_ext);
167 if (free) {
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700168 atomic_set(&page_pinner->count, 0);
Minchan Kimddc4a482021-03-29 16:48:47 -0700169 __clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700170 } else {
Minchan Kim9a453102021-07-07 13:10:59 -0700171 check_longterm_pin(page_pinner, page);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700172 }
173 clear_bit(PAGE_EXT_GET, &page_ext->flags);
174 page_ext = page_ext_next(page_ext);
175 }
176}
177
178static inline void __set_page_pinner_handle(struct page *page,
179 struct page_ext *page_ext, depot_stack_handle_t handle,
180 unsigned int order)
181{
182 struct page_pinner *page_pinner;
183 int i;
184 s64 usec = ktime_to_us(ktime_get_boottime());
185
186 for (i = 0; i < (1 << order); i++) {
187 page_pinner = get_page_pinner(page_ext);
188 page_pinner->handle = handle;
189 page_pinner->ts_usec = usec;
190 set_bit(PAGE_EXT_GET, &page_ext->flags);
191 atomic_inc(&page_pinner->count);
192 page_ext = page_ext_next(page_ext);
193 }
194}
195
196noinline void __set_page_pinner(struct page *page, unsigned int order)
197{
198 struct page_ext *page_ext = lookup_page_ext(page);
199 depot_stack_handle_t handle;
200
201 if (unlikely(!page_ext))
202 return;
203
204 handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
205 __set_page_pinner_handle(page, page_ext, handle, order);
206}
207
208static ssize_t
Minchan Kim71da0672021-07-07 13:27:30 -0700209print_page_pinner(char __user *buf, size_t count, struct captured_pinner *record)
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700210{
211 int ret;
212 unsigned long *entries;
213 unsigned int nr_entries;
214 char *kbuf;
215
216 count = min_t(size_t, count, PAGE_SIZE);
217 kbuf = kmalloc(count, GFP_KERNEL);
218 if (!kbuf)
219 return -ENOMEM;
220
221 ret = snprintf(kbuf, count,
Minchan Kimb83e5642021-07-07 13:22:52 -0700222 "Page pinned ts %lld us\n",
Minchan Kim71da0672021-07-07 13:27:30 -0700223 record->ts_usec);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700224
225 if (ret >= count)
226 goto err;
227
228 /* Print information relevant to grouping pages by mobility */
229 ret += snprintf(kbuf + ret, count - ret,
230 "PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
Minchan Kim71da0672021-07-07 13:27:30 -0700231 record->pfn,
232 record->pfn >> pageblock_order,
233 migratetype_names[record->page_mt],
234 record->page_flags, &record->page_flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700235
236 if (ret >= count)
237 goto err;
238
Minchan Kim71da0672021-07-07 13:27:30 -0700239 nr_entries = stack_depot_fetch(record->handle, &entries);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700240 ret += stack_trace_snprint(kbuf + ret, count - ret, entries,
241 nr_entries, 0);
242 if (ret >= count)
243 goto err;
244
245 ret += snprintf(kbuf + ret, count - ret, "\n");
246 if (ret >= count)
247 goto err;
248
249 if (copy_to_user(buf, kbuf, ret))
250 ret = -EFAULT;
251
252 kfree(kbuf);
253 return ret;
254
255err:
256 kfree(kbuf);
257 return -ENOMEM;
258}
259
260void __dump_page_pinner(struct page *page)
261{
262 struct page_ext *page_ext = lookup_page_ext(page);
263 struct page_pinner *page_pinner;
264 depot_stack_handle_t handle;
265 unsigned long *entries;
266 unsigned int nr_entries;
267 int pageblock_mt;
268 unsigned long pfn;
269 int count;
270
271 if (unlikely(!page_ext)) {
272 pr_alert("There is not page extension available.\n");
273 return;
274 }
275
276 page_pinner = get_page_pinner(page_ext);
277
278 count = atomic_read(&page_pinner->count);
279 if (!count) {
280 pr_alert("page_pinner info is not present (never set?)\n");
281 return;
282 }
283
284 pfn = page_to_pfn(page);
285 pr_alert("page last pinned ts %lld count %d\n",
286 page_pinner->ts_usec,
287 count);
288
289 pageblock_mt = get_pageblock_migratetype(page);
290 pr_alert("PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
291 pfn,
292 pfn >> pageblock_order,
293 migratetype_names[pageblock_mt],
294 page->flags, &page->flags);
295
296 handle = READ_ONCE(page_pinner->handle);
297 if (!handle) {
298 pr_alert("page_pinner allocation stack trace missing\n");
299 } else {
300 nr_entries = stack_depot_fetch(handle, &entries);
301 stack_trace_print(entries, nr_entries, 0);
302 }
303}
304
Minchan Kimddc4a482021-03-29 16:48:47 -0700305void __page_pinner_migration_failed(struct page *page)
306{
307 struct page_ext *page_ext = lookup_page_ext(page);
308 struct page_pinner *page_pinner;
Minchan Kim71da0672021-07-07 13:27:30 -0700309 struct captured_pinner record;
Minchan Kimddc4a482021-03-29 16:48:47 -0700310 unsigned long flags;
311 unsigned int idx;
312
313 if (unlikely(!page_ext))
314 return;
315
316 page_pinner = get_page_pinner(page_ext);
317 if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
318 return;
319
Minchan Kim71da0672021-07-07 13:27:30 -0700320 record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
321 record.ts_usec = ktime_to_us(ktime_get_boottime());
322 capture_page_state(page, &record);
Minchan Kimddc4a482021-03-29 16:48:47 -0700323
324 spin_lock_irqsave(&acf_pinner.lock, flags);
325 idx = acf_pinner.index++;
Minchan Kim9a453102021-07-07 13:10:59 -0700326 acf_pinner.index %= LONGTERM_PIN_BUCKETS;
Minchan Kim71da0672021-07-07 13:27:30 -0700327 acf_pinner.pinner[idx] = record;
Minchan Kimddc4a482021-03-29 16:48:47 -0700328 spin_unlock_irqrestore(&acf_pinner.lock, flags);
329}
330EXPORT_SYMBOL(__page_pinner_migration_failed);
331
332void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
333{
334 struct page *page;
335 struct page_ext *page_ext;
336
337 list_for_each_entry(page, page_list, lru) {
Minchan Kim3a71ca12021-05-21 11:43:54 -0700338 /* The page will be freed by putback_movable_pages soon */
339 if (page_count(page) == 1)
340 continue;
Minchan Kimddc4a482021-03-29 16:48:47 -0700341 page_ext = lookup_page_ext(page);
342 if (unlikely(!page_ext))
343 continue;
344 __set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
345 }
346}
347
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700348static ssize_t
349read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
350 loff_t *ppos)
351{
352 loff_t i, idx;
353 struct captured_pinner record;
354 unsigned long flags;
355
356 if (!static_branch_unlikely(&page_pinner_inited))
357 return -EINVAL;
358
Minchan Kim9a453102021-07-07 13:10:59 -0700359 if (*ppos >= LONGTERM_PIN_BUCKETS)
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700360 return 0;
361
362 i = *ppos;
363 *ppos = i + 1;
364
365 /*
366 * reading the records in the reverse order with newest one
367 * being read first followed by older ones
368 */
Minchan Kim9a453102021-07-07 13:10:59 -0700369 idx = (lt_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
370 LONGTERM_PIN_BUCKETS;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700371 spin_lock_irqsave(&lt_pinner.lock, flags);
372 record = lt_pinner.pinner[idx];
373 spin_unlock_irqrestore(&lt_pinner.lock, flags);
374 if (!record.handle)
375 return 0;
376
Minchan Kim71da0672021-07-07 13:27:30 -0700377 return print_page_pinner(buf, count, &record);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700378}
379
380static const struct file_operations proc_longterm_pinner_operations = {
381 .read = read_longterm_page_pinner,
382};
383
Minchan Kimddc4a482021-03-29 16:48:47 -0700384static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
385 size_t count, loff_t *ppos)
386{
387 loff_t i, idx;
388 struct captured_pinner record;
389 unsigned long flags;
390
391 if (!static_branch_unlikely(&failure_tracking))
392 return -EINVAL;
393
Minchan Kim9a453102021-07-07 13:10:59 -0700394 if (*ppos >= LONGTERM_PIN_BUCKETS)
Minchan Kimddc4a482021-03-29 16:48:47 -0700395 return 0;
396
397 i = *ppos;
398 *ppos = i + 1;
399
400 /*
401 * reading the records in the reverse order with newest one
402 * being read first followed by older ones
403 */
Minchan Kim9a453102021-07-07 13:10:59 -0700404 idx = (acf_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
405 LONGTERM_PIN_BUCKETS;
Minchan Kimddc4a482021-03-29 16:48:47 -0700406
407 spin_lock_irqsave(&acf_pinner.lock, flags);
408 record = acf_pinner.pinner[idx];
409 spin_unlock_irqrestore(&acf_pinner.lock, flags);
410 if (!record.handle)
411 return 0;
412
Minchan Kim71da0672021-07-07 13:27:30 -0700413 return print_page_pinner(buf, count, &record);
Minchan Kimddc4a482021-03-29 16:48:47 -0700414}
415
416static const struct file_operations proc_alloc_contig_failed_operations = {
417 .read = read_alloc_contig_failed,
418};
419
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700420static int pp_threshold_set(void *data, unsigned long long val)
421{
422 unsigned long flags;
423
424 threshold_usec = (s64)val;
425
426 spin_lock_irqsave(&lt_pinner.lock, flags);
427 memset(lt_pinner.pinner, 0,
Minchan Kim9a453102021-07-07 13:10:59 -0700428 sizeof(struct captured_pinner) * LONGTERM_PIN_BUCKETS);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700429 lt_pinner.index = 0;
430 spin_unlock_irqrestore(&lt_pinner.lock, flags);
431 return 0;
432}
433
434static int pp_threshold_get(void *data, unsigned long long *val)
435{
436 *val = (unsigned long long)threshold_usec;
437
438 return 0;
439}
440DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
441 pp_threshold_set, "%lld\n");
442
Minchan Kimddc4a482021-03-29 16:48:47 -0700443static int failure_tracking_set(void *data, u64 val)
444{
445 bool on;
446
447 on = (bool)val;
448 if (on)
449 static_branch_enable(&failure_tracking);
450 else
451 static_branch_disable(&failure_tracking);
452 return 0;
453}
454
455static int failure_tracking_get(void *data, u64 *val)
456{
457 *val = static_branch_unlikely(&failure_tracking);
458 return 0;
459}
460DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
461 failure_tracking_get,
462 failure_tracking_set, "%llu\n");
463
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700464static int __init page_pinner_init(void)
465{
466 struct dentry *pp_debugfs_root;
467
468 if (!static_branch_unlikely(&page_pinner_inited))
469 return 0;
470
471 pr_info("page_pinner enabled\n");
Minchan Kimddc4a482021-03-29 16:48:47 -0700472
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700473 pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
474
Minchan Kim7d3618b2021-06-22 19:49:51 -0700475 debugfs_create_file("longterm_pinner", 0444, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700476 &proc_longterm_pinner_operations);
477
Minchan Kim7d3618b2021-06-22 19:49:51 -0700478 debugfs_create_file("threshold", 0644, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700479 &pp_threshold_fops);
Minchan Kimddc4a482021-03-29 16:48:47 -0700480
Minchan Kim7d3618b2021-06-22 19:49:51 -0700481 debugfs_create_file("alloc_contig_failed", 0444,
Minchan Kimddc4a482021-03-29 16:48:47 -0700482 pp_debugfs_root, NULL,
483 &proc_alloc_contig_failed_operations);
484
Minchan Kim7d3618b2021-06-22 19:49:51 -0700485 debugfs_create_file("failure_tracking", 0644,
Minchan Kimddc4a482021-03-29 16:48:47 -0700486 pp_debugfs_root, NULL,
487 &failure_tracking_fops);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700488 return 0;
489}
490late_initcall(page_pinner_init)