blob: a8ef9af368226ce39c2726d74f19d5f63dda2914 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
2#include <linux/debugfs.h>
3#include <linux/mm.h>
4#include <linux/slab.h>
5#include <linux/uaccess.h>
6#include <linux/memblock.h>
7#include <linux/stacktrace.h>
8#include <linux/page_pinner.h>
9#include <linux/jump_label.h>
10#include <linux/migrate.h>
11#include <linux/stackdepot.h>
12#include <linux/seq_file.h>
13#include <linux/sched/clock.h>
14
15#include "internal.h"
16
/* Maximum number of stack frames captured by save_stack(). */
#define PAGE_PINNER_STACK_DEPTH 16
/* Number of slots in each captured_pinner ring (see struct longterm_pinner). */
#define LONTERM_PIN_BUCKETS 4096
19
/*
 * Per-page tracking state, located inside the page_ext area at
 * page_pinner_ops.offset (see get_page_pinner()).
 */
struct page_pinner {
	/* Stack depot handle stored by __set_page_pinner_handle(). */
	depot_stack_handle_t handle;
	/* Boottime timestamp, in microseconds, of the most recent set. */
	s64 ts_usec;
	/* Incremented on each set, decremented (or zeroed on free) on reset. */
	atomic_t count;
};
25
/* One record in a longterm_pinner ring; a zero handle marks an unused slot. */
struct captured_pinner {
	/* Stack depot handle of the recorded stack trace. */
	depot_stack_handle_t handle;
	/*
	 * Microseconds. For lt_pinner records this is the elapsed time the
	 * page was held; for acf_pinner records it is the boottime timestamp
	 * at which the record was taken.
	 */
	s64 ts_usec;
	/* Pageblock migratetype at capture time. */
	int page_mt;
	/* Snapshot of page->flags at capture time. */
	unsigned long page_flags;
	/* Page frame number of the recorded page. */
	unsigned long pfn;
};
33
/* Fixed-size ring of captured records; the oldest slot is overwritten first. */
struct longterm_pinner {
	/* Serializes updates to index and pinner[]. */
	spinlock_t lock;
	/* Next slot to write; always kept below LONTERM_PIN_BUCKETS. */
	unsigned int index;
	struct captured_pinner pinner[LONTERM_PIN_BUCKETS];
};
39
/* Pages whose hold time exceeded threshold_usec (see check_lonterm_pin()). */
static struct longterm_pinner lt_pinner = {
	.lock = __SPIN_LOCK_UNLOCKED(lt_pinner.lock),
};
43
/*
 * Hold time, in microseconds, above which a released page is recorded in
 * lt_pinner. Adjustable at runtime through pp_threshold_set().
 */
static s64 threshold_usec = 300000;
45
/*
 * alloc_contig failed pinner: records written by
 * __page_pinner_migration_failed().
 */
static struct longterm_pinner acf_pinner = {
	.lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
};
50
/* Set by the "page_pinner" early boot parameter. */
static bool page_pinner_enabled;
/* Enabled by init_page_pinner() once the failure stack is registered. */
DEFINE_STATIC_KEY_FALSE(page_pinner_inited);

/* On by default; toggled through failure_tracking_set(). */
DEFINE_STATIC_KEY_TRUE(failure_tracking);
EXPORT_SYMBOL(failure_tracking);

/* Fallback handle returned by save_stack() when the depot save fails. */
static depot_stack_handle_t failure_handle;
58
59static int __init early_page_pinner_param(char *buf)
60{
61 page_pinner_enabled = true;
62 return 0;
63}
64early_param("page_pinner", early_page_pinner_param);
65
66static bool need_page_pinner(void)
67{
68 return page_pinner_enabled;
69}
70
71static noinline void register_failure_stack(void)
72{
73 unsigned long entries[4];
74 unsigned int nr_entries;
75
76 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
77 failure_handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
78}
79
80static void init_page_pinner(void)
81{
82 if (!page_pinner_enabled)
83 return;
84
85 register_failure_stack();
86 static_branch_enable(&page_pinner_inited);
87}
88
/*
 * page_ext registration for page_pinner: requests room for one
 * struct page_pinner per page and wires up the need/init callbacks.
 */
struct page_ext_operations page_pinner_ops = {
	.size = sizeof(struct page_pinner),
	.need = need_page_pinner,
	.init = init_page_pinner,
};
94
95static inline struct page_pinner *get_page_pinner(struct page_ext *page_ext)
96{
97 return (void *)page_ext + page_pinner_ops.offset;
98}
99
100static noinline depot_stack_handle_t save_stack(gfp_t flags)
101{
102 unsigned long entries[PAGE_PINNER_STACK_DEPTH];
103 depot_stack_handle_t handle;
104 unsigned int nr_entries;
105
106 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
107 handle = stack_depot_save(entries, nr_entries, flags);
108 if (!handle)
109 handle = failure_handle;
110
111 return handle;
112}
113
114static void check_lonterm_pin(struct page_pinner *page_pinner,
115 struct page *page)
116{
117 s64 now, delta = 0;
118 unsigned long flags;
119 unsigned int idx;
120
121 now = ktime_to_us(ktime_get_boottime());
122
123 /* get/put_page can be raced. Ignore that case */
124 if (page_pinner->ts_usec < now)
125 delta = now - page_pinner->ts_usec;
126
127 if (delta <= threshold_usec)
128 return;
129
130 spin_lock_irqsave(&lt_pinner.lock, flags);
131 idx = lt_pinner.index++;
132 lt_pinner.index %= LONTERM_PIN_BUCKETS;
133
134 lt_pinner.pinner[idx].handle = page_pinner->handle;
135 lt_pinner.pinner[idx].ts_usec = delta;
136 lt_pinner.pinner[idx].page_flags = page->flags;
137 lt_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
138 lt_pinner.pinner[idx].pfn = page_to_pfn(page);
139 spin_unlock_irqrestore(&lt_pinner.lock, flags);
140
141}
142
143void __reset_page_pinner(struct page *page, unsigned int order, bool free)
144{
145 struct page_pinner *page_pinner;
146 struct page_ext *page_ext;
147 int i;
148
149 page_ext = lookup_page_ext(page);
150 if (unlikely(!page_ext))
151 return;
152
153 for (i = 0; i < (1 << order); i++) {
Minchan Kimd0127832021-07-08 11:12:59 -0700154 if (!test_bit(PAGE_EXT_GET, &page_ext->flags) &&
155 !test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED,
156 &page_ext->flags))
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700157 continue;
158
159 page_pinner = get_page_pinner(page_ext);
160 if (free) {
161 WARN_ON_ONCE(atomic_read(&page_pinner->count));
162 atomic_set(&page_pinner->count, 0);
Minchan Kimddc4a482021-03-29 16:48:47 -0700163 __clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700164 } else {
165 WARN_ON_ONCE(atomic_dec_if_positive(
166 &page_pinner->count) < 0);
167 check_lonterm_pin(page_pinner, page);
168 }
169 clear_bit(PAGE_EXT_GET, &page_ext->flags);
170 page_ext = page_ext_next(page_ext);
171 }
172}
173
174static inline void __set_page_pinner_handle(struct page *page,
175 struct page_ext *page_ext, depot_stack_handle_t handle,
176 unsigned int order)
177{
178 struct page_pinner *page_pinner;
179 int i;
180 s64 usec = ktime_to_us(ktime_get_boottime());
181
182 for (i = 0; i < (1 << order); i++) {
183 page_pinner = get_page_pinner(page_ext);
184 page_pinner->handle = handle;
185 page_pinner->ts_usec = usec;
186 set_bit(PAGE_EXT_GET, &page_ext->flags);
187 atomic_inc(&page_pinner->count);
188 page_ext = page_ext_next(page_ext);
189 }
190}
191
192noinline void __set_page_pinner(struct page *page, unsigned int order)
193{
194 struct page_ext *page_ext = lookup_page_ext(page);
195 depot_stack_handle_t handle;
196
197 if (unlikely(!page_ext))
198 return;
199
200 handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
201 __set_page_pinner_handle(page, page_ext, handle, order);
202}
203
204static ssize_t
205print_page_pinner(char __user *buf, size_t count, unsigned long pfn,
206 int pageblock_mt, unsigned long page_flags, s64 ts_usec,
207 depot_stack_handle_t handle, int shared_count)
208{
209 int ret;
210 unsigned long *entries;
211 unsigned int nr_entries;
212 char *kbuf;
213
214 count = min_t(size_t, count, PAGE_SIZE);
215 kbuf = kmalloc(count, GFP_KERNEL);
216 if (!kbuf)
217 return -ENOMEM;
218
219 ret = snprintf(kbuf, count,
220 "Page pinned ts %lld us count %d\n",
221 ts_usec, shared_count);
222
223 if (ret >= count)
224 goto err;
225
226 /* Print information relevant to grouping pages by mobility */
227 ret += snprintf(kbuf + ret, count - ret,
228 "PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
229 pfn,
230 pfn >> pageblock_order,
231 migratetype_names[pageblock_mt],
232 page_flags, &page_flags);
233
234 if (ret >= count)
235 goto err;
236
237 nr_entries = stack_depot_fetch(handle, &entries);
238 ret += stack_trace_snprint(kbuf + ret, count - ret, entries,
239 nr_entries, 0);
240 if (ret >= count)
241 goto err;
242
243 ret += snprintf(kbuf + ret, count - ret, "\n");
244 if (ret >= count)
245 goto err;
246
247 if (copy_to_user(buf, kbuf, ret))
248 ret = -EFAULT;
249
250 kfree(kbuf);
251 return ret;
252
253err:
254 kfree(kbuf);
255 return -ENOMEM;
256}
257
258void __dump_page_pinner(struct page *page)
259{
260 struct page_ext *page_ext = lookup_page_ext(page);
261 struct page_pinner *page_pinner;
262 depot_stack_handle_t handle;
263 unsigned long *entries;
264 unsigned int nr_entries;
265 int pageblock_mt;
266 unsigned long pfn;
267 int count;
268
269 if (unlikely(!page_ext)) {
270 pr_alert("There is not page extension available.\n");
271 return;
272 }
273
274 page_pinner = get_page_pinner(page_ext);
275
276 count = atomic_read(&page_pinner->count);
277 if (!count) {
278 pr_alert("page_pinner info is not present (never set?)\n");
279 return;
280 }
281
282 pfn = page_to_pfn(page);
283 pr_alert("page last pinned ts %lld count %d\n",
284 page_pinner->ts_usec,
285 count);
286
287 pageblock_mt = get_pageblock_migratetype(page);
288 pr_alert("PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
289 pfn,
290 pfn >> pageblock_order,
291 migratetype_names[pageblock_mt],
292 page->flags, &page->flags);
293
294 handle = READ_ONCE(page_pinner->handle);
295 if (!handle) {
296 pr_alert("page_pinner allocation stack trace missing\n");
297 } else {
298 nr_entries = stack_depot_fetch(handle, &entries);
299 stack_trace_print(entries, nr_entries, 0);
300 }
301}
302
Minchan Kimddc4a482021-03-29 16:48:47 -0700303void __page_pinner_migration_failed(struct page *page)
304{
305 struct page_ext *page_ext = lookup_page_ext(page);
306 struct page_pinner *page_pinner;
307 depot_stack_handle_t handle;
308 unsigned long flags;
309 unsigned int idx;
310
311 if (unlikely(!page_ext))
312 return;
313
314 page_pinner = get_page_pinner(page_ext);
315 if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
316 return;
317
318 handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
319
320 spin_lock_irqsave(&acf_pinner.lock, flags);
321 idx = acf_pinner.index++;
322 acf_pinner.index %= LONTERM_PIN_BUCKETS;
323
324 acf_pinner.pinner[idx].handle = handle;
325 acf_pinner.pinner[idx].ts_usec = ktime_to_us(ktime_get_boottime());
326 acf_pinner.pinner[idx].page_flags = page->flags;
327 acf_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
328 acf_pinner.pinner[idx].pfn = page_to_pfn(page);
329 spin_unlock_irqrestore(&acf_pinner.lock, flags);
330}
331EXPORT_SYMBOL(__page_pinner_migration_failed);
332
333void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
334{
335 struct page *page;
336 struct page_ext *page_ext;
337
338 list_for_each_entry(page, page_list, lru) {
Minchan Kim3a71ca12021-05-21 11:43:54 -0700339 /* The page will be freed by putback_movable_pages soon */
340 if (page_count(page) == 1)
341 continue;
Minchan Kimddc4a482021-03-29 16:48:47 -0700342 page_ext = lookup_page_ext(page);
343 if (unlikely(!page_ext))
344 continue;
345 __set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
346 }
347}
348
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700349static ssize_t
350read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
351 loff_t *ppos)
352{
353 loff_t i, idx;
354 struct captured_pinner record;
355 unsigned long flags;
356
357 if (!static_branch_unlikely(&page_pinner_inited))
358 return -EINVAL;
359
360 if (*ppos >= LONTERM_PIN_BUCKETS)
361 return 0;
362
363 i = *ppos;
364 *ppos = i + 1;
365
366 /*
367 * reading the records in the reverse order with newest one
368 * being read first followed by older ones
369 */
370 idx = (lt_pinner.index - 1 - i + LONTERM_PIN_BUCKETS) %
371 LONTERM_PIN_BUCKETS;
372 spin_lock_irqsave(&lt_pinner.lock, flags);
373 record = lt_pinner.pinner[idx];
374 spin_unlock_irqrestore(&lt_pinner.lock, flags);
375 if (!record.handle)
376 return 0;
377
378 return print_page_pinner(buf, count, record.pfn, record.page_mt,
379 record.page_flags, record.ts_usec,
380 record.handle, 0);
381}
382
/* Read-only file operations backing the "longterm_pinner" debugfs file. */
static const struct file_operations proc_longterm_pinner_operations = {
	.read = read_longterm_page_pinner,
};
386
Minchan Kimddc4a482021-03-29 16:48:47 -0700387static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
388 size_t count, loff_t *ppos)
389{
390 loff_t i, idx;
391 struct captured_pinner record;
392 unsigned long flags;
393
394 if (!static_branch_unlikely(&failure_tracking))
395 return -EINVAL;
396
397 if (*ppos >= LONTERM_PIN_BUCKETS)
398 return 0;
399
400 i = *ppos;
401 *ppos = i + 1;
402
403 /*
404 * reading the records in the reverse order with newest one
405 * being read first followed by older ones
406 */
407 idx = (acf_pinner.index - 1 - i + LONTERM_PIN_BUCKETS) %
408 LONTERM_PIN_BUCKETS;
409
410 spin_lock_irqsave(&acf_pinner.lock, flags);
411 record = acf_pinner.pinner[idx];
412 spin_unlock_irqrestore(&acf_pinner.lock, flags);
413 if (!record.handle)
414 return 0;
415
416 return print_page_pinner(buf, count, record.pfn, record.page_mt,
417 record.page_flags, record.ts_usec,
418 record.handle, 0);
419}
420
/* Read-only file operations backing the "alloc_contig_failed" debugfs file. */
static const struct file_operations proc_alloc_contig_failed_operations = {
	.read = read_alloc_contig_failed,
};
424
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700425static int pp_threshold_set(void *data, unsigned long long val)
426{
427 unsigned long flags;
428
429 threshold_usec = (s64)val;
430
431 spin_lock_irqsave(&lt_pinner.lock, flags);
432 memset(lt_pinner.pinner, 0,
433 sizeof(struct captured_pinner) * LONTERM_PIN_BUCKETS);
434 lt_pinner.index = 0;
435 spin_unlock_irqrestore(&lt_pinner.lock, flags);
436 return 0;
437}
438
439static int pp_threshold_get(void *data, unsigned long long *val)
440{
441 *val = (unsigned long long)threshold_usec;
442
443 return 0;
444}
445DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
446 pp_threshold_set, "%lld\n");
447
Minchan Kimddc4a482021-03-29 16:48:47 -0700448static int failure_tracking_set(void *data, u64 val)
449{
450 bool on;
451
452 on = (bool)val;
453 if (on)
454 static_branch_enable(&failure_tracking);
455 else
456 static_branch_disable(&failure_tracking);
457 return 0;
458}
459
460static int failure_tracking_get(void *data, u64 *val)
461{
462 *val = static_branch_unlikely(&failure_tracking);
463 return 0;
464}
465DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
466 failure_tracking_get,
467 failure_tracking_set, "%llu\n");
468
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700469static int __init page_pinner_init(void)
470{
471 struct dentry *pp_debugfs_root;
472
473 if (!static_branch_unlikely(&page_pinner_inited))
474 return 0;
475
476 pr_info("page_pinner enabled\n");
Minchan Kimddc4a482021-03-29 16:48:47 -0700477
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700478 pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
479
Minchan Kim7d3618b2021-06-22 19:49:51 -0700480 debugfs_create_file("longterm_pinner", 0444, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700481 &proc_longterm_pinner_operations);
482
Minchan Kim7d3618b2021-06-22 19:49:51 -0700483 debugfs_create_file("threshold", 0644, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700484 &pp_threshold_fops);
Minchan Kimddc4a482021-03-29 16:48:47 -0700485
Minchan Kim7d3618b2021-06-22 19:49:51 -0700486 debugfs_create_file("alloc_contig_failed", 0444,
Minchan Kimddc4a482021-03-29 16:48:47 -0700487 pp_debugfs_root, NULL,
488 &proc_alloc_contig_failed_operations);
489
Minchan Kim7d3618b2021-06-22 19:49:51 -0700490 debugfs_create_file("failure_tracking", 0644,
Minchan Kimddc4a482021-03-29 16:48:47 -0700491 pp_debugfs_root, NULL,
492 &failure_tracking_fops);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700493 return 0;
494}
495late_initcall(page_pinner_init)