blob: 368cf498870d6ee244ae91fb93cbd20d186a093e [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
2#include <linux/debugfs.h>
3#include <linux/mm.h>
4#include <linux/slab.h>
5#include <linux/uaccess.h>
6#include <linux/memblock.h>
7#include <linux/stacktrace.h>
8#include <linux/page_pinner.h>
9#include <linux/jump_label.h>
10#include <linux/migrate.h>
11#include <linux/stackdepot.h>
12#include <linux/seq_file.h>
13#include <linux/sched/clock.h>
14
15#include "internal.h"
16
/* Maximum number of stack frames captured by save_stack(). */
#define PAGE_PINNER_STACK_DEPTH 16
/* Number of captured_pinner slots in each struct longterm_pinner ring. */
#define LONGTERM_PIN_BUCKETS 4096
Minchan Kim6e12c5b2021-03-18 09:56:10 -070019
20struct page_pinner {
21 depot_stack_handle_t handle;
22 s64 ts_usec;
23 atomic_t count;
24};
25
26struct captured_pinner {
27 depot_stack_handle_t handle;
28 s64 ts_usec;
29 int page_mt;
30 unsigned long page_flags;
31 unsigned long pfn;
32};
33
34struct longterm_pinner {
35 spinlock_t lock;
36 unsigned int index;
Minchan Kim9a453102021-07-07 13:10:59 -070037 struct captured_pinner pinner[LONGTERM_PIN_BUCKETS];
Minchan Kim6e12c5b2021-03-18 09:56:10 -070038};
39
40static struct longterm_pinner lt_pinner = {
41 .lock = __SPIN_LOCK_UNLOCKED(lt_pinner.lock),
42};
43
44static s64 threshold_usec = 300000;
45
Minchan Kimddc4a482021-03-29 16:48:47 -070046/* alloc_contig failed pinner */
47static struct longterm_pinner acf_pinner = {
48 .lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
49};
50
Minchan Kim6e12c5b2021-03-18 09:56:10 -070051static bool page_pinner_enabled;
52DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
53
Minchan Kimddc4a482021-03-29 16:48:47 -070054DEFINE_STATIC_KEY_TRUE(failure_tracking);
55EXPORT_SYMBOL(failure_tracking);
56
Minchan Kim6e12c5b2021-03-18 09:56:10 -070057static depot_stack_handle_t failure_handle;
58
59static int __init early_page_pinner_param(char *buf)
60{
61 page_pinner_enabled = true;
62 return 0;
63}
64early_param("page_pinner", early_page_pinner_param);
65
66static bool need_page_pinner(void)
67{
68 return page_pinner_enabled;
69}
70
71static noinline void register_failure_stack(void)
72{
73 unsigned long entries[4];
74 unsigned int nr_entries;
75
76 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
77 failure_handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
78}
79
80static void init_page_pinner(void)
81{
82 if (!page_pinner_enabled)
83 return;
84
85 register_failure_stack();
86 static_branch_enable(&page_pinner_inited);
87}
88
89struct page_ext_operations page_pinner_ops = {
90 .size = sizeof(struct page_pinner),
91 .need = need_page_pinner,
92 .init = init_page_pinner,
93};
94
95static inline struct page_pinner *get_page_pinner(struct page_ext *page_ext)
96{
97 return (void *)page_ext + page_pinner_ops.offset;
98}
99
100static noinline depot_stack_handle_t save_stack(gfp_t flags)
101{
102 unsigned long entries[PAGE_PINNER_STACK_DEPTH];
103 depot_stack_handle_t handle;
104 unsigned int nr_entries;
105
106 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
107 handle = stack_depot_save(entries, nr_entries, flags);
108 if (!handle)
109 handle = failure_handle;
110
111 return handle;
112}
113
Minchan Kim9a453102021-07-07 13:10:59 -0700114static void check_longterm_pin(struct page_pinner *page_pinner,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700115 struct page *page)
116{
117 s64 now, delta = 0;
118 unsigned long flags;
119 unsigned int idx;
120
121 now = ktime_to_us(ktime_get_boottime());
122
123 /* get/put_page can be raced. Ignore that case */
124 if (page_pinner->ts_usec < now)
125 delta = now - page_pinner->ts_usec;
126
127 if (delta <= threshold_usec)
128 return;
129
130 spin_lock_irqsave(&lt_pinner.lock, flags);
131 idx = lt_pinner.index++;
Minchan Kim9a453102021-07-07 13:10:59 -0700132 lt_pinner.index %= LONGTERM_PIN_BUCKETS;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700133
134 lt_pinner.pinner[idx].handle = page_pinner->handle;
135 lt_pinner.pinner[idx].ts_usec = delta;
136 lt_pinner.pinner[idx].page_flags = page->flags;
137 lt_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
138 lt_pinner.pinner[idx].pfn = page_to_pfn(page);
139 spin_unlock_irqrestore(&lt_pinner.lock, flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700140}
141
142void __reset_page_pinner(struct page *page, unsigned int order, bool free)
143{
144 struct page_pinner *page_pinner;
145 struct page_ext *page_ext;
146 int i;
147
148 page_ext = lookup_page_ext(page);
149 if (unlikely(!page_ext))
150 return;
151
152 for (i = 0; i < (1 << order); i++) {
Minchan Kimd0127832021-07-08 11:12:59 -0700153 if (!test_bit(PAGE_EXT_GET, &page_ext->flags) &&
154 !test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED,
155 &page_ext->flags))
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700156 continue;
157
158 page_pinner = get_page_pinner(page_ext);
159 if (free) {
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700160 atomic_set(&page_pinner->count, 0);
Minchan Kimddc4a482021-03-29 16:48:47 -0700161 __clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700162 } else {
Minchan Kim9a453102021-07-07 13:10:59 -0700163 check_longterm_pin(page_pinner, page);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700164 }
165 clear_bit(PAGE_EXT_GET, &page_ext->flags);
166 page_ext = page_ext_next(page_ext);
167 }
168}
169
170static inline void __set_page_pinner_handle(struct page *page,
171 struct page_ext *page_ext, depot_stack_handle_t handle,
172 unsigned int order)
173{
174 struct page_pinner *page_pinner;
175 int i;
176 s64 usec = ktime_to_us(ktime_get_boottime());
177
178 for (i = 0; i < (1 << order); i++) {
179 page_pinner = get_page_pinner(page_ext);
180 page_pinner->handle = handle;
181 page_pinner->ts_usec = usec;
182 set_bit(PAGE_EXT_GET, &page_ext->flags);
183 atomic_inc(&page_pinner->count);
184 page_ext = page_ext_next(page_ext);
185 }
186}
187
188noinline void __set_page_pinner(struct page *page, unsigned int order)
189{
190 struct page_ext *page_ext = lookup_page_ext(page);
191 depot_stack_handle_t handle;
192
193 if (unlikely(!page_ext))
194 return;
195
196 handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
197 __set_page_pinner_handle(page, page_ext, handle, order);
198}
199
200static ssize_t
201print_page_pinner(char __user *buf, size_t count, unsigned long pfn,
202 int pageblock_mt, unsigned long page_flags, s64 ts_usec,
Minchan Kimb83e5642021-07-07 13:22:52 -0700203 depot_stack_handle_t handle)
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700204{
205 int ret;
206 unsigned long *entries;
207 unsigned int nr_entries;
208 char *kbuf;
209
210 count = min_t(size_t, count, PAGE_SIZE);
211 kbuf = kmalloc(count, GFP_KERNEL);
212 if (!kbuf)
213 return -ENOMEM;
214
215 ret = snprintf(kbuf, count,
Minchan Kimb83e5642021-07-07 13:22:52 -0700216 "Page pinned ts %lld us\n",
217 ts_usec);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700218
219 if (ret >= count)
220 goto err;
221
222 /* Print information relevant to grouping pages by mobility */
223 ret += snprintf(kbuf + ret, count - ret,
224 "PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
225 pfn,
226 pfn >> pageblock_order,
227 migratetype_names[pageblock_mt],
228 page_flags, &page_flags);
229
230 if (ret >= count)
231 goto err;
232
233 nr_entries = stack_depot_fetch(handle, &entries);
234 ret += stack_trace_snprint(kbuf + ret, count - ret, entries,
235 nr_entries, 0);
236 if (ret >= count)
237 goto err;
238
239 ret += snprintf(kbuf + ret, count - ret, "\n");
240 if (ret >= count)
241 goto err;
242
243 if (copy_to_user(buf, kbuf, ret))
244 ret = -EFAULT;
245
246 kfree(kbuf);
247 return ret;
248
249err:
250 kfree(kbuf);
251 return -ENOMEM;
252}
253
254void __dump_page_pinner(struct page *page)
255{
256 struct page_ext *page_ext = lookup_page_ext(page);
257 struct page_pinner *page_pinner;
258 depot_stack_handle_t handle;
259 unsigned long *entries;
260 unsigned int nr_entries;
261 int pageblock_mt;
262 unsigned long pfn;
263 int count;
264
265 if (unlikely(!page_ext)) {
266 pr_alert("There is not page extension available.\n");
267 return;
268 }
269
270 page_pinner = get_page_pinner(page_ext);
271
272 count = atomic_read(&page_pinner->count);
273 if (!count) {
274 pr_alert("page_pinner info is not present (never set?)\n");
275 return;
276 }
277
278 pfn = page_to_pfn(page);
279 pr_alert("page last pinned ts %lld count %d\n",
280 page_pinner->ts_usec,
281 count);
282
283 pageblock_mt = get_pageblock_migratetype(page);
284 pr_alert("PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
285 pfn,
286 pfn >> pageblock_order,
287 migratetype_names[pageblock_mt],
288 page->flags, &page->flags);
289
290 handle = READ_ONCE(page_pinner->handle);
291 if (!handle) {
292 pr_alert("page_pinner allocation stack trace missing\n");
293 } else {
294 nr_entries = stack_depot_fetch(handle, &entries);
295 stack_trace_print(entries, nr_entries, 0);
296 }
297}
298
Minchan Kimddc4a482021-03-29 16:48:47 -0700299void __page_pinner_migration_failed(struct page *page)
300{
301 struct page_ext *page_ext = lookup_page_ext(page);
302 struct page_pinner *page_pinner;
303 depot_stack_handle_t handle;
304 unsigned long flags;
305 unsigned int idx;
306
307 if (unlikely(!page_ext))
308 return;
309
310 page_pinner = get_page_pinner(page_ext);
311 if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
312 return;
313
314 handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
315
316 spin_lock_irqsave(&acf_pinner.lock, flags);
317 idx = acf_pinner.index++;
Minchan Kim9a453102021-07-07 13:10:59 -0700318 acf_pinner.index %= LONGTERM_PIN_BUCKETS;
Minchan Kimddc4a482021-03-29 16:48:47 -0700319
320 acf_pinner.pinner[idx].handle = handle;
321 acf_pinner.pinner[idx].ts_usec = ktime_to_us(ktime_get_boottime());
322 acf_pinner.pinner[idx].page_flags = page->flags;
323 acf_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
324 acf_pinner.pinner[idx].pfn = page_to_pfn(page);
325 spin_unlock_irqrestore(&acf_pinner.lock, flags);
326}
327EXPORT_SYMBOL(__page_pinner_migration_failed);
328
329void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
330{
331 struct page *page;
332 struct page_ext *page_ext;
333
334 list_for_each_entry(page, page_list, lru) {
Minchan Kim3a71ca12021-05-21 11:43:54 -0700335 /* The page will be freed by putback_movable_pages soon */
336 if (page_count(page) == 1)
337 continue;
Minchan Kimddc4a482021-03-29 16:48:47 -0700338 page_ext = lookup_page_ext(page);
339 if (unlikely(!page_ext))
340 continue;
341 __set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
342 }
343}
344
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700345static ssize_t
346read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
347 loff_t *ppos)
348{
349 loff_t i, idx;
350 struct captured_pinner record;
351 unsigned long flags;
352
353 if (!static_branch_unlikely(&page_pinner_inited))
354 return -EINVAL;
355
Minchan Kim9a453102021-07-07 13:10:59 -0700356 if (*ppos >= LONGTERM_PIN_BUCKETS)
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700357 return 0;
358
359 i = *ppos;
360 *ppos = i + 1;
361
362 /*
363 * reading the records in the reverse order with newest one
364 * being read first followed by older ones
365 */
Minchan Kim9a453102021-07-07 13:10:59 -0700366 idx = (lt_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
367 LONGTERM_PIN_BUCKETS;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700368 spin_lock_irqsave(&lt_pinner.lock, flags);
369 record = lt_pinner.pinner[idx];
370 spin_unlock_irqrestore(&lt_pinner.lock, flags);
371 if (!record.handle)
372 return 0;
373
374 return print_page_pinner(buf, count, record.pfn, record.page_mt,
375 record.page_flags, record.ts_usec,
Minchan Kimb83e5642021-07-07 13:22:52 -0700376 record.handle);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700377}
378
379static const struct file_operations proc_longterm_pinner_operations = {
380 .read = read_longterm_page_pinner,
381};
382
Minchan Kimddc4a482021-03-29 16:48:47 -0700383static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
384 size_t count, loff_t *ppos)
385{
386 loff_t i, idx;
387 struct captured_pinner record;
388 unsigned long flags;
389
390 if (!static_branch_unlikely(&failure_tracking))
391 return -EINVAL;
392
Minchan Kim9a453102021-07-07 13:10:59 -0700393 if (*ppos >= LONGTERM_PIN_BUCKETS)
Minchan Kimddc4a482021-03-29 16:48:47 -0700394 return 0;
395
396 i = *ppos;
397 *ppos = i + 1;
398
399 /*
400 * reading the records in the reverse order with newest one
401 * being read first followed by older ones
402 */
Minchan Kim9a453102021-07-07 13:10:59 -0700403 idx = (acf_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
404 LONGTERM_PIN_BUCKETS;
Minchan Kimddc4a482021-03-29 16:48:47 -0700405
406 spin_lock_irqsave(&acf_pinner.lock, flags);
407 record = acf_pinner.pinner[idx];
408 spin_unlock_irqrestore(&acf_pinner.lock, flags);
409 if (!record.handle)
410 return 0;
411
412 return print_page_pinner(buf, count, record.pfn, record.page_mt,
413 record.page_flags, record.ts_usec,
Minchan Kimb83e5642021-07-07 13:22:52 -0700414 record.handle);
Minchan Kimddc4a482021-03-29 16:48:47 -0700415}
416
417static const struct file_operations proc_alloc_contig_failed_operations = {
418 .read = read_alloc_contig_failed,
419};
420
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700421static int pp_threshold_set(void *data, unsigned long long val)
422{
423 unsigned long flags;
424
425 threshold_usec = (s64)val;
426
427 spin_lock_irqsave(&lt_pinner.lock, flags);
428 memset(lt_pinner.pinner, 0,
Minchan Kim9a453102021-07-07 13:10:59 -0700429 sizeof(struct captured_pinner) * LONGTERM_PIN_BUCKETS);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700430 lt_pinner.index = 0;
431 spin_unlock_irqrestore(&lt_pinner.lock, flags);
432 return 0;
433}
434
435static int pp_threshold_get(void *data, unsigned long long *val)
436{
437 *val = (unsigned long long)threshold_usec;
438
439 return 0;
440}
441DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
442 pp_threshold_set, "%lld\n");
443
Minchan Kimddc4a482021-03-29 16:48:47 -0700444static int failure_tracking_set(void *data, u64 val)
445{
446 bool on;
447
448 on = (bool)val;
449 if (on)
450 static_branch_enable(&failure_tracking);
451 else
452 static_branch_disable(&failure_tracking);
453 return 0;
454}
455
456static int failure_tracking_get(void *data, u64 *val)
457{
458 *val = static_branch_unlikely(&failure_tracking);
459 return 0;
460}
461DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
462 failure_tracking_get,
463 failure_tracking_set, "%llu\n");
464
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700465static int __init page_pinner_init(void)
466{
467 struct dentry *pp_debugfs_root;
468
469 if (!static_branch_unlikely(&page_pinner_inited))
470 return 0;
471
472 pr_info("page_pinner enabled\n");
Minchan Kimddc4a482021-03-29 16:48:47 -0700473
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700474 pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
475
Minchan Kim7d3618b2021-06-22 19:49:51 -0700476 debugfs_create_file("longterm_pinner", 0444, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700477 &proc_longterm_pinner_operations);
478
Minchan Kim7d3618b2021-06-22 19:49:51 -0700479 debugfs_create_file("threshold", 0644, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700480 &pp_threshold_fops);
Minchan Kimddc4a482021-03-29 16:48:47 -0700481
Minchan Kim7d3618b2021-06-22 19:49:51 -0700482 debugfs_create_file("alloc_contig_failed", 0444,
Minchan Kimddc4a482021-03-29 16:48:47 -0700483 pp_debugfs_root, NULL,
484 &proc_alloc_contig_failed_operations);
485
Minchan Kim7d3618b2021-06-22 19:49:51 -0700486 debugfs_create_file("failure_tracking", 0644,
Minchan Kimddc4a482021-03-29 16:48:47 -0700487 pp_debugfs_root, NULL,
488 &failure_tracking_fops);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700489 return 0;
490}
491late_initcall(page_pinner_init)