blob: 64b1e41545611ca5e3f04fe783217f0ab0781d3a [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
2#include <linux/debugfs.h>
3#include <linux/mm.h>
4#include <linux/slab.h>
5#include <linux/uaccess.h>
6#include <linux/memblock.h>
7#include <linux/stacktrace.h>
8#include <linux/page_pinner.h>
9#include <linux/jump_label.h>
10#include <linux/migrate.h>
11#include <linux/stackdepot.h>
12#include <linux/seq_file.h>
13#include <linux/sched/clock.h>
14
15#include "internal.h"
16
/* Maximum number of stack frames saved per recorded pin site. */
#define PAGE_PINNER_STACK_DEPTH 16
/* Number of slots in each captured_pinner ring buffer. */
#define LONGTERM_PIN_BUCKETS 4096
Minchan Kim6e12c5b2021-03-18 09:56:10 -070019
20struct page_pinner {
21 depot_stack_handle_t handle;
22 s64 ts_usec;
23 atomic_t count;
24};
25
26struct captured_pinner {
27 depot_stack_handle_t handle;
Minchan Kim0445b672021-07-07 13:46:58 -070028 union {
29 s64 ts_usec;
30 s64 elapsed;
31 };
Minchan Kim32549482021-07-11 12:43:11 -070032
33 /* struct page fields */
Minchan Kim6e12c5b2021-03-18 09:56:10 -070034 unsigned long pfn;
Minchan Kim32549482021-07-11 12:43:11 -070035 int count;
36 int mapcount;
37 struct address_space *mapping;
38 unsigned long flags;
Minchan Kim6e12c5b2021-03-18 09:56:10 -070039};
40
41struct longterm_pinner {
42 spinlock_t lock;
43 unsigned int index;
Minchan Kim9a453102021-07-07 13:10:59 -070044 struct captured_pinner pinner[LONGTERM_PIN_BUCKETS];
Minchan Kim6e12c5b2021-03-18 09:56:10 -070045};
46
47static struct longterm_pinner lt_pinner = {
48 .lock = __SPIN_LOCK_UNLOCKED(lt_pinner.lock),
49};
50
51static s64 threshold_usec = 300000;
52
Minchan Kimddc4a482021-03-29 16:48:47 -070053/* alloc_contig failed pinner */
54static struct longterm_pinner acf_pinner = {
55 .lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
56};
57
Minchan Kim6e12c5b2021-03-18 09:56:10 -070058static bool page_pinner_enabled;
59DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
60
Minchan Kimddc4a482021-03-29 16:48:47 -070061DEFINE_STATIC_KEY_TRUE(failure_tracking);
62EXPORT_SYMBOL(failure_tracking);
63
Minchan Kim6e12c5b2021-03-18 09:56:10 -070064static depot_stack_handle_t failure_handle;
65
66static int __init early_page_pinner_param(char *buf)
67{
68 page_pinner_enabled = true;
69 return 0;
70}
71early_param("page_pinner", early_page_pinner_param);
72
73static bool need_page_pinner(void)
74{
75 return page_pinner_enabled;
76}
77
78static noinline void register_failure_stack(void)
79{
80 unsigned long entries[4];
81 unsigned int nr_entries;
82
83 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
84 failure_handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
85}
86
87static void init_page_pinner(void)
88{
89 if (!page_pinner_enabled)
90 return;
91
92 register_failure_stack();
93 static_branch_enable(&page_pinner_inited);
94}
95
96struct page_ext_operations page_pinner_ops = {
97 .size = sizeof(struct page_pinner),
98 .need = need_page_pinner,
99 .init = init_page_pinner,
100};
101
102static inline struct page_pinner *get_page_pinner(struct page_ext *page_ext)
103{
104 return (void *)page_ext + page_pinner_ops.offset;
105}
106
107static noinline depot_stack_handle_t save_stack(gfp_t flags)
108{
109 unsigned long entries[PAGE_PINNER_STACK_DEPTH];
110 depot_stack_handle_t handle;
111 unsigned int nr_entries;
112
113 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
114 handle = stack_depot_save(entries, nr_entries, flags);
115 if (!handle)
116 handle = failure_handle;
117
118 return handle;
119}
120
Minchan Kim71da0672021-07-07 13:27:30 -0700121static void capture_page_state(struct page *page,
122 struct captured_pinner *record)
123{
Minchan Kim32549482021-07-11 12:43:11 -0700124 record->flags = page->flags;
125 record->mapping = page_mapping(page);
Minchan Kim71da0672021-07-07 13:27:30 -0700126 record->pfn = page_to_pfn(page);
Minchan Kim32549482021-07-11 12:43:11 -0700127 record->count = page_count(page);
128 record->mapcount = page_mapcount(page);
Minchan Kim71da0672021-07-07 13:27:30 -0700129}
130
Minchan Kim9a453102021-07-07 13:10:59 -0700131static void check_longterm_pin(struct page_pinner *page_pinner,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700132 struct page *page)
133{
134 s64 now, delta = 0;
135 unsigned long flags;
136 unsigned int idx;
Minchan Kim71da0672021-07-07 13:27:30 -0700137 struct captured_pinner record;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700138
139 now = ktime_to_us(ktime_get_boottime());
140
141 /* get/put_page can be raced. Ignore that case */
142 if (page_pinner->ts_usec < now)
143 delta = now - page_pinner->ts_usec;
144
145 if (delta <= threshold_usec)
146 return;
147
Minchan Kim71da0672021-07-07 13:27:30 -0700148 record.handle = page_pinner->handle;
Minchan Kim0445b672021-07-07 13:46:58 -0700149 record.elapsed = delta;
Minchan Kim71da0672021-07-07 13:27:30 -0700150 capture_page_state(page, &record);
151
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700152 spin_lock_irqsave(&lt_pinner.lock, flags);
153 idx = lt_pinner.index++;
Minchan Kim9a453102021-07-07 13:10:59 -0700154 lt_pinner.index %= LONGTERM_PIN_BUCKETS;
Minchan Kim71da0672021-07-07 13:27:30 -0700155 lt_pinner.pinner[idx] = record;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700156 spin_unlock_irqrestore(&lt_pinner.lock, flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700157}
158
159void __reset_page_pinner(struct page *page, unsigned int order, bool free)
160{
161 struct page_pinner *page_pinner;
162 struct page_ext *page_ext;
163 int i;
164
165 page_ext = lookup_page_ext(page);
166 if (unlikely(!page_ext))
167 return;
168
169 for (i = 0; i < (1 << order); i++) {
Minchan Kimd0127832021-07-08 11:12:59 -0700170 if (!test_bit(PAGE_EXT_GET, &page_ext->flags) &&
171 !test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED,
172 &page_ext->flags))
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700173 continue;
174
175 page_pinner = get_page_pinner(page_ext);
176 if (free) {
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700177 atomic_set(&page_pinner->count, 0);
Minchan Kimddc4a482021-03-29 16:48:47 -0700178 __clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700179 } else {
Minchan Kim9a453102021-07-07 13:10:59 -0700180 check_longterm_pin(page_pinner, page);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700181 }
182 clear_bit(PAGE_EXT_GET, &page_ext->flags);
183 page_ext = page_ext_next(page_ext);
184 }
185}
186
187static inline void __set_page_pinner_handle(struct page *page,
188 struct page_ext *page_ext, depot_stack_handle_t handle,
189 unsigned int order)
190{
191 struct page_pinner *page_pinner;
192 int i;
193 s64 usec = ktime_to_us(ktime_get_boottime());
194
195 for (i = 0; i < (1 << order); i++) {
196 page_pinner = get_page_pinner(page_ext);
197 page_pinner->handle = handle;
198 page_pinner->ts_usec = usec;
199 set_bit(PAGE_EXT_GET, &page_ext->flags);
200 atomic_inc(&page_pinner->count);
201 page_ext = page_ext_next(page_ext);
202 }
203}
204
205noinline void __set_page_pinner(struct page *page, unsigned int order)
206{
207 struct page_ext *page_ext = lookup_page_ext(page);
208 depot_stack_handle_t handle;
209
210 if (unlikely(!page_ext))
211 return;
212
213 handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
214 __set_page_pinner_handle(page, page_ext, handle, order);
215}
216
217static ssize_t
Minchan Kim0445b672021-07-07 13:46:58 -0700218print_page_pinner(bool longterm, char __user *buf, size_t count, struct captured_pinner *record)
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700219{
220 int ret;
221 unsigned long *entries;
222 unsigned int nr_entries;
223 char *kbuf;
224
225 count = min_t(size_t, count, PAGE_SIZE);
226 kbuf = kmalloc(count, GFP_KERNEL);
227 if (!kbuf)
228 return -ENOMEM;
229
Minchan Kim0445b672021-07-07 13:46:58 -0700230 if (longterm) {
231 ret = snprintf(kbuf, count, "Page pinned for %lld us\n",
232 record->elapsed);
233 } else {
234 s64 ts_usec = record->ts_usec;
235 unsigned long rem_usec = do_div(ts_usec, 1000000);
236
237 ret = snprintf(kbuf, count,
238 "Page pinned ts [%5lu.%06lu]\n",
239 (unsigned long)ts_usec, rem_usec);
240 }
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700241
242 if (ret >= count)
243 goto err;
244
245 /* Print information relevant to grouping pages by mobility */
246 ret += snprintf(kbuf + ret, count - ret,
Minchan Kim32549482021-07-11 12:43:11 -0700247 "PFN 0x%lx Block %lu count %d mapcount %d mapping %pS Flags %#lx(%pGp)\n",
Minchan Kim71da0672021-07-07 13:27:30 -0700248 record->pfn,
249 record->pfn >> pageblock_order,
Minchan Kim32549482021-07-11 12:43:11 -0700250 record->count, record->mapcount,
251 record->mapping,
252 record->flags, &record->flags);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700253
254 if (ret >= count)
255 goto err;
256
Minchan Kim71da0672021-07-07 13:27:30 -0700257 nr_entries = stack_depot_fetch(record->handle, &entries);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700258 ret += stack_trace_snprint(kbuf + ret, count - ret, entries,
259 nr_entries, 0);
260 if (ret >= count)
261 goto err;
262
263 ret += snprintf(kbuf + ret, count - ret, "\n");
264 if (ret >= count)
265 goto err;
266
267 if (copy_to_user(buf, kbuf, ret))
268 ret = -EFAULT;
269
270 kfree(kbuf);
271 return ret;
272
273err:
274 kfree(kbuf);
275 return -ENOMEM;
276}
277
278void __dump_page_pinner(struct page *page)
279{
280 struct page_ext *page_ext = lookup_page_ext(page);
281 struct page_pinner *page_pinner;
282 depot_stack_handle_t handle;
283 unsigned long *entries;
284 unsigned int nr_entries;
285 int pageblock_mt;
286 unsigned long pfn;
287 int count;
Minchan Kim0445b672021-07-07 13:46:58 -0700288 unsigned long rem_usec;
289 s64 ts_usec;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700290
291 if (unlikely(!page_ext)) {
292 pr_alert("There is not page extension available.\n");
293 return;
294 }
295
296 page_pinner = get_page_pinner(page_ext);
297
298 count = atomic_read(&page_pinner->count);
299 if (!count) {
300 pr_alert("page_pinner info is not present (never set?)\n");
301 return;
302 }
303
304 pfn = page_to_pfn(page);
Minchan Kim0445b672021-07-07 13:46:58 -0700305 ts_usec = page_pinner->ts_usec;
306 rem_usec = do_div(ts_usec, 1000000);
307 pr_alert("page last pinned %5lu.%06lu] count %d\n",
308 (unsigned long)ts_usec, rem_usec, count);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700309
310 pageblock_mt = get_pageblock_migratetype(page);
311 pr_alert("PFN %lu Block %lu type %s Flags %#lx(%pGp)\n",
312 pfn,
313 pfn >> pageblock_order,
314 migratetype_names[pageblock_mt],
315 page->flags, &page->flags);
316
317 handle = READ_ONCE(page_pinner->handle);
318 if (!handle) {
319 pr_alert("page_pinner allocation stack trace missing\n");
320 } else {
321 nr_entries = stack_depot_fetch(handle, &entries);
322 stack_trace_print(entries, nr_entries, 0);
323 }
324}
325
Minchan Kimddc4a482021-03-29 16:48:47 -0700326void __page_pinner_migration_failed(struct page *page)
327{
328 struct page_ext *page_ext = lookup_page_ext(page);
329 struct page_pinner *page_pinner;
Minchan Kim71da0672021-07-07 13:27:30 -0700330 struct captured_pinner record;
Minchan Kimddc4a482021-03-29 16:48:47 -0700331 unsigned long flags;
332 unsigned int idx;
333
334 if (unlikely(!page_ext))
335 return;
336
337 page_pinner = get_page_pinner(page_ext);
338 if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
339 return;
340
Minchan Kim71da0672021-07-07 13:27:30 -0700341 record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
342 record.ts_usec = ktime_to_us(ktime_get_boottime());
343 capture_page_state(page, &record);
Minchan Kimddc4a482021-03-29 16:48:47 -0700344
345 spin_lock_irqsave(&acf_pinner.lock, flags);
346 idx = acf_pinner.index++;
Minchan Kim9a453102021-07-07 13:10:59 -0700347 acf_pinner.index %= LONGTERM_PIN_BUCKETS;
Minchan Kim71da0672021-07-07 13:27:30 -0700348 acf_pinner.pinner[idx] = record;
Minchan Kimddc4a482021-03-29 16:48:47 -0700349 spin_unlock_irqrestore(&acf_pinner.lock, flags);
350}
351EXPORT_SYMBOL(__page_pinner_migration_failed);
352
353void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
354{
355 struct page *page;
356 struct page_ext *page_ext;
357
358 list_for_each_entry(page, page_list, lru) {
Minchan Kim3a71ca12021-05-21 11:43:54 -0700359 /* The page will be freed by putback_movable_pages soon */
360 if (page_count(page) == 1)
361 continue;
Minchan Kimddc4a482021-03-29 16:48:47 -0700362 page_ext = lookup_page_ext(page);
363 if (unlikely(!page_ext))
364 continue;
365 __set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
366 }
367}
368
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700369static ssize_t
370read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
371 loff_t *ppos)
372{
373 loff_t i, idx;
374 struct captured_pinner record;
375 unsigned long flags;
376
377 if (!static_branch_unlikely(&page_pinner_inited))
378 return -EINVAL;
379
Minchan Kim9a453102021-07-07 13:10:59 -0700380 if (*ppos >= LONGTERM_PIN_BUCKETS)
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700381 return 0;
382
383 i = *ppos;
384 *ppos = i + 1;
385
386 /*
387 * reading the records in the reverse order with newest one
388 * being read first followed by older ones
389 */
Minchan Kim9a453102021-07-07 13:10:59 -0700390 idx = (lt_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
391 LONGTERM_PIN_BUCKETS;
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700392 spin_lock_irqsave(&lt_pinner.lock, flags);
393 record = lt_pinner.pinner[idx];
394 spin_unlock_irqrestore(&lt_pinner.lock, flags);
395 if (!record.handle)
396 return 0;
397
Minchan Kim0445b672021-07-07 13:46:58 -0700398 return print_page_pinner(true, buf, count, &record);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700399}
400
401static const struct file_operations proc_longterm_pinner_operations = {
402 .read = read_longterm_page_pinner,
403};
404
Minchan Kimddc4a482021-03-29 16:48:47 -0700405static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
406 size_t count, loff_t *ppos)
407{
408 loff_t i, idx;
409 struct captured_pinner record;
410 unsigned long flags;
411
412 if (!static_branch_unlikely(&failure_tracking))
413 return -EINVAL;
414
Minchan Kim9a453102021-07-07 13:10:59 -0700415 if (*ppos >= LONGTERM_PIN_BUCKETS)
Minchan Kimddc4a482021-03-29 16:48:47 -0700416 return 0;
417
418 i = *ppos;
419 *ppos = i + 1;
420
421 /*
422 * reading the records in the reverse order with newest one
423 * being read first followed by older ones
424 */
Minchan Kim9a453102021-07-07 13:10:59 -0700425 idx = (acf_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) %
426 LONGTERM_PIN_BUCKETS;
Minchan Kimddc4a482021-03-29 16:48:47 -0700427
428 spin_lock_irqsave(&acf_pinner.lock, flags);
429 record = acf_pinner.pinner[idx];
430 spin_unlock_irqrestore(&acf_pinner.lock, flags);
431 if (!record.handle)
432 return 0;
433
Minchan Kim0445b672021-07-07 13:46:58 -0700434 return print_page_pinner(false, buf, count, &record);
Minchan Kimddc4a482021-03-29 16:48:47 -0700435}
436
437static const struct file_operations proc_alloc_contig_failed_operations = {
438 .read = read_alloc_contig_failed,
439};
440
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700441static int pp_threshold_set(void *data, unsigned long long val)
442{
443 unsigned long flags;
444
445 threshold_usec = (s64)val;
446
447 spin_lock_irqsave(&lt_pinner.lock, flags);
448 memset(lt_pinner.pinner, 0,
Minchan Kim9a453102021-07-07 13:10:59 -0700449 sizeof(struct captured_pinner) * LONGTERM_PIN_BUCKETS);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700450 lt_pinner.index = 0;
451 spin_unlock_irqrestore(&lt_pinner.lock, flags);
452 return 0;
453}
454
455static int pp_threshold_get(void *data, unsigned long long *val)
456{
457 *val = (unsigned long long)threshold_usec;
458
459 return 0;
460}
461DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
462 pp_threshold_set, "%lld\n");
463
Minchan Kimddc4a482021-03-29 16:48:47 -0700464static int failure_tracking_set(void *data, u64 val)
465{
466 bool on;
467
468 on = (bool)val;
469 if (on)
470 static_branch_enable(&failure_tracking);
471 else
472 static_branch_disable(&failure_tracking);
473 return 0;
474}
475
476static int failure_tracking_get(void *data, u64 *val)
477{
478 *val = static_branch_unlikely(&failure_tracking);
479 return 0;
480}
481DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
482 failure_tracking_get,
483 failure_tracking_set, "%llu\n");
484
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700485static int __init page_pinner_init(void)
486{
487 struct dentry *pp_debugfs_root;
488
489 if (!static_branch_unlikely(&page_pinner_inited))
490 return 0;
491
492 pr_info("page_pinner enabled\n");
Minchan Kimddc4a482021-03-29 16:48:47 -0700493
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700494 pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
495
Minchan Kim7d3618b2021-06-22 19:49:51 -0700496 debugfs_create_file("longterm_pinner", 0444, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700497 &proc_longterm_pinner_operations);
498
Minchan Kim7d3618b2021-06-22 19:49:51 -0700499 debugfs_create_file("threshold", 0644, pp_debugfs_root, NULL,
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700500 &pp_threshold_fops);
Minchan Kimddc4a482021-03-29 16:48:47 -0700501
Minchan Kim7d3618b2021-06-22 19:49:51 -0700502 debugfs_create_file("alloc_contig_failed", 0444,
Minchan Kimddc4a482021-03-29 16:48:47 -0700503 pp_debugfs_root, NULL,
504 &proc_alloc_contig_failed_operations);
505
Minchan Kim7d3618b2021-06-22 19:49:51 -0700506 debugfs_create_file("failure_tracking", 0644,
Minchan Kimddc4a482021-03-29 16:48:47 -0700507 pp_debugfs_root, NULL,
508 &failure_tracking_fops);
Minchan Kim6e12c5b2021-03-18 09:56:10 -0700509 return 0;
510}
511late_initcall(page_pinner_init)