// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is the VMware physical memory management driver for Linux. The
 * driver acts like a "balloon" that can be inflated to reclaim physical
 * pages by reserving them in the guest and invalidating them in the
 * monitor, freeing up the underlying machine pages so they can be
 * allocated to other guests. The balloon can also be deflated to allow
 * the guest to use more physical memory. Higher level policies can
 * control the sizes of balloons in VMs in order to manage physical
 * memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Use __GFP_HIGHMEM to allow pages from the HIGHMEM zone. We don't allow
 * waiting (__GFP_RECLAIM) for NOSLEEP page allocations. Use __GFP_NOWARN
 * to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep. This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)

/* Maximum number of refused pages we accumulate during an inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated with any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
};

#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

#define VMW_BALLOON_2M_SHIFT		(9)
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available.
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands.
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands.
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands.
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10

/* error codes */
#define VMW_BALLOON_SUCCESS			0
#define VMW_BALLOON_FAILURE			-1
#define VMW_BALLOON_ERROR_CMD_INVALID		1
#define VMW_BALLOON_ERROR_PPN_INVALID		2
#define VMW_BALLOON_ERROR_PPN_LOCKED		3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED		4
#define VMW_BALLOON_ERROR_PPN_PINNED		5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED		6
#define VMW_BALLOON_ERROR_RESET			7
#define VMW_BALLOON_ERROR_BUSY			8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/* Batch page description */

/*
 * Layout of an entry in the batch page:
 *
 * +-------------+----------+--------+
 * |             |          |        |
 * | Page number | Reserved | Status |
 * |             |          |        |
 * +-------------+----------+--------+
 * 64            PAGE_SHIFT 6        0
 *
 * The reserved field should be set to 0.
 */
#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
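
/*
 * Worked example (illustrative values only): with PAGE_SHIFT == 12, a 4k
 * page at physical address 0x12345000 is submitted as the batch entry
 * 0x12345000 (its page-aligned address, status bits clear). If the
 * hypervisor rewrites that entry as 0x12345003, the accessors below
 * decode it as PA 0x12345000 with status 3 (VMW_BALLOON_ERROR_PPN_LOCKED).
 */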

#define VMW_BALLOON_CMD_WITH_TARGET_MASK	\
	((1UL << VMW_BALLOON_CMD_GET_TARGET) |	\
	 (1UL << VMW_BALLOON_CMD_LOCK) |	\
	 (1UL << VMW_BALLOON_CMD_UNLOCK) |	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) |	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) |	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) |	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))

struct vmballoon_batch_page {
	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
};

static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
{
	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
}

static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
				      int idx)
{
	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
}

static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
				   u64 pa)
{
	batch->pages[idx] = pa;
}

#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
	unsigned int timer;
	unsigned int doorbell;

	/* allocation statistics */
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int sleep_alloc;
	unsigned int sleep_alloc_fail;
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

	/* monitor operations */
	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int target;
	unsigned int target_fail;
	unsigned int start;
	unsigned int start_fail;
	unsigned int guest_type;
	unsigned int guest_type_fail;
	unsigned int doorbell_set;
	unsigned int doorbell_unset;
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

struct vmballoon;

struct vmballoon_ops {
	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
	int (*lock)(struct vmballoon *b, unsigned int num_pages,
		    bool is_2m_pages);
	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
		      bool is_2m_pages);
};

struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;

	/* transient list of non-balloonable pages */
	struct list_head refused_pages;
	unsigned int n_refused_pages;
};

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
	unsigned supported_page_sizes;

	/* balloon size in pages */
	unsigned int size;
	unsigned int target;

	/* reset flag */
	bool reset_required;

	unsigned long capabilities;

	struct vmballoon_batch_page *batch_page;
	unsigned int batch_max_pages;
	struct page *page;

	const struct vmballoon_ops *ops;

#ifdef CONFIG_DEBUG_FS
	/* statistics */
	struct vmballoon_stats stats;

	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct sysinfo sysinfo;

	struct delayed_work dwork;

	struct vmci_handle vmci_doorbell;
};

static struct vmballoon balloon;

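/*
 * Backdoor calling convention (as used by the asm block below): "inl" on
 * port VMW_BALLOON_HV_PORT with VMW_BALLOON_HV_MAGIC in %eax, the command
 * in %ecx, the first argument in %ebx and the second in %esi. The status
 * is returned in %eax and the command result in %ebx, except for the
 * START command, which reports its result (the capabilities) in %ecx.
 */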
static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
		unsigned long arg2, unsigned long *result)
{
	unsigned long status, dummy1, dummy2, dummy3, local_result;

	asm volatile ("inl %%dx" :
		"=a"(status),
		"=c"(dummy1),
		"=d"(dummy2),
		"=b"(local_result),
		"=S"(dummy3) :
		"0"(VMW_BALLOON_HV_MAGIC),
		"1"(cmd),
		"2"(VMW_BALLOON_HV_PORT),
		"3"(arg1),
		"4"(arg2) :
		"memory");

	/* update the result if needed */
	if (result)
		*result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
							   local_result;

	/* update target when applicable */
	if (status == VMW_BALLOON_SUCCESS &&
	    ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
		b->target = local_result;

	/* mark reset required accordingly */
	if (status == VMW_BALLOON_ERROR_RESET)
		b->reset_required = true;

	return status;
}

static __always_inline unsigned long
vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
	      unsigned long arg2)
{
	unsigned long dummy;

	return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
}

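/*
 * Usage sketch (illustrative): retrieving the balloon target reduces to a
 * single backdoor call,
 *
 *	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);
 *
 * where b->target is updated as a side effect, since GET_TARGET is part
 * of VMW_BALLOON_CMD_WITH_TARGET_MASK and the call reports success.
 */
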
/*
 * Send the "start" command to the host, communicating the supported
 * version of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities;
	bool success;

	STATS_INC(b->stats.start);

	status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
				 &capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		success = true;
		break;
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		success = true;
		break;
	default:
		success = false;
	}

	/*
	 * 2MB pages are only supported with batching. If batching is for some
	 * reason disabled, do not use 2MB pages, since otherwise the legacy
	 * mechanism is used with 2MB pages, causing a failure.
	 */
	if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
	    (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
		b->supported_page_sizes = 2;
	else
		b->supported_page_sizes = 1;

	if (!success) {
		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
		STATS_INC(b->stats.start_fail);
	}
	return success;
}

/*
 * Communicate the guest type to the host so that it can adjust the
 * ballooning algorithm to the one most appropriate for the guest. This
 * command is normally issued after sending the "start" command and is
 * part of the standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
			       VMW_BALLOON_GUEST_ID, 0);

	STATS_INC(b->stats.guest_type);

	if (status == VMW_BALLOON_SUCCESS)
		return true;

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.guest_type_fail);
	return false;
}

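/*
 * Return the size of a balloon page in units of 4k pages: 512 (i.e.
 * 1 << VMW_BALLOON_2M_SHIFT) for a 2m page, 1 otherwise. All balloon
 * size accounting in this driver (b->size, b->target) is kept in these
 * units.
 */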
static u16 vmballoon_page_size(bool is_2m_page)
{
	if (is_2m_page)
		return 1 << VMW_BALLOON_2M_SHIFT;

	return 1;
}
382
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400383/*
384 * Retrieve desired balloon size from the host.
385 */
Nadav Amit10a95d52018-09-20 10:30:07 -0700386static bool vmballoon_send_get_target(struct vmballoon *b)
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400387{
388 unsigned long status;
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400389 unsigned long limit;
390 u32 limit32;
391
392 /*
393 * si_meminfo() is cheap. Moreover, we want to provide dynamic
394 * max balloon size later. So let us call si_meminfo() every
395 * iteration.
396 */
397 si_meminfo(&b->sysinfo);
398 limit = b->sysinfo.totalram;
399
400 /* Ensure limit fits in 32-bits */
401 limit32 = (u32)limit;
402 if (limit != limit32)
403 return false;
404
405 /* update stats */
406 STATS_INC(b->stats.target);
407
Nadav Amit10a95d52018-09-20 10:30:07 -0700408 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);
409
410 if (status == VMW_BALLOON_SUCCESS)
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400411 return true;
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400412
413 pr_debug("%s - failed, hv returns %ld\n", __func__, status);
414 STATS_INC(b->stats.target_fail);
415 return false;
416}
417
418/*
419 * Notify the host about allocated page so that host can use it without
420 * fear that guest will need it. Host may reject some pages, we need to
421 * check the return value and maybe submit a different page.
422 */
Danny Kukawka3e5ba462012-01-30 23:00:08 +0100423static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
Nadav Amit10a95d52018-09-20 10:30:07 -0700424 unsigned int *hv_status)
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400425{
Nadav Amit10a95d52018-09-20 10:30:07 -0700426 unsigned long status;
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400427 u32 pfn32;
428
429 pfn32 = (u32)pfn;
430 if (pfn32 != pfn)
Nadav Amit09755692018-06-19 16:00:24 -0700431 return -EINVAL;
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400432
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700433 STATS_INC(b->stats.lock[false]);
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400434
Nadav Amit10a95d52018-09-20 10:30:07 -0700435 *hv_status = status = vmballoon_cmd(b, VMW_BALLOON_CMD_LOCK, pfn, 0);
436
437 if (status == VMW_BALLOON_SUCCESS)
Danny Kukawka3e5ba462012-01-30 23:00:08 +0100438 return 0;
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400439
440 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700441 STATS_INC(b->stats.lock_fail[false]);
Nadav Amit09755692018-06-19 16:00:24 -0700442 return -EIO;
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400443}
444
Xavier Deguillardf220a802015-08-06 15:17:58 -0700445static int vmballoon_send_batched_lock(struct vmballoon *b,
Nadav Amit10a95d52018-09-20 10:30:07 -0700446 unsigned int num_pages, bool is_2m_pages)
Xavier Deguillardf220a802015-08-06 15:17:58 -0700447{
Nadav Amit90d72ce2018-07-02 19:27:13 -0700448 unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
Nadav Amit10a95d52018-09-20 10:30:07 -0700449 unsigned long status, cmd;
Xavier Deguillardf220a802015-08-06 15:17:58 -0700450
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700451 STATS_INC(b->stats.lock[is_2m_pages]);
Xavier Deguillardf220a802015-08-06 15:17:58 -0700452
Nadav Amit10a95d52018-09-20 10:30:07 -0700453 cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_LOCK :
454 VMW_BALLOON_CMD_BATCHED_LOCK;
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700455
Nadav Amit10a95d52018-09-20 10:30:07 -0700456 status = vmballoon_cmd(b, cmd, pfn, num_pages);
457
458 if (status == VMW_BALLOON_SUCCESS)
Xavier Deguillardf220a802015-08-06 15:17:58 -0700459 return 0;
460
461 pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700462 STATS_INC(b->stats.lock_fail[is_2m_pages]);
Xavier Deguillardf220a802015-08-06 15:17:58 -0700463 return 1;
464}
465
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400466/*
467 * Notify the host that guest intends to release given page back into
468 * the pool of available (to the guest) pages.
469 */
Nadav Amit10a95d52018-09-20 10:30:07 -0700470static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn)
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400471{
Nadav Amit10a95d52018-09-20 10:30:07 -0700472 unsigned long status;
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400473 u32 pfn32;
474
475 pfn32 = (u32)pfn;
476 if (pfn32 != pfn)
477 return false;
478
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700479 STATS_INC(b->stats.unlock[false]);
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400480
Nadav Amit10a95d52018-09-20 10:30:07 -0700481 status = vmballoon_cmd(b, VMW_BALLOON_CMD_UNLOCK, pfn, 0);
482 if (status == VMW_BALLOON_SUCCESS)
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400483 return true;
484
485 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700486 STATS_INC(b->stats.unlock_fail[false]);
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400487 return false;
488}
489
Xavier Deguillardf220a802015-08-06 15:17:58 -0700490static bool vmballoon_send_batched_unlock(struct vmballoon *b,
Nadav Amit10a95d52018-09-20 10:30:07 -0700491 unsigned int num_pages, bool is_2m_pages)
Xavier Deguillardf220a802015-08-06 15:17:58 -0700492{
Nadav Amit90d72ce2018-07-02 19:27:13 -0700493 unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
Nadav Amit10a95d52018-09-20 10:30:07 -0700494 unsigned long status, cmd;
Xavier Deguillardf220a802015-08-06 15:17:58 -0700495
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700496 STATS_INC(b->stats.unlock[is_2m_pages]);
Xavier Deguillardf220a802015-08-06 15:17:58 -0700497
Nadav Amit10a95d52018-09-20 10:30:07 -0700498 cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
499 VMW_BALLOON_CMD_BATCHED_UNLOCK;
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700500
Nadav Amit10a95d52018-09-20 10:30:07 -0700501 status = vmballoon_cmd(b, cmd, pfn, num_pages);
502
503 if (status == VMW_BALLOON_SUCCESS)
Xavier Deguillardf220a802015-08-06 15:17:58 -0700504 return true;
505
506 pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700507 STATS_INC(b->stats.unlock_fail[is_2m_pages]);
Xavier Deguillardf220a802015-08-06 15:17:58 -0700508 return false;
509}
510
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700511static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
512{
513 if (is_2m_page)
514 return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);
515
516 return alloc_page(flags);
517}
518
519static void vmballoon_free_page(struct page *page, bool is_2m_page)
520{
521 if (is_2m_page)
522 __free_pages(page, VMW_BALLOON_2M_SHIFT);
523 else
524 __free_page(page);
525}
526
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400527/*
528 * Quickly release all pages allocated for the balloon. This function is
529 * called when host decides to "reset" balloon for one reason or another.
530 * Unlike normal "deflate" we do not (shall not) notify host of the pages
531 * being released.
532 */
533static void vmballoon_pop(struct vmballoon *b)
534{
535 struct page *page, *next;
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700536 unsigned is_2m_pages;
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400537
Philip P. Moltmann365bd7e2015-08-06 15:18:01 -0700538 for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
539 is_2m_pages++) {
540 struct vmballoon_page_size *page_size =
541 &b->page_sizes[is_2m_pages];
542 u16 size_per_page = vmballoon_page_size(is_2m_pages);
543
544 list_for_each_entry_safe(page, next, &page_size->pages, lru) {
545 list_del(&page->lru);
546 vmballoon_free_page(page, is_2m_pages);
547 STATS_INC(b->stats.free[is_2m_pages]);
548 b->size -= size_per_page;
549 cond_resched();
550 }
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400551 }
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400552
Gil Kupferb23220f2018-06-01 00:47:47 -0700553 /* Clearing the batch_page unconditionally has no adverse effect */
554 free_page((unsigned long)b->batch_page);
555 b->batch_page = NULL;
Dmitry Torokhov453dc652010-04-23 13:18:08 -0400556}
557
/*
 * Notify the host of a ballooned page. If the host rejects the page, put
 * it on the refused list; refused pages are released at the end of the
 * inflation cycle.
 */
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
			       bool is_2m_pages)
{
	int locked, hv_status;
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never be true here; 2m page support implies batching */

	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status);

	if (locked) {
		STATS_INC(b->stats.refused_alloc[false]);

		if (locked == -EIO &&
		    (hv_status == VMW_BALLOON_ERROR_RESET ||
		     hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
			vmballoon_free_page(page, false);
			return -EIO;
		}

		/*
		 * Place the page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
			page_size->n_refused_pages++;
			list_add(&page->lru, &page_size->refused_pages);
		} else {
			vmballoon_free_page(page, false);
		}
		return locked;
	}

	/* track allocated page */
	list_add(&page->lru, &page_size->pages);

	/* update balloon size */
	b->size++;

	return 0;
}

static int vmballoon_lock_batched_page(struct vmballoon *b,
				       unsigned int num_pages,
				       bool is_2m_pages)
{
	int locked, i;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages);

	if (locked > 0) {
		for (i = 0; i < num_pages; i++) {
			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

			vmballoon_free_page(p, is_2m_pages);
		}

		return -EIO;
	}

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);

		switch (locked) {
		case VMW_BALLOON_SUCCESS:
			list_add(&p->lru, &page_size->pages);
			b->size += size_per_page;
			break;
		case VMW_BALLOON_ERROR_PPN_PINNED:
		case VMW_BALLOON_ERROR_PPN_INVALID:
			if (page_size->n_refused_pages
					< VMW_BALLOON_MAX_REFUSED) {
				list_add(&p->lru, &page_size->refused_pages);
				page_size->n_refused_pages++;
				break;
			}
			/* Fallthrough */
		case VMW_BALLOON_ERROR_RESET:
		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
			vmballoon_free_page(p, is_2m_pages);
			break;
		default:
			/* This should never happen */
			WARN_ON_ONCE(true);
		}
	}

	return 0;
}

/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
				 bool is_2m_pages)
{
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never be true here; 2m page support implies batching */

	if (!vmballoon_send_unlock_page(b, page_to_pfn(page))) {
		list_add(&page->lru, &page_size->pages);
		return -EIO;
	}

	/* deallocate page */
	vmballoon_free_page(page, false);
	STATS_INC(b->stats.free[false]);

	/* update balloon size */
	b->size--;

	return 0;
}

static int vmballoon_unlock_batched_page(struct vmballoon *b,
					 unsigned int num_pages,
					 bool is_2m_pages)
{
	int locked, i, ret = 0;
	bool hv_success;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages);

	if (!hv_success)
		ret = -EIO;

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);
		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor; re-add it to the list of pages owned by
			 * the balloon driver.
			 */
			list_add(&p->lru, &page_size->pages);
		} else {
			/* deallocate page */
			vmballoon_free_page(p, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);

			/* update balloon size */
			b->size -= size_per_page;
		}
	}

	return ret;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
					    bool is_2m_pages)
{
	struct page *page, *next;
	struct vmballoon_page_size *page_size =
			&b->page_sizes[is_2m_pages];

	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
		list_del(&page->lru);
		vmballoon_free_page(page, is_2m_pages);
		STATS_INC(b->stats.refused_free[is_2m_pages]);
	}

	page_size->n_refused_pages = 0;
}

static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
	b->page = p;
}

static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
				       struct page *p)
{
	vmballoon_batch_set_pa(b->batch_page, idx,
			       (u64)page_to_pfn(p) << PAGE_SHIFT);
}

/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
	unsigned int num_pages = 0;
	int error = 0;
	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
	bool is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/*
	 * First try NOSLEEP page allocations to inflate the balloon.
	 *
	 * If we do not throttle nosleep allocations, we can drain all
	 * free pages in the guest quickly (if the balloon target is high).
	 * As a side-effect, draining free pages helps to inform (force)
	 * the guest to start swapping if the balloon target is not met yet,
	 * which is a desired behavior. However, the balloon driver can
	 * consume all available CPU cycles if too many pages are allocated
	 * in a second. Therefore, we throttle nosleep allocations even when
	 * the guest is not under memory pressure. OTOH, if we have already
	 * predicted that the guest is under memory pressure, then we
	 * slow down page allocations considerably.
	 */

	/*
	 * Start with NOSLEEP allocations, whose rate may be higher than
	 * that of sleeping allocations.
	 */
	is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;

	pr_debug("%s - goal: %d", __func__, b->target - b->size);

	while (!b->reset_required &&
		b->size + num_pages * vmballoon_page_size(is_2m_pages)
		< b->target) {
		struct page *page;

		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
			STATS_INC(b->stats.alloc[is_2m_pages]);
		else
			STATS_INC(b->stats.sleep_alloc);

		page = vmballoon_alloc_page(flags, is_2m_pages);
		if (!page) {
			STATS_INC(b->stats.alloc_fail[is_2m_pages]);

			if (is_2m_pages) {
				b->ops->lock(b, num_pages, true);

				/*
				 * ignore errors from locking as we now switch
				 * to 4k pages and we might get different
				 * errors.
				 */

				num_pages = 0;
				is_2m_pages = false;
				continue;
			}

			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
				/*
				 * CANSLEEP page allocation failed, so the
				 * guest is under severe memory pressure. We
				 * just log the event, but do not stop the
				 * inflation due to its negative impact on
				 * performance.
				 */
				STATS_INC(b->stats.sleep_alloc_fail);
				break;
			}

			/*
			 * NOSLEEP page allocation failed, so the guest is
			 * under memory pressure. Slowing down page allocations
			 * seems to be reasonable, but doing so might actually
			 * cause the hypervisor to throttle us down, resulting
			 * in degraded performance. We will count on the
			 * scheduler and standard memory management mechanisms
			 * for now.
			 */
			flags = VMW_PAGE_ALLOC_CANSLEEP;
			continue;
		}

		b->ops->add_page(b, num_pages++, page);
		if (num_pages == b->batch_max_pages) {
			error = b->ops->lock(b, num_pages, is_2m_pages);

			num_pages = 0;
			if (error)
				break;
		}

		cond_resched();
	}

	if (num_pages > 0)
		b->ops->lock(b, num_pages, is_2m_pages);

	vmballoon_release_refused_pages(b, true);
	vmballoon_release_refused_pages(b, false);
}

/*
 * Decrease the size of the balloon, allowing the guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
	unsigned is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/* free pages to reach target */
	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
			is_2m_pages++) {
		struct page *page, *next;
		unsigned int num_pages = 0;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			if (b->reset_required ||
				(b->target > 0 &&
					b->size - num_pages
					* vmballoon_page_size(is_2m_pages)
				< b->target + vmballoon_page_size(true)))
				break;

			list_del(&page->lru);
			b->ops->add_page(b, num_pages++, page);

			if (num_pages == b->batch_max_pages) {
				int error;

				error = b->ops->unlock(b, num_pages,
						       is_2m_pages);
				num_pages = 0;
				if (error)
					return;
			}

			cond_resched();
		}

		if (num_pages > 0)
			b->ops->unlock(b, num_pages, is_2m_pages);
	}
}

static const struct vmballoon_ops vmballoon_basic_ops = {
	.add_page = vmballoon_add_page,
	.lock = vmballoon_lock_page,
	.unlock = vmballoon_unlock_page
};

static const struct vmballoon_ops vmballoon_batched_ops = {
	.add_page = vmballoon_add_batched_page,
	.lock = vmballoon_lock_batched_page,
	.unlock = vmballoon_unlock_batched_page
};
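
/*
 * Both ops tables implement the same contract: the basic ops issue one
 * hypervisor call per 4k page, while the batched ops stage up to
 * batch_max_pages entries in the shared batch page and lock or unlock
 * them all with a single call.
 */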

static bool vmballoon_init_batching(struct vmballoon *b)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return false;

	b->batch_page = page_address(page);
	return true;
}

/*
 * Receive a notification and resize the balloon.
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	STATS_INC(b->stats.doorbell);

	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up the vmci doorbell.
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
		      VMCI_INVALID_ID, VMCI_INVALID_ID);

	STATS_INC(b->stats.doorbell_unset);

	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
		vmci_doorbell_destroy(b->vmci_doorbell);
		b->vmci_doorbell = VMCI_INVALID_HANDLE;
	}
}

/*
 * Initialize the vmci doorbell, to get notified as soon as the balloon
 * target changes.
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	unsigned long error;

	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
		return 0;

	error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
				     VMCI_PRIVILEGE_FLAG_RESTRICTED,
				     vmballoon_doorbell, b);

	if (error != VMCI_SUCCESS)
		goto fail;

	error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
				b->vmci_doorbell.context,
				b->vmci_doorbell.resource, NULL);

	STATS_INC(b->stats.doorbell_set);

	if (error != VMW_BALLOON_SUCCESS)
		goto fail;

	return 0;
fail:
	vmballoon_vmci_cleanup(b);
	return -EIO;
}

/*
 * Perform the standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting the protocol. This operation normally
 * happens when the host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		return;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		b->ops = &vmballoon_batched_ops;
		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
		if (!vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			return;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		b->ops = &vmballoon_basic_ops;
		b->batch_max_pages = 1;
	}

	b->reset_required = false;

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

	if (!vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");
}

/*
 * Balloon work function: reset the protocol, if needed, get the new
 * balloon target and adjust the balloon size accordingly. Repeat every
 * second.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);

	STATS_INC(b->stats.timer);

	if (b->reset_required)
		vmballoon_reset(b);

	if (!b->reset_required && vmballoon_send_get_target(b)) {
		unsigned long target = b->target;

		/* update target, adjust size */
		if (b->size < target)
			vmballoon_inflate(b);
		else if (target == 0 ||
				b->size > target + vmballoon_page_size(true))
			vmballoon_deflate(b);
	}

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	struct vmballoon_stats *stats = &b->stats;

	/* format capabilities info */
	seq_printf(f,
		   "balloon capabilities:   %#4x\n"
		   "used capabilities:      %#4lx\n"
		   "is resetting:           %c\n",
		   VMW_BALLOON_CAPABILITIES, b->capabilities,
		   b->reset_required ? 'y' : 'n');

	/* format size info */
	seq_printf(f,
		   "target:             %8d pages\n"
		   "current:            %8d pages\n",
		   b->target, b->size);

	seq_printf(f,
		   "\n"
		   "timer:              %8u\n"
		   "doorbell:           %8u\n"
		   "start:              %8u (%4u failed)\n"
		   "guestType:          %8u (%4u failed)\n"
		   "2m-lock:            %8u (%4u failed)\n"
		   "lock:               %8u (%4u failed)\n"
		   "2m-unlock:          %8u (%4u failed)\n"
		   "unlock:             %8u (%4u failed)\n"
		   "target:             %8u (%4u failed)\n"
		   "prim2mAlloc:        %8u (%4u failed)\n"
		   "primNoSleepAlloc:   %8u (%4u failed)\n"
		   "primCanSleepAlloc:  %8u (%4u failed)\n"
		   "prim2mFree:         %8u\n"
		   "primFree:           %8u\n"
		   "err2mAlloc:         %8u\n"
		   "errAlloc:           %8u\n"
		   "err2mFree:          %8u\n"
		   "errFree:            %8u\n"
		   "doorbellSet:        %8u\n"
		   "doorbellUnset:      %8u\n",
		   stats->timer,
		   stats->doorbell,
		   stats->start, stats->start_fail,
		   stats->guest_type, stats->guest_type_fail,
		   stats->lock[true], stats->lock_fail[true],
		   stats->lock[false], stats->lock_fail[false],
		   stats->unlock[true], stats->unlock_fail[true],
		   stats->unlock[false], stats->unlock_fail[false],
		   stats->target, stats->target_fail,
		   stats->alloc[true], stats->alloc_fail[true],
		   stats->alloc[false], stats->alloc_fail[false],
		   stats->sleep_alloc, stats->sleep_alloc_fail,
		   stats->free[true],
		   stats->free[false],
		   stats->refused_alloc[true], stats->refused_alloc[false],
		   stats->refused_free[true], stats->refused_free[false],
		   stats->doorbell_set, stats->doorbell_unset);

	return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
	.owner		= THIS_MODULE,
	.open		= vmballoon_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	debugfs_remove(b->dbg_entry);
}

#else

static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */

static int __init vmballoon_init(void)
{
	int error;
	unsigned is_2m_pages;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
	}

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);

static void __exit vmballoon_exit(void)
{
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset the connection with the
	 * monitor. Reset the connection before deallocating memory to avoid
	 * potential for additional spurious resets from the guest touching
	 * deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);