1 /*
2  * Xen implementation for transcendent memory (tmem)
3  *
4  * Copyright (C) 2009-2011 Oracle Corp.  All rights reserved.
5  * Author: Dan Magenheimer
6  */
7 
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/init.h>
11 #include <linux/pagemap.h>
12 #include <linux/module.h>
13 #include <linux/cleancache.h>
14 
15 /* temporary ifdef until include/linux/frontswap.h is upstream */
16 #ifdef CONFIG_FRONTSWAP
17 #include <linux/frontswap.h>
18 #endif
19 
20 #include <xen/xen.h>
21 #include <xen/interface/xen.h>
22 #include <asm/xen/hypercall.h>
23 #include <asm/xen/page.h>
24 #include <asm/xen/hypervisor.h>
25 
/*
 * Sub-operation codes; xen_tmem_op() and xen_tmem_new_pool() store one of
 * these in the cmd field of the tmem_op handed to HYPERVISOR_tmem_op().
 */
#define TMEM_CONTROL			0
#define TMEM_NEW_POOL			1
#define TMEM_DESTROY_POOL		2
#define TMEM_NEW_PAGE			3
#define TMEM_PUT_PAGE			4
#define TMEM_GET_PAGE			5
#define TMEM_FLUSH_PAGE			6
#define TMEM_FLUSH_OBJECT		7
#define TMEM_READ			8
#define TMEM_WRITE			9
#define TMEM_XCHG			10

/*
 * Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL): two flag bits, plus the
 * shift positions at which xen_tmem_new_pool() places the encoded page
 * size and the spec version inside the flags word.
 */
#define TMEM_POOL_PERSIST		1
#define TMEM_POOL_SHARED		2
#define TMEM_POOL_PAGESIZE_SHIFT	4
#define TMEM_VERSION_SHIFT		24

43 
44 
45 struct tmem_pool_uuid {
46 	u64 uuid_lo;
47 	u64 uuid_hi;
48 };
49 
50 struct tmem_oid {
51 	u64 oid[3];
52 };
53 
/* all-zero UUID, used by the callers that create non-shared pools */
#define TMEM_POOL_PRIVATE_UUID	{ 0, 0 }

/*
 * The flags accepted by xen_tmem_new_pool() (TMEM_POOL_PERSIST and
 * TMEM_POOL_SHARED) are defined once, together with the other
 * TMEM_NEW_POOL bits earlier in this file; they are deliberately not
 * repeated here so the two copies cannot drift apart.
 */
59 
60 /* xen tmem foundation ops/hypercalls */
61 
/*
 * Issue one generic tmem hypercall.
 *
 * Packs the arguments into the "gen" member of a struct tmem_op and passes
 * it to HYPERVISOR_tmem_op(); the hypercall's return value is handed back
 * unchanged.
 */
static inline int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, struct tmem_oid oid,
	u32 index, unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len)
{
	struct tmem_op op;
	unsigned int word;

	op.cmd = tmem_cmd;
	op.pool_id = tmem_pool;

	/* copy every word of the object id */
	for (word = 0; word < sizeof(oid.oid) / sizeof(oid.oid[0]); word++)
		op.u.gen.oid[word] = oid.oid[word];

	op.u.gen.index = index;
	op.u.gen.tmem_offset = tmem_offset;
	op.u.gen.pfn_offset = pfn_offset;
	op.u.gen.len = len;
	set_xen_guest_handle(op.u.gen.gmfn, (void *)gmfn);

	return HYPERVISOR_tmem_op(&op);
}
81 
/*
 * Ask the hypervisor to create a new tmem pool.
 *
 * @uuid:     pool UUID; its two halves are copied into the hypercall
 * @flags:    TMEM_POOL_* flag bits; the encoded page size and the spec
 *            version are OR-ed in before the call
 * @pagesize: pool page size in bytes, at least 4096
 *
 * Returns whatever HYPERVISOR_tmem_op() returns (callers in this file
 * treat a negative value as "no pool"), or -1 without issuing the
 * hypercall when @pagesize is below 4096.
 */
static int xen_tmem_new_pool(struct tmem_pool_uuid uuid,
				u32 flags, unsigned long pagesize)
{
	struct tmem_op op;
	int rc = 0, pageshift;

	/*
	 * The page size is encoded as log2(pagesize) - 12.  A size of zero
	 * would never terminate the loop below, and anything under 4096
	 * would produce a negative value to shift, so refuse both here.
	 */
	if (pagesize < (1UL << 12))
		return -1;

	for (pageshift = 0; pagesize != 1; pageshift++)
		pagesize >>= 1;
	flags |= (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT;
	flags |= TMEM_SPEC_VERSION << TMEM_VERSION_SHIFT;
	op.cmd = TMEM_NEW_POOL;
	op.u.new.uuid[0] = uuid.uuid_lo;
	op.u.new.uuid[1] = uuid.uuid_hi;
	op.u.new.flags = flags;
	rc = HYPERVISOR_tmem_op(&op);
	return rc;
}
99 
100 /* xen generic tmem ops */
101 
/*
 * TMEM_PUT_PAGE for the page frame @pfn under (@pool_id, @oid, @index).
 * In a PV domain the pfn is first translated with pfn_to_mfn(); otherwise
 * it is passed through as-is.  Returns the hypercall result.
 */
static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid,
			     u32 index, unsigned long pfn)
{
	unsigned long frame;

	if (xen_pv_domain())
		frame = pfn_to_mfn(pfn);
	else
		frame = pfn;

	return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index, frame, 0, 0, 0);
}
110 
/*
 * TMEM_GET_PAGE into the page frame @pfn from (@pool_id, @oid, @index).
 * In a PV domain the pfn is first translated with pfn_to_mfn(); otherwise
 * it is passed through as-is.  Returns the hypercall result.
 */
static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid,
			     u32 index, unsigned long pfn)
{
	unsigned long frame;

	if (xen_pv_domain())
		frame = pfn_to_mfn(pfn);
	else
		frame = pfn;

	return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index, frame, 0, 0, 0);
}
119 
/*
 * TMEM_FLUSH_PAGE for (@pool_id, @oid, @index); no page frame is involved,
 * so the remaining hypercall arguments are zero.  Returns the hypercall
 * result.
 */
static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index)
{
	int rc;

	rc = xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, oid, index, 0, 0, 0, 0);
	return rc;
}
125 
/*
 * TMEM_FLUSH_OBJECT for (@pool_id, @oid); index and all page arguments are
 * zero.  Returns the hypercall result.
 */
static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
{
	int rc;

	rc = xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
	return rc;
}
130 
131 int tmem_enabled __read_mostly;
132 EXPORT_SYMBOL(tmem_enabled);
133 
enable_tmem(char * s)134 static int __init enable_tmem(char *s)
135 {
136 	tmem_enabled = 1;
137 	return 1;
138 }
139 
140 __setup("tmem", enable_tmem);
141 
142 #ifdef CONFIG_CLEANCACHE
/*
 * TMEM_DESTROY_POOL for @pool_id.  The operation takes no object, so an
 * all-zero oid and zero for every other argument are passed.  Returns the
 * hypercall result.
 */
static int xen_tmem_destroy_pool(u32 pool_id)
{
	struct tmem_oid zero_oid = { { 0 } };

	return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, zero_oid,
			   0, 0, 0, 0, 0);
}
149 
150 /* cleancache ops */
151 
/*
 * cleancache put_page: offer @page to tmem under (@pool, @key, @index).
 * Silently does nothing if @pool is negative or @index does not fit in
 * 32 bits; the hypercall result is ignored.
 */
static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
				     pgoff_t index, struct page *page)
{
	struct tmem_oid oid = *(struct tmem_oid *)&key;
	unsigned long pfn = page_to_pfn(page);
	u32 ind32 = (u32) index;

	if (pool < 0 || ind32 != index)
		return;

	mb(); /* ensure page is quiescent; tmem may address it with an alias */
	(void)xen_tmem_put_page((u32)pool, oid, ind32, pfn);
}
166 
/*
 * cleancache get_page: try to fill @page from tmem at (@pool, @key, @index).
 * Returns 0 when the hypercall reports 1, and -1 in every other case,
 * including a negative @pool or an @index that does not fit in 32 bits.
 */
static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key,
				    pgoff_t index, struct page *page)
{
	struct tmem_oid oid = *(struct tmem_oid *)&key;
	unsigned long pfn = page_to_pfn(page);
	u32 ind32 = (u32) index;

	if (pool < 0 || ind32 != index)
		return -1;

	/* translate the Xen tmem return value to linux semantics */
	return xen_tmem_get_page((u32)pool, oid, ind32, pfn) == 1 ? 0 : -1;
}
186 
/*
 * cleancache flush_page: drop the page at (@pool, @key, @index) from tmem.
 * Does nothing if @pool is negative or @index does not fit in 32 bits;
 * the hypercall result is ignored.
 */
static void tmem_cleancache_flush_page(int pool, struct cleancache_filekey key,
				       pgoff_t index)
{
	struct tmem_oid oid = *(struct tmem_oid *)&key;
	u32 ind32 = (u32) index;

	if (pool < 0 || ind32 != index)
		return;

	(void)xen_tmem_flush_page((u32)pool, oid, ind32);
}
199 
tmem_cleancache_flush_inode(int pool,struct cleancache_filekey key)200 static void tmem_cleancache_flush_inode(int pool, struct cleancache_filekey key)
201 {
202 	struct tmem_oid oid = *(struct tmem_oid *)&key;
203 
204 	if (pool < 0)
205 		return;
206 	(void)xen_tmem_flush_object((u32)pool, oid);
207 }
208 
tmem_cleancache_flush_fs(int pool)209 static void tmem_cleancache_flush_fs(int pool)
210 {
211 	if (pool < 0)
212 		return;
213 	(void)xen_tmem_destroy_pool((u32)pool);
214 }
215 
/*
 * cleancache init_fs: create a pool with the private UUID and no flags
 * for the given @pagesize.  Returns xen_tmem_new_pool()'s result.
 */
static int tmem_cleancache_init_fs(size_t pagesize)
{
	struct tmem_pool_uuid private_uuid = TMEM_POOL_PRIVATE_UUID;
	int pool;

	pool = xen_tmem_new_pool(private_uuid, 0, pagesize);
	return pool;
}
222 
/*
 * cleancache init_shared_fs: create a TMEM_POOL_SHARED pool.
 *
 * @uuid is read as 16 bytes: bytes 0-7 become uuid_lo and bytes 8-15
 * become uuid_hi.  Returns xen_tmem_new_pool()'s result.
 */
static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
{
	struct tmem_pool_uuid shared_uuid = {
		.uuid_lo = *(u64 *)uuid,
		.uuid_hi = *(u64 *)(uuid + 8),
	};

	return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
}
231 
232 static int use_cleancache = 1;
233 
no_cleancache(char * s)234 static int __init no_cleancache(char *s)
235 {
236 	use_cleancache = 0;
237 	return 1;
238 }
239 
240 __setup("nocleancache", no_cleancache);
241 
242 static struct cleancache_ops tmem_cleancache_ops = {
243 	.put_page = tmem_cleancache_put_page,
244 	.get_page = tmem_cleancache_get_page,
245 	.flush_page = tmem_cleancache_flush_page,
246 	.flush_inode = tmem_cleancache_flush_inode,
247 	.flush_fs = tmem_cleancache_flush_fs,
248 	.init_shared_fs = tmem_cleancache_init_shared_fs,
249 	.init_fs = tmem_cleancache_init_fs
250 };
251 #endif
252 
253 #ifdef CONFIG_FRONTSWAP
254 /* frontswap tmem operations */
255 
/*
 * A single tmem poolid is used for all frontswap "types" (swapfiles).
 * It starts out negative, i.e. "no pool yet": every frontswap op bails
 * out on pool < 0 and tmem_frontswap_init() only creates the pool while
 * the id is negative, so a zero default would look like a valid pool.
 */
static int tmem_frontswap_poolid = -1;
258 
/*
 * Swizzling increases objects per swaptype, increasing tmem concurrency
 * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
 *
 * _oswiz() builds the object id word from the swap type and the low
 * SWIZ_BITS bits of the index; iswiz() yields the remaining high bits of
 * the index.  Arguments are parenthesised so that compound expressions
 * (e.g. "a | b") expand with the intended precedence.
 */
#define SWIZ_BITS		4
#define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
#define _oswiz(_type, _ind)	(((_type) << SWIZ_BITS) | ((_ind) & SWIZ_MASK))
#define iswiz(_ind)		((_ind) >> SWIZ_BITS)
267 
/*
 * Build the tmem object id for a frontswap page: word 0 holds
 * _oswiz(type, ind), words 1 and 2 are zero.
 */
static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
	struct tmem_oid swizzled = { .oid = { _oswiz(type, ind), 0, 0 } };

	return swizzled;
}
274 
275 /* returns 0 if the page was successfully put into frontswap, -1 if not */
tmem_frontswap_put_page(unsigned type,pgoff_t offset,struct page * page)276 static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
277 				   struct page *page)
278 {
279 	u64 ind64 = (u64)offset;
280 	u32 ind = (u32)offset;
281 	unsigned long pfn = page_to_pfn(page);
282 	int pool = tmem_frontswap_poolid;
283 	int ret;
284 
285 	if (pool < 0)
286 		return -1;
287 	if (ind64 != ind)
288 		return -1;
289 	mb(); /* ensure page is quiescent; tmem may address it with an alias */
290 	ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn);
291 	/* translate Xen tmem return values to linux semantics */
292 	if (ret == 1)
293 		return 0;
294 	else
295 		return -1;
296 }
297 
298 /*
299  * returns 0 if the page was successfully gotten from frontswap, -1 if
300  * was not present (should never happen!)
301  */
tmem_frontswap_get_page(unsigned type,pgoff_t offset,struct page * page)302 static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
303 				   struct page *page)
304 {
305 	u64 ind64 = (u64)offset;
306 	u32 ind = (u32)offset;
307 	unsigned long pfn = page_to_pfn(page);
308 	int pool = tmem_frontswap_poolid;
309 	int ret;
310 
311 	if (pool < 0)
312 		return -1;
313 	if (ind64 != ind)
314 		return -1;
315 	ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn);
316 	/* translate Xen tmem return values to linux semantics */
317 	if (ret == 1)
318 		return 0;
319 	else
320 		return -1;
321 }
322 
323 /* flush a single page from frontswap */
tmem_frontswap_flush_page(unsigned type,pgoff_t offset)324 static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset)
325 {
326 	u64 ind64 = (u64)offset;
327 	u32 ind = (u32)offset;
328 	int pool = tmem_frontswap_poolid;
329 
330 	if (pool < 0)
331 		return;
332 	if (ind64 != ind)
333 		return;
334 	(void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind));
335 }
336 
337 /* flush all pages from the passed swaptype */
tmem_frontswap_flush_area(unsigned type)338 static void tmem_frontswap_flush_area(unsigned type)
339 {
340 	int pool = tmem_frontswap_poolid;
341 	int ind;
342 
343 	if (pool < 0)
344 		return;
345 	for (ind = SWIZ_MASK; ind >= 0; ind--)
346 		(void)xen_tmem_flush_object(pool, oswiz(type, ind));
347 }
348 
tmem_frontswap_init(unsigned ignored)349 static void tmem_frontswap_init(unsigned ignored)
350 {
351 	struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID;
352 
353 	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
354 	if (tmem_frontswap_poolid < 0)
355 		tmem_frontswap_poolid =
356 		    xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
357 }
358 
359 static int __initdata use_frontswap = 1;
360 
no_frontswap(char * s)361 static int __init no_frontswap(char *s)
362 {
363 	use_frontswap = 0;
364 	return 1;
365 }
366 
367 __setup("nofrontswap", no_frontswap);
368 
369 static struct frontswap_ops tmem_frontswap_ops = {
370 	.put_page = tmem_frontswap_put_page,
371 	.get_page = tmem_frontswap_get_page,
372 	.flush_page = tmem_frontswap_flush_page,
373 	.flush_area = tmem_frontswap_flush_area,
374 	.init = tmem_frontswap_init
375 };
376 #endif
377 
xen_tmem_init(void)378 static int __init xen_tmem_init(void)
379 {
380 	if (!xen_domain())
381 		return 0;
382 #ifdef CONFIG_FRONTSWAP
383 	if (tmem_enabled && use_frontswap) {
384 		char *s = "";
385 		struct frontswap_ops old_ops =
386 			frontswap_register_ops(&tmem_frontswap_ops);
387 
388 		tmem_frontswap_poolid = -1;
389 		if (old_ops.init != NULL)
390 			s = " (WARNING: frontswap_ops overridden)";
391 		printk(KERN_INFO "frontswap enabled, RAM provided by "
392 				 "Xen Transcendent Memory\n");
393 	}
394 #endif
395 #ifdef CONFIG_CLEANCACHE
396 	BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
397 	if (tmem_enabled && use_cleancache) {
398 		char *s = "";
399 		struct cleancache_ops old_ops =
400 			cleancache_register_ops(&tmem_cleancache_ops);
401 		if (old_ops.init_fs != NULL)
402 			s = " (WARNING: cleancache_ops overridden)";
403 		printk(KERN_INFO "cleancache enabled, RAM provided by "
404 				 "Xen Transcendent Memory%s\n", s);
405 	}
406 #endif
407 	return 0;
408 }
409 
410 module_init(xen_tmem_init)
411