1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved.
7  *
8  * SGI Altix topology and hardware performance monitoring API.
9  * Mark Goodwin <markgw@sgi.com>.
10  *
11  * Creates /proc/sgi_sn/sn_topology (read-only) to export
12  * info about Altix nodes, routers, CPUs and NumaLink
13  * interconnection/topology.
14  *
15  * Also creates a dynamic misc device named "sn_hwperf"
16  * that supports an ioctl interface to call down into SAL
17  * to discover hw objects, topology and to read/write
18  * memory mapped registers, e.g. for performance monitoring.
 * The "sn_hwperf" device is registered at boot from a device_initcall
 * on sn2 platforms (see sn_hwperf_misc_register_init() below).
21  *
22  * This API is used by SGI Performance Co-Pilot and other
23  * tools, see http://oss.sgi.com/projects/pcp
24  */
25 
26 #include <linux/fs.h>
27 #include <linux/slab.h>
28 #include <linux/vmalloc.h>
29 #include <linux/seq_file.h>
30 #include <linux/miscdevice.h>
31 #include <linux/utsname.h>
32 #include <linux/cpumask.h>
33 #include <linux/nodemask.h>
34 #include <linux/smp.h>
35 #include <linux/mutex.h>
36 
37 #include <asm/processor.h>
38 #include <asm/topology.h>
39 #include <asm/uaccess.h>
40 #include <asm/sal.h>
41 #include <asm/sn/io.h>
42 #include <asm/sn/sn_sal.h>
43 #include <asm/sn/module.h>
44 #include <asm/sn/geo.h>
45 #include <asm/sn/sn2/sn_hwperf.h>
46 #include <asm/sn/addrs.h>
47 
/*
 * Scratch heap handed to the PROM by sn_hwperf_init().  A non-NULL value
 * doubles as the "already initialized" flag tested by sn_hwperf_init().
 */
static void *sn_hwperf_salheap = NULL;
/* Object count read from SAL (SN_HWPERF_OBJECT_COUNT) in sn_hwperf_init(). */
static int sn_hwperf_obj_cnt = 0;
/* Reference nasid passed to every ia64_sn_hwperf_op() call in this file. */
static nasid_t sn_hwperf_master_nasid = INVALID_NASID;
static int sn_hwperf_init(void);
/* Serializes the once-only initialization done by sn_hwperf_init(). */
static DEFINE_MUTEX(sn_hwperf_init_mutex);

/* True when n is below num_cnodes; performs no lower-bound check. */
#define cnode_possible(n)	((n) < num_cnodes)
55 
sn_hwperf_enum_objects(int * nobj,struct sn_hwperf_object_info ** ret)56 static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret)
57 {
58 	int e;
59 	u64 sz;
60 	struct sn_hwperf_object_info *objbuf = NULL;
61 
62 	if ((e = sn_hwperf_init()) < 0) {
63 		printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e);
64 		goto out;
65 	}
66 
67 	sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info);
68 	objbuf = vmalloc(sz);
69 	if (objbuf == NULL) {
70 		printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz);
71 		e = -ENOMEM;
72 		goto out;
73 	}
74 
75 	e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS,
76 		0, sz, (u64) objbuf, 0, 0, NULL);
77 	if (e != SN_HWPERF_OP_OK) {
78 		e = -EINVAL;
79 		vfree(objbuf);
80 	}
81 
82 out:
83 	*nobj = sn_hwperf_obj_cnt;
84 	*ret = objbuf;
85 	return e;
86 }
87 
/*
 * Split a geoid location string into rack, bay, slot and slab numbers.
 *
 * Two layouts are accepted, tried in this order:
 *   "%03d%c%02d#%d"        (old style, no slot field: *slot is set to 0)
 *   "%03d%c%02d^%02d#%d"   (new bladed style)
 * The single character between rack and bay is parsed but not returned.
 *
 * Returns 0 when one of the layouts matches completely, -1 otherwise.
 */
static int sn_hwperf_location_to_bpos(char *location,
	int *rack, int *bay, int *slot, int *slab)
{
	char type_ch;
	int nfields;

	/* old style geoid string first */
	nfields = sscanf(location, "%03d%c%02d#%d",
			 rack, &type_ch, bay, slab);
	if (nfields == 4) {
		*slot = 0;
		return 0;
	}

	/* otherwise it has to be a new bladed geoid string */
	nfields = sscanf(location, "%03d%c%02d^%02d#%d",
			 rack, &type_ch, bay, slot, slab);
	if (nfields != 5)
		return -1;

	return 0;
}
104 
sn_hwperf_geoid_to_cnode(char * location)105 static int sn_hwperf_geoid_to_cnode(char *location)
106 {
107 	int cnode;
108 	geoid_t geoid;
109 	moduleid_t module_id;
110 	int rack, bay, slot, slab;
111 	int this_rack, this_bay, this_slot, this_slab;
112 
113 	if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab))
114 		return -1;
115 
116 	/*
117 	 * FIXME: replace with cleaner for_each_XXX macro which addresses
118 	 * both compute and IO nodes once ACPI3.0 is available.
119 	 */
120 	for (cnode = 0; cnode < num_cnodes; cnode++) {
121 		geoid = cnodeid_get_geoid(cnode);
122 		module_id = geo_module(geoid);
123 		this_rack = MODULE_GET_RACK(module_id);
124 		this_bay = MODULE_GET_BPOS(module_id);
125 		this_slot = geo_slot(geoid);
126 		this_slab = geo_slab(geoid);
127 		if (rack == this_rack && bay == this_bay &&
128 			slot == this_slot && slab == this_slab) {
129 			break;
130 		}
131 	}
132 
133 	return cnode_possible(cnode) ? cnode : -1;
134 }
135 
sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj)136 static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj)
137 {
138 	if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))
139 		BUG();
140 	if (SN_HWPERF_FOREIGN(obj))
141 		return -1;
142 	return sn_hwperf_geoid_to_cnode(obj->location);
143 }
144 
sn_hwperf_generic_ordinal(struct sn_hwperf_object_info * obj,struct sn_hwperf_object_info * objs)145 static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj,
146 				struct sn_hwperf_object_info *objs)
147 {
148 	int ordinal;
149 	struct sn_hwperf_object_info *p;
150 
151 	for (ordinal=0, p=objs; p != obj; p++) {
152 		if (SN_HWPERF_FOREIGN(p))
153 			continue;
154 		if (SN_HWPERF_SAME_OBJTYPE(p, obj))
155 			ordinal++;
156 	}
157 
158 	return ordinal;
159 }
160 
/* Object type names returned by sn_hwperf_get_slabname(). */
static const char *slabname_node =	"node"; /* SHub asic */
static const char *slabname_ionode =	"ionode"; /* TIO asic */
static const char *slabname_router =	"router"; /* NL3R or NL4R */
static const char *slabname_other =	"other"; /* unknown asic */
165 
sn_hwperf_get_slabname(struct sn_hwperf_object_info * obj,struct sn_hwperf_object_info * objs,int * ordinal)166 static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj,
167 			struct sn_hwperf_object_info *objs, int *ordinal)
168 {
169 	int isnode;
170 	const char *slabname = slabname_other;
171 
172 	if ((isnode = SN_HWPERF_IS_NODE(obj)) || SN_HWPERF_IS_IONODE(obj)) {
173 	    	slabname = isnode ? slabname_node : slabname_ionode;
174 		*ordinal = sn_hwperf_obj_to_cnode(obj);
175 	}
176 	else {
177 		*ordinal = sn_hwperf_generic_ordinal(obj, objs);
178 		if (SN_HWPERF_IS_ROUTER(obj))
179 			slabname = slabname_router;
180 	}
181 
182 	return slabname;
183 }
184 
print_pci_topology(struct seq_file * s)185 static void print_pci_topology(struct seq_file *s)
186 {
187 	char *p;
188 	size_t sz;
189 	int e;
190 
191 	for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) {
192 		if (!(p = kmalloc(sz, GFP_KERNEL)))
193 			break;
194 		e = ia64_sn_ioif_get_pci_topology(__pa(p), sz);
195 		if (e == SALRET_OK)
196 			seq_puts(s, p);
197 		kfree(p);
198 		if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED)
199 			break;
200 	}
201 }
202 
/*
 * Non-zero when @node is a valid, online node id for which
 * nr_cpus_node() is non-zero.
 *
 * Callers pass the result of sn_hwperf_obj_to_cnode(), which is -1 for
 * objects it cannot map, so negative ids are rejected here before they
 * are handed to node_online().
 */
static inline int sn_hwperf_has_cpus(cnodeid_t node)
{
	return node >= 0 && node < MAX_NUMNODES && node_online(node) &&
		nr_cpus_node(node);
}
207 
/*
 * Non-zero when @node is a valid, online node id whose
 * NODE_DATA()->node_present_pages is non-zero.
 *
 * Callers pass the result of sn_hwperf_obj_to_cnode(), which is -1 for
 * objects it cannot map, so negative ids are rejected here before they
 * are handed to node_online() or NODE_DATA().
 */
static inline int sn_hwperf_has_mem(cnodeid_t node)
{
	return node >= 0 && node < MAX_NUMNODES && node_online(node) &&
		NODE_DATA(node)->node_present_pages;
}
212 
213 static struct sn_hwperf_object_info *
sn_hwperf_findobj_id(struct sn_hwperf_object_info * objbuf,int nobj,int id)214 sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf,
215 	int nobj, int id)
216 {
217 	int i;
218 	struct sn_hwperf_object_info *p = objbuf;
219 
220 	for (i=0; i < nobj; i++, p++) {
221 		if (p->id == id)
222 			return p;
223 	}
224 
225 	return NULL;
226 
227 }
228 
/*
 * Find a node with CPUs and a node with memory "near" cnode @node, using
 * an already enumerated object array.
 *
 * @objbuf:        object array from sn_hwperf_enum_objects()
 * @nobj:          number of entries in @objbuf
 * @node:          cnode to start from
 * @near_mem_node: if non-NULL, receives the chosen node with memory
 * @near_cpu_node: if non-NULL, receives the chosen node with CPUs
 *
 * Search order, stopping as soon as both have been found:
 *   1. @node itself,
 *   2. nodes directly connected to @node's ports,
 *   3. nodes connected to a router that @node is connected to,
 *   4. any non-foreign node in @objbuf.
 *
 * Returns 0 on the trivial path (step 1 satisfied both), -EINVAL if
 * @node fails cnode_possible() or a SAL port enumeration fails, -ENOENT
 * if no node/ionode object maps to @node, and -ENODATA if either kind of
 * node could not be found.  Otherwise the value left in e by the last
 * SAL call is returned, i.e. SN_HWPERF_OP_OK (presumably 0: callers
 * compare the result against 0 -- confirm against sn_hwperf.h).
 */
static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf,
	int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node)
{
	int e;
	struct sn_hwperf_object_info *nodeobj = NULL;
	struct sn_hwperf_object_info *op;
	struct sn_hwperf_object_info *dest;
	struct sn_hwperf_object_info *router;
	/* on-stack port buffer; the BUG_ON()s below enforce the 16 entry cap */
	struct sn_hwperf_port_info ptdata[16];
	int sz, i, j;
	cnodeid_t c;
	int found_mem = 0;
	int found_cpu = 0;

	if (!cnode_possible(node))
		return -EINVAL;

	/* step 1: the node itself may already have CPUs and/or memory */
	if (sn_hwperf_has_cpus(node)) {
		if (near_cpu_node)
			*near_cpu_node = node;
		found_cpu++;
	}

	if (sn_hwperf_has_mem(node)) {
		if (near_mem_node)
			*near_mem_node = node;
		found_mem++;
	}

	if (found_cpu && found_mem)
		return 0; /* trivially successful */

	/* find the argument node object */
	for (i=0, op=objbuf; i < nobj; i++, op++) {
		if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op))
			continue;
		if (node == sn_hwperf_obj_to_cnode(op)) {
			nodeobj = op;
			break;
		}
	}
	if (!nodeobj) {
		e = -ENOENT;
		goto err;
	}

	/* get its interconnect topology (op == nodeobj after the break above) */
	sz = op->ports * sizeof(struct sn_hwperf_port_info);
	BUG_ON(sz > sizeof(ptdata));
	e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
			      SN_HWPERF_ENUM_PORTS, nodeobj->id, sz,
			      (u64)&ptdata, 0, 0, NULL);
	if (e != SN_HWPERF_OP_OK) {
		e = -EINVAL;
		goto err;
	}

	/* find nearest node with cpus and nearest memory */
	for (router=NULL, j=0; j < op->ports; j++) {
		dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id);
		/* remember a connected router (the last one seen) for step 3 */
		if (dest && SN_HWPERF_IS_ROUTER(dest))
			router = dest;
		if (!dest || SN_HWPERF_FOREIGN(dest) ||
		    !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) {
			continue;
		}
		c = sn_hwperf_obj_to_cnode(dest);
		if (!found_cpu && sn_hwperf_has_cpus(c)) {
			if (near_cpu_node)
				*near_cpu_node = c;
			found_cpu++;
		}
		if (!found_mem && sn_hwperf_has_mem(c)) {
			if (near_mem_node)
				*near_mem_node = c;
			found_mem++;
		}
	}

	if (router && (!found_cpu || !found_mem)) {
		/* search for a node connected to the same router */
		sz = router->ports * sizeof(struct sn_hwperf_port_info);
		BUG_ON(sz > sizeof(ptdata));
		/* ptdata is reused: it now describes the router's ports */
		e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
				      SN_HWPERF_ENUM_PORTS, router->id, sz,
				      (u64)&ptdata, 0, 0, NULL);
		if (e != SN_HWPERF_OP_OK) {
			e = -EINVAL;
			goto err;
		}
		for (j=0; j < router->ports; j++) {
			dest = sn_hwperf_findobj_id(objbuf, nobj,
				ptdata[j].conn_id);
			/*
			 * NOTE(review): dest->id is an object id while node is
			 * a cnode id -- confirm this comparison is intended.
			 */
			if (!dest || dest->id == node ||
			    SN_HWPERF_FOREIGN(dest) ||
			    !SN_HWPERF_IS_NODE(dest) ||
			    SN_HWPERF_IS_IONODE(dest)) {
				continue;
			}
			c = sn_hwperf_obj_to_cnode(dest);
			if (!found_cpu && sn_hwperf_has_cpus(c)) {
				if (near_cpu_node)
					*near_cpu_node = c;
				found_cpu++;
			}
			if (!found_mem && sn_hwperf_has_mem(c)) {
				if (near_mem_node)
					*near_mem_node = c;
				found_mem++;
			}
			if (found_cpu && found_mem)
				break;
		}
	}

	if (!found_cpu || !found_mem) {
		/* resort to _any_ node with CPUs and memory */
		for (i=0, op=objbuf; i < nobj; i++, op++) {
			if (SN_HWPERF_FOREIGN(op) ||
			    SN_HWPERF_IS_IONODE(op) ||
			    !SN_HWPERF_IS_NODE(op)) {
				continue;
			}
			c = sn_hwperf_obj_to_cnode(op);
			if (!found_cpu && sn_hwperf_has_cpus(c)) {
				if (near_cpu_node)
					*near_cpu_node = c;
				found_cpu++;
			}
			if (!found_mem && sn_hwperf_has_mem(c)) {
				if (near_mem_node)
					*near_mem_node = c;
				found_mem++;
			}
			if (found_cpu && found_mem)
				break;
		}
	}

	if (!found_cpu || !found_mem)
		e = -ENODATA;

	/* on success e still holds the result of the last SAL call */
err:
	return e;
}
374 
375 
sn_topology_show(struct seq_file * s,void * d)376 static int sn_topology_show(struct seq_file *s, void *d)
377 {
378 	int sz;
379 	int pt;
380 	int e = 0;
381 	int i;
382 	int j;
383 	const char *slabname;
384 	int ordinal;
385 	char slice;
386 	struct cpuinfo_ia64 *c;
387 	struct sn_hwperf_port_info *ptdata;
388 	struct sn_hwperf_object_info *p;
389 	struct sn_hwperf_object_info *obj = d;	/* this object */
390 	struct sn_hwperf_object_info *objs = s->private; /* all objects */
391 	u8 shubtype;
392 	u8 system_size;
393 	u8 sharing_size;
394 	u8 partid;
395 	u8 coher;
396 	u8 nasid_shift;
397 	u8 region_size;
398 	u16 nasid_mask;
399 	int nasid_msb;
400 
401 	if (obj == objs) {
402 		seq_printf(s, "# sn_topology version 2\n");
403 		seq_printf(s, "# objtype ordinal location partition"
404 			" [attribute value [, ...]]\n");
405 
406 		if (ia64_sn_get_sn_info(0,
407 			&shubtype, &nasid_mask, &nasid_shift, &system_size,
408 			&sharing_size, &partid, &coher, &region_size))
409 			BUG();
410 		for (nasid_msb=63; nasid_msb > 0; nasid_msb--) {
411 			if (((u64)nasid_mask << nasid_shift) & (1ULL << nasid_msb))
412 				break;
413 		}
414 		seq_printf(s, "partition %u %s local "
415 			"shubtype %s, "
416 			"nasid_mask 0x%016llx, "
417 			"nasid_bits %d:%d, "
418 			"system_size %d, "
419 			"sharing_size %d, "
420 			"coherency_domain %d, "
421 			"region_size %d\n",
422 
423 			partid, utsname()->nodename,
424 			shubtype ? "shub2" : "shub1",
425 			(u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
426 			system_size, sharing_size, coher, region_size);
427 
428 		print_pci_topology(s);
429 	}
430 
431 	if (SN_HWPERF_FOREIGN(obj)) {
432 		/* private in another partition: not interesting */
433 		return 0;
434 	}
435 
436 	for (i = 0; i < SN_HWPERF_MAXSTRING && obj->name[i]; i++) {
437 		if (obj->name[i] == ' ')
438 			obj->name[i] = '_';
439 	}
440 
441 	slabname = sn_hwperf_get_slabname(obj, objs, &ordinal);
442 	seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location,
443 		obj->sn_hwp_this_part ? "local" : "shared", obj->name);
444 
445 	if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)))
446 		seq_putc(s, '\n');
447 	else {
448 		cnodeid_t near_mem = -1;
449 		cnodeid_t near_cpu = -1;
450 
451 		seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal));
452 
453 		if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt,
454 			ordinal, &near_mem, &near_cpu) == 0) {
455 			seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d",
456 				near_mem, near_cpu);
457 		}
458 
459 		if (!SN_HWPERF_IS_IONODE(obj)) {
460 			for_each_online_node(i) {
461 				seq_printf(s, i ? ":%d" : ", dist %d",
462 					node_distance(ordinal, i));
463 			}
464 		}
465 
466 		seq_putc(s, '\n');
467 
468 		/*
469 		 * CPUs on this node, if any
470 		 */
471 		if (!SN_HWPERF_IS_IONODE(obj)) {
472 			for_each_cpu_and(i, cpu_online_mask,
473 					 cpumask_of_node(ordinal)) {
474 				slice = 'a' + cpuid_to_slice(i);
475 				c = cpu_data(i);
476 				seq_printf(s, "cpu %d %s%c local"
477 					   " freq %luMHz, arch ia64",
478 					   i, obj->location, slice,
479 					   c->proc_freq / 1000000);
480 				for_each_online_cpu(j) {
481 					seq_printf(s, j ? ":%d" : ", dist %d",
482 						   node_distance(
483 						    	cpu_to_node(i),
484 						    	cpu_to_node(j)));
485 				}
486 				seq_putc(s, '\n');
487 			}
488 		}
489 	}
490 
491 	if (obj->ports) {
492 		/*
493 		 * numalink ports
494 		 */
495 		sz = obj->ports * sizeof(struct sn_hwperf_port_info);
496 		if ((ptdata = kmalloc(sz, GFP_KERNEL)) == NULL)
497 			return -ENOMEM;
498 		e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
499 				      SN_HWPERF_ENUM_PORTS, obj->id, sz,
500 				      (u64) ptdata, 0, 0, NULL);
501 		if (e != SN_HWPERF_OP_OK)
502 			return -EINVAL;
503 		for (ordinal=0, p=objs; p != obj; p++) {
504 			if (!SN_HWPERF_FOREIGN(p))
505 				ordinal += p->ports;
506 		}
507 		for (pt = 0; pt < obj->ports; pt++) {
508 			for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) {
509 				if (ptdata[pt].conn_id == p->id) {
510 					break;
511 				}
512 			}
513 			seq_printf(s, "numalink %d %s-%d",
514 			    ordinal+pt, obj->location, ptdata[pt].port);
515 
516 			if (i >= sn_hwperf_obj_cnt) {
517 				/* no connection */
518 				seq_puts(s, " local endpoint disconnected"
519 					    ", protocol unknown\n");
520 				continue;
521 			}
522 
523 			if (obj->sn_hwp_this_part && p->sn_hwp_this_part)
524 				/* both ends local to this partition */
525 				seq_puts(s, " local");
526 			else if (SN_HWPERF_FOREIGN(p))
527 				/* both ends of the link in foreign partiton */
528 				seq_puts(s, " foreign");
529 			else
530 				/* link straddles a partition */
531 				seq_puts(s, " shared");
532 
533 			/*
534 			 * Unlikely, but strictly should query the LLP config
535 			 * registers because an NL4R can be configured to run
536 			 * NL3 protocol, even when not talking to an NL3 router.
537 			 * Ditto for node-node.
538 			 */
539 			seq_printf(s, " endpoint %s-%d, protocol %s\n",
540 				p->location, ptdata[pt].conn_port,
541 				(SN_HWPERF_IS_NL3ROUTER(obj) ||
542 				SN_HWPERF_IS_NL3ROUTER(p)) ?  "LLP3" : "LLP4");
543 		}
544 		kfree(ptdata);
545 	}
546 
547 	return 0;
548 }
549 
/*
 * seq_file ->start handler: return the object at index *pos in the array
 * held in s->private, or NULL once *pos reaches sn_hwperf_obj_cnt.
 */
static void *sn_topology_start(struct seq_file *s, loff_t * pos)
{
	struct sn_hwperf_object_info *objs = s->private;

	if (*pos >= sn_hwperf_obj_cnt)
		return NULL;

	return (void *)(objs + *pos);
}
559 
/*
 * seq_file ->next handler: advance *pos by one and look the new position
 * up exactly as sn_topology_start() does.
 */
static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos)
{
	*pos += 1;
	return sn_topology_start(s, pos);
}
565 
/* seq_file ->stop handler: nothing to undo, so this does nothing. */
static void sn_topology_stop(struct seq_file *m, void *v)
{
}
570 
/*
 * /proc/sgi_sn/sn_topology, read-only using seq_file.
 *
 * The iterator walks the object array stored in seq->private by
 * sn_topology_open(); each ->show call prints one object.
 */
static const struct seq_operations sn_topology_seq_ops = {
	.start = sn_topology_start,
	.next = sn_topology_next,
	.stop = sn_topology_stop,
	.show = sn_topology_show
};
580 
/*
 * Everything sn_hwperf_call_sal() needs for one SAL hwperf call, bundled
 * so it can be passed as the single void * argument of a cross-CPU call.
 */
struct sn_hwperf_op_info {
	u64 op;				/* operation code passed to ia64_sn_hwperf_op() */
	struct sn_hwperf_ioctl_args *a;	/* ioctl arguments; ->arg and ->sz are passed to SAL */
	void *p;			/* kernel buffer handed to SAL (may be NULL) */
	int *v0;			/* passed as the last argument of ia64_sn_hwperf_op() */
	int ret;			/* result of ia64_sn_hwperf_op(), set by sn_hwperf_call_sal() */
};
588 
sn_hwperf_call_sal(void * info)589 static void sn_hwperf_call_sal(void *info)
590 {
591 	struct sn_hwperf_op_info *op_info = info;
592 	int r;
593 
594 	r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op_info->op,
595 		      op_info->a->arg, op_info->a->sz,
596 		      (u64) op_info->p, 0, 0, op_info->v0);
597 	op_info->ret = r;
598 }
599 
sn_hwperf_op_cpu(struct sn_hwperf_op_info * op_info)600 static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
601 {
602 	u32 cpu;
603 	u32 use_ipi;
604 	int r = 0;
605 	cpumask_t save_allowed;
606 
607 	cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32;
608 	use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK;
609 	op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
610 
611 	if (cpu != SN_HWPERF_ARG_ANY_CPU) {
612 		if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
613 			r = -EINVAL;
614 			goto out;
615 		}
616 	}
617 
618 	if (cpu == SN_HWPERF_ARG_ANY_CPU) {
619 		/* don't care which cpu */
620 		sn_hwperf_call_sal(op_info);
621 	} else if (cpu == get_cpu()) {
622 		/* already on correct cpu */
623 		sn_hwperf_call_sal(op_info);
624 		put_cpu();
625 	} else {
626 		put_cpu();
627 		if (use_ipi) {
628 			/* use an interprocessor interrupt to call SAL */
629 			smp_call_function_single(cpu, sn_hwperf_call_sal,
630 				op_info, 1);
631 		}
632 		else {
633 			/* migrate the task before calling SAL */
634 			save_allowed = current->cpus_allowed;
635 			set_cpus_allowed_ptr(current, cpumask_of(cpu));
636 			sn_hwperf_call_sal(op_info);
637 			set_cpus_allowed_ptr(current, &save_allowed);
638 		}
639 	}
640 	r = op_info->ret;
641 
642 out:
643 	return r;
644 }
645 
646 /* map SAL hwperf error code to system error code */
sn_hwperf_map_err(int hwperf_err)647 static int sn_hwperf_map_err(int hwperf_err)
648 {
649 	int e;
650 
651 	switch(hwperf_err) {
652 	case SN_HWPERF_OP_OK:
653 		e = 0;
654 		break;
655 
656 	case SN_HWPERF_OP_NOMEM:
657 		e = -ENOMEM;
658 		break;
659 
660 	case SN_HWPERF_OP_NO_PERM:
661 		e = -EPERM;
662 		break;
663 
664 	case SN_HWPERF_OP_IO_ERROR:
665 		e = -EIO;
666 		break;
667 
668 	case SN_HWPERF_OP_BUSY:
669 		e = -EBUSY;
670 		break;
671 
672 	case SN_HWPERF_OP_RECONFIGURE:
673 		e = -EAGAIN;
674 		break;
675 
676 	case SN_HWPERF_OP_INVAL:
677 	default:
678 		e = -EINVAL;
679 		break;
680 	}
681 
682 	return e;
683 }
684 
685 /*
686  * ioctl for "sn_hwperf" misc device
687  */
sn_hwperf_ioctl(struct file * fp,u32 op,unsigned long arg)688 static long sn_hwperf_ioctl(struct file *fp, u32 op, unsigned long arg)
689 {
690 	struct sn_hwperf_ioctl_args a;
691 	struct cpuinfo_ia64 *cdata;
692 	struct sn_hwperf_object_info *objs;
693 	struct sn_hwperf_object_info *cpuobj;
694 	struct sn_hwperf_op_info op_info;
695 	void *p = NULL;
696 	int nobj;
697 	char slice;
698 	int node;
699 	int r;
700 	int v0;
701 	int i;
702 	int j;
703 
704 	/* only user requests are allowed here */
705 	if ((op & SN_HWPERF_OP_MASK) < 10) {
706 		r = -EINVAL;
707 		goto error;
708 	}
709 	r = copy_from_user(&a, (const void __user *)arg,
710 		sizeof(struct sn_hwperf_ioctl_args));
711 	if (r != 0) {
712 		r = -EFAULT;
713 		goto error;
714 	}
715 
716 	/*
717 	 * Allocate memory to hold a kernel copy of the user buffer. The
718 	 * buffer contents are either copied in or out (or both) of user
719 	 * space depending on the flags encoded in the requested operation.
720 	 */
721 	if (a.ptr) {
722 		p = vmalloc(a.sz);
723 		if (!p) {
724 			r = -ENOMEM;
725 			goto error;
726 		}
727 	}
728 
729 	if (op & SN_HWPERF_OP_MEM_COPYIN) {
730 		r = copy_from_user(p, (const void __user *)a.ptr, a.sz);
731 		if (r != 0) {
732 			r = -EFAULT;
733 			goto error;
734 		}
735 	}
736 
737 	switch (op) {
738 	case SN_HWPERF_GET_CPU_INFO:
739 		if (a.sz == sizeof(u64)) {
740 			/* special case to get size needed */
741 			*(u64 *) p = (u64) num_online_cpus() *
742 				sizeof(struct sn_hwperf_object_info);
743 		} else
744 		if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) {
745 			r = -ENOMEM;
746 			goto error;
747 		} else
748 		if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
749 			int cpuobj_index = 0;
750 
751 			memset(p, 0, a.sz);
752 			for (i = 0; i < nobj; i++) {
753 				if (!SN_HWPERF_IS_NODE(objs + i))
754 					continue;
755 				node = sn_hwperf_obj_to_cnode(objs + i);
756 				for_each_online_cpu(j) {
757 					if (node != cpu_to_node(j))
758 						continue;
759 					cpuobj = (struct sn_hwperf_object_info *) p + cpuobj_index++;
760 					slice = 'a' + cpuid_to_slice(j);
761 					cdata = cpu_data(j);
762 					cpuobj->id = j;
763 					snprintf(cpuobj->name,
764 						 sizeof(cpuobj->name),
765 						 "CPU %luMHz %s",
766 						 cdata->proc_freq / 1000000,
767 						 cdata->vendor);
768 					snprintf(cpuobj->location,
769 						 sizeof(cpuobj->location),
770 						 "%s%c", objs[i].location,
771 						 slice);
772 				}
773 			}
774 
775 			vfree(objs);
776 		}
777 		break;
778 
779 	case SN_HWPERF_GET_NODE_NASID:
780 		if (a.sz != sizeof(u64) ||
781 		   (node = a.arg) < 0 || !cnode_possible(node)) {
782 			r = -EINVAL;
783 			goto error;
784 		}
785 		*(u64 *)p = (u64)cnodeid_to_nasid(node);
786 		break;
787 
788 	case SN_HWPERF_GET_OBJ_NODE:
789 		i = a.arg;
790 		if (a.sz != sizeof(u64) || i < 0) {
791 			r = -EINVAL;
792 			goto error;
793 		}
794 		if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
795 			if (i >= nobj) {
796 				r = -EINVAL;
797 				vfree(objs);
798 				goto error;
799 			}
800 			if (objs[i].id != a.arg) {
801 				for (i = 0; i < nobj; i++) {
802 					if (objs[i].id == a.arg)
803 						break;
804 				}
805 			}
806 			if (i == nobj) {
807 				r = -EINVAL;
808 				vfree(objs);
809 				goto error;
810 			}
811 
812 			if (!SN_HWPERF_IS_NODE(objs + i) &&
813 			    !SN_HWPERF_IS_IONODE(objs + i)) {
814 			    	r = -ENOENT;
815 				vfree(objs);
816 				goto error;
817 			}
818 
819 			*(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i);
820 			vfree(objs);
821 		}
822 		break;
823 
824 	case SN_HWPERF_GET_MMRS:
825 	case SN_HWPERF_SET_MMRS:
826 	case SN_HWPERF_OBJECT_DISTANCE:
827 		op_info.p = p;
828 		op_info.a = &a;
829 		op_info.v0 = &v0;
830 		op_info.op = op;
831 		r = sn_hwperf_op_cpu(&op_info);
832 		if (r) {
833 			r = sn_hwperf_map_err(r);
834 			a.v0 = v0;
835 			goto error;
836 		}
837 		break;
838 
839 	default:
840 		/* all other ops are a direct SAL call */
841 		r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op,
842 			      a.arg, a.sz, (u64) p, 0, 0, &v0);
843 		if (r) {
844 			r = sn_hwperf_map_err(r);
845 			goto error;
846 		}
847 		a.v0 = v0;
848 		break;
849 	}
850 
851 	if (op & SN_HWPERF_OP_MEM_COPYOUT) {
852 		r = copy_to_user((void __user *)a.ptr, p, a.sz);
853 		if (r != 0) {
854 			r = -EFAULT;
855 			goto error;
856 		}
857 	}
858 
859 error:
860 	vfree(p);
861 
862 	return r;
863 }
864 
/* File operations of the "sn_hwperf" misc device: ioctl only. */
static const struct file_operations sn_hwperf_fops = {
	.unlocked_ioctl = sn_hwperf_ioctl,
	.llseek = noop_llseek,
};

/*
 * The misc device itself, registered by sn_hwperf_misc_register_init().
 * Positional initializers: minor number, name, file operations.
 */
static struct miscdevice sn_hwperf_dev = {
	MISC_DYNAMIC_MINOR,
	"sn_hwperf",
	&sn_hwperf_fops
};
875 
sn_hwperf_init(void)876 static int sn_hwperf_init(void)
877 {
878 	u64 v;
879 	int salr;
880 	int e = 0;
881 
882 	/* single threaded, once-only initialization */
883 	mutex_lock(&sn_hwperf_init_mutex);
884 
885 	if (sn_hwperf_salheap) {
886 		mutex_unlock(&sn_hwperf_init_mutex);
887 		return e;
888 	}
889 
890 	/*
891 	 * The PROM code needs a fixed reference node. For convenience the
892 	 * same node as the console I/O is used.
893 	 */
894 	sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid();
895 
896 	/*
897 	 * Request the needed size and install the PROM scratch area.
898 	 * The PROM keeps various tracking bits in this memory area.
899 	 */
900 	salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
901 				 (u64) SN_HWPERF_GET_HEAPSIZE, 0,
902 				 (u64) sizeof(u64), (u64) &v, 0, 0, NULL);
903 	if (salr != SN_HWPERF_OP_OK) {
904 		e = -EINVAL;
905 		goto out;
906 	}
907 
908 	if ((sn_hwperf_salheap = vmalloc(v)) == NULL) {
909 		e = -ENOMEM;
910 		goto out;
911 	}
912 	salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
913 				 SN_HWPERF_INSTALL_HEAP, 0, v,
914 				 (u64) sn_hwperf_salheap, 0, 0, NULL);
915 	if (salr != SN_HWPERF_OP_OK) {
916 		e = -EINVAL;
917 		goto out;
918 	}
919 
920 	salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
921 				 SN_HWPERF_OBJECT_COUNT, 0,
922 				 sizeof(u64), (u64) &v, 0, 0, NULL);
923 	if (salr != SN_HWPERF_OP_OK) {
924 		e = -EINVAL;
925 		goto out;
926 	}
927 	sn_hwperf_obj_cnt = (int)v;
928 
929 out:
930 	if (e < 0 && sn_hwperf_salheap) {
931 		vfree(sn_hwperf_salheap);
932 		sn_hwperf_salheap = NULL;
933 		sn_hwperf_obj_cnt = 0;
934 	}
935 	mutex_unlock(&sn_hwperf_init_mutex);
936 	return e;
937 }
938 
sn_topology_open(struct inode * inode,struct file * file)939 int sn_topology_open(struct inode *inode, struct file *file)
940 {
941 	int e;
942 	struct seq_file *seq;
943 	struct sn_hwperf_object_info *objbuf;
944 	int nobj;
945 
946 	if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) {
947 		e = seq_open(file, &sn_topology_seq_ops);
948 		seq = file->private_data;
949 		seq->private = objbuf;
950 	}
951 
952 	return e;
953 }
954 
sn_topology_release(struct inode * inode,struct file * file)955 int sn_topology_release(struct inode *inode, struct file *file)
956 {
957 	struct seq_file *seq = file->private_data;
958 
959 	vfree(seq->private);
960 	return seq_release(inode, file);
961 }
962 
/*
 * Exported entry point: find the nearest node with memory and the
 * nearest node with CPUs for cnode @node.
 *
 * Enumerates the SAL objects, runs the search on that temporary
 * snapshot and frees it again.  @near_mem_node and @near_cpu_node are
 * passed straight through to sn_hwperf_get_nearest_node_objdata().
 *
 * Returns the enumeration error if the snapshot could not be obtained,
 * otherwise the result of the search.
 */
int sn_hwperf_get_nearest_node(cnodeid_t node,
	cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node)
{
	struct sn_hwperf_object_info *objbuf;
	int nobj;
	int err;

	err = sn_hwperf_enum_objects(&nobj, &objbuf);
	if (err != 0)
		return err;

	err = sn_hwperf_get_nearest_node_objdata(objbuf, nobj,
		node, near_mem_node, near_cpu_node);
	vfree(objbuf);

	return err;
}
978 
sn_hwperf_misc_register_init(void)979 static int __devinit sn_hwperf_misc_register_init(void)
980 {
981 	int e;
982 
983 	if (!ia64_platform_is("sn2"))
984 		return 0;
985 
986 	sn_hwperf_init();
987 
988 	/*
989 	 * Register a dynamic misc device for hwperf ioctls. Platforms
990 	 * supporting hotplug will create /dev/sn_hwperf, else user
991 	 * can to look up the minor number in /proc/misc.
992 	 */
993 	if ((e = misc_register(&sn_hwperf_dev)) != 0) {
994 		printk(KERN_ERR "sn_hwperf_misc_register_init: failed to "
995 		"register misc device for \"%s\"\n", sn_hwperf_dev.name);
996 	}
997 
998 	return e;
999 }
1000 
/* Run the misc device registration as a device-level initcall. */
device_initcall(sn_hwperf_misc_register_init); /* after misc_init() */
/* sn_hwperf_get_nearest_node() is the only symbol exported from this file. */
EXPORT_SYMBOL(sn_hwperf_get_nearest_node);
1003