/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2012,2013 Justin Hibbits <jhibbits@freebsd.org>
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
28 #include <sys/cdefs.h>
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/dtrace_impl.h>
33 #include <sys/kernel.h>
34 #include <sys/stack.h>
35 #include <sys/sysent.h>
36 #include <sys/pcpu.h>
37
38 #include <machine/frame.h>
39 #include <machine/md_var.h>
40 #include <machine/psl.h>
41 #include <machine/reg.h>
42 #include <machine/stack.h>
43
44 #include <vm/vm.h>
45 #include <vm/vm_param.h>
46 #include <vm/pmap.h>
47
48 #include "regset.h"
49
/* Offset to the LR save word (ppc32). */
#define	RETURN_OFFSET	4
/* Offset to LR save word (ppc64).  CR save area sits between back chain and LR. */
#define	RETURN_OFFSET64	16

#ifdef __powerpc64__
#define	OFFSET		4	/* Account for the TOC reload slot */
#define	FRAME_OFFSET	48	/* Trapframe offset from the stack pointer */
#else
#define	OFFSET		0
#define	FRAME_OFFSET	8
#endif

/* An address is "in the kernel" if it lies in KVA or in the direct map. */
#define	INKERNEL(x)	(((x) <= VM_MAX_KERNEL_ADDRESS && \
		(x) >= VM_MIN_KERNEL_ADDRESS) || \
		(PMAP_HAS_DMAP && (x) >= DMAP_BASE_ADDRESS && \
		(x) <= DMAP_MAX_ADDRESS))
67
68 static __inline int
dtrace_sp_inkernel(uintptr_t sp)69 dtrace_sp_inkernel(uintptr_t sp)
70 {
71 struct trapframe *frame;
72 vm_offset_t callpc;
73
74 /* Not within the kernel, or not aligned. */
75 if (!INKERNEL(sp) || (sp & 0xf) != 0)
76 return (0);
77 #ifdef __powerpc64__
78 callpc = *(vm_offset_t *)(sp + RETURN_OFFSET64);
79 #else
80 callpc = *(vm_offset_t *)(sp + RETURN_OFFSET);
81 #endif
82 if ((callpc & 3) || (callpc < 0x100))
83 return (0);
84
85 /*
86 * trapexit() and asttrapexit() are sentinels
87 * for kernel stack tracing.
88 */
89 if (callpc + OFFSET == (vm_offset_t) &trapexit ||
90 callpc + OFFSET == (vm_offset_t) &asttrapexit) {
91 frame = (struct trapframe *)(sp + FRAME_OFFSET);
92
93 return ((frame->srr1 & PSL_PR) == 0);
94 }
95
96 return (1);
97 }
98
99 static __inline void
dtrace_next_sp_pc(uintptr_t sp,uintptr_t * nsp,uintptr_t * pc,uintptr_t * lr)100 dtrace_next_sp_pc(uintptr_t sp, uintptr_t *nsp, uintptr_t *pc, uintptr_t *lr)
101 {
102 vm_offset_t callpc;
103 struct trapframe *frame;
104
105 if (lr != 0 && *lr != 0)
106 callpc = *lr;
107 else
108 #ifdef __powerpc64__
109 callpc = *(vm_offset_t *)(sp + RETURN_OFFSET64);
110 #else
111 callpc = *(vm_offset_t *)(sp + RETURN_OFFSET);
112 #endif
113
114 /*
115 * trapexit() and asttrapexit() are sentinels
116 * for kernel stack tracing.
117 */
118 if ((callpc + OFFSET == (vm_offset_t) &trapexit ||
119 callpc + OFFSET == (vm_offset_t) &asttrapexit)) {
120 /* Access the trap frame */
121 frame = (struct trapframe *)(sp + FRAME_OFFSET);
122
123 if (nsp != NULL)
124 *nsp = frame->fixreg[1];
125 if (pc != NULL)
126 *pc = frame->srr0;
127 if (lr != NULL)
128 *lr = frame->lr;
129 return;
130 }
131
132 if (nsp != NULL)
133 *nsp = *(uintptr_t *)sp;
134 if (pc != NULL)
135 *pc = callpc;
136 /* lr is only valid for trap frames */
137 if (lr != NULL)
138 *lr = 0;
139 }
140
141 void
dtrace_getpcstack(pc_t * pcstack,int pcstack_limit,int aframes,uint32_t * intrpc)142 dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
143 uint32_t *intrpc)
144 {
145 int depth = 0;
146 uintptr_t osp, sp, lr = 0;
147 vm_offset_t callpc;
148 pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller;
149
150 osp = PAGE_SIZE;
151 if (intrpc != 0)
152 pcstack[depth++] = (pc_t) intrpc;
153
154 aframes++;
155
156 sp = (uintptr_t)__builtin_frame_address(0);
157
158 while (depth < pcstack_limit) {
159 if (sp <= osp)
160 break;
161
162 if (!dtrace_sp_inkernel(sp))
163 break;
164 osp = sp;
165 dtrace_next_sp_pc(osp, &sp, &callpc, &lr);
166
167 if (aframes > 0) {
168 aframes--;
169 if ((aframes == 0) && (caller != 0)) {
170 pcstack[depth++] = caller;
171 }
172 }
173 else {
174 pcstack[depth++] = callpc;
175 }
176 }
177
178 for (; depth < pcstack_limit; depth++) {
179 pcstack[depth] = 0;
180 }
181 }
182
183 static int
dtrace_getustack_common(uint64_t * pcstack,int pcstack_limit,uintptr_t pc,uintptr_t sp)184 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
185 uintptr_t sp)
186 {
187 proc_t *p = curproc;
188 int ret = 0;
189
190 ASSERT(pcstack == NULL || pcstack_limit > 0);
191
192 while (pc != 0) {
193 ret++;
194 if (pcstack != NULL) {
195 *pcstack++ = (uint64_t)pc;
196 pcstack_limit--;
197 if (pcstack_limit <= 0)
198 break;
199 }
200
201 if (sp == 0)
202 break;
203
204 if (SV_PROC_FLAG(p, SV_ILP32)) {
205 pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET));
206 sp = dtrace_fuword32((void *)sp);
207 }
208 else {
209 pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64));
210 sp = dtrace_fuword64((void *)sp);
211 }
212 }
213
214 return (ret);
215 }
216
217 void
dtrace_getupcstack(uint64_t * pcstack,int pcstack_limit)218 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
219 {
220 proc_t *p = curproc;
221 struct trapframe *tf;
222 uintptr_t pc, sp;
223 volatile uint16_t *flags =
224 (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
225 int n;
226
227 if (*flags & CPU_DTRACE_FAULT)
228 return;
229
230 if (pcstack_limit <= 0)
231 return;
232
233 /*
234 * If there's no user context we still need to zero the stack.
235 */
236 if (p == NULL || (tf = curthread->td_frame) == NULL)
237 goto zero;
238
239 *pcstack++ = (uint64_t)p->p_pid;
240 pcstack_limit--;
241
242 if (pcstack_limit <= 0)
243 return;
244
245 pc = tf->srr0;
246 sp = tf->fixreg[1];
247
248 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
249 /*
250 * In an entry probe. The frame pointer has not yet been
251 * pushed (that happens in the function prologue). The
252 * best approach is to add the current pc as a missing top
253 * of stack and back the pc up to the caller, which is stored
254 * at the current stack pointer address since the call
255 * instruction puts it there right before the branch.
256 */
257
258 *pcstack++ = (uint64_t)pc;
259 pcstack_limit--;
260 if (pcstack_limit <= 0)
261 return;
262
263 pc = tf->lr;
264 }
265
266 n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp);
267 ASSERT(n >= 0);
268 ASSERT(n <= pcstack_limit);
269
270 pcstack += n;
271 pcstack_limit -= n;
272
273 zero:
274 while (pcstack_limit-- > 0)
275 *pcstack++ = 0;
276 }
277
278 int
dtrace_getustackdepth(void)279 dtrace_getustackdepth(void)
280 {
281 proc_t *p = curproc;
282 struct trapframe *tf;
283 uintptr_t pc, sp;
284 int n = 0;
285
286 if (p == NULL || (tf = curthread->td_frame) == NULL)
287 return (0);
288
289 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
290 return (-1);
291
292 pc = tf->srr0;
293 sp = tf->fixreg[1];
294
295 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
296 /*
297 * In an entry probe. The frame pointer has not yet been
298 * pushed (that happens in the function prologue). The
299 * best approach is to add the current pc as a missing top
300 * of stack and back the pc up to the caller, which is stored
301 * at the current stack pointer address since the call
302 * instruction puts it there right before the branch.
303 */
304
305 if (SV_PROC_FLAG(p, SV_ILP32)) {
306 pc = dtrace_fuword32((void *) sp);
307 }
308 else
309 pc = dtrace_fuword64((void *) sp);
310 n++;
311 }
312
313 n += dtrace_getustack_common(NULL, 0, pc, sp);
314
315 return (n);
316 }
317
318 void
dtrace_getufpstack(uint64_t * pcstack,uint64_t * fpstack,int pcstack_limit)319 dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
320 {
321 proc_t *p = curproc;
322 struct trapframe *tf;
323 uintptr_t pc, sp;
324 volatile uint16_t *flags =
325 (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
326 #ifdef notyet /* XXX signal stack */
327 uintptr_t oldcontext;
328 size_t s1, s2;
329 #endif
330
331 if (*flags & CPU_DTRACE_FAULT)
332 return;
333
334 if (pcstack_limit <= 0)
335 return;
336
337 /*
338 * If there's no user context we still need to zero the stack.
339 */
340 if (p == NULL || (tf = curthread->td_frame) == NULL)
341 goto zero;
342
343 *pcstack++ = (uint64_t)p->p_pid;
344 pcstack_limit--;
345
346 if (pcstack_limit <= 0)
347 return;
348
349 pc = tf->srr0;
350 sp = tf->fixreg[1];
351
352 #ifdef notyet /* XXX signal stack */
353 oldcontext = lwp->lwp_oldcontext;
354 s1 = sizeof (struct xframe) + 2 * sizeof (long);
355 s2 = s1 + sizeof (siginfo_t);
356 #endif
357
358 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
359 *pcstack++ = (uint64_t)pc;
360 *fpstack++ = 0;
361 pcstack_limit--;
362 if (pcstack_limit <= 0)
363 return;
364
365 if (SV_PROC_FLAG(p, SV_ILP32)) {
366 pc = dtrace_fuword32((void *)sp);
367 }
368 else {
369 pc = dtrace_fuword64((void *)sp);
370 }
371 }
372
373 while (pc != 0) {
374 *pcstack++ = (uint64_t)pc;
375 *fpstack++ = sp;
376 pcstack_limit--;
377 if (pcstack_limit <= 0)
378 break;
379
380 if (sp == 0)
381 break;
382
383 #ifdef notyet /* XXX signal stack */
384 if (oldcontext == sp + s1 || oldcontext == sp + s2) {
385 ucontext_t *ucp = (ucontext_t *)oldcontext;
386 greg_t *gregs = ucp->uc_mcontext.gregs;
387
388 sp = dtrace_fulword(&gregs[REG_FP]);
389 pc = dtrace_fulword(&gregs[REG_PC]);
390
391 oldcontext = dtrace_fulword(&ucp->uc_link);
392 } else
393 #endif /* XXX */
394 {
395 if (SV_PROC_FLAG(p, SV_ILP32)) {
396 pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET));
397 sp = dtrace_fuword32((void *)sp);
398 }
399 else {
400 pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64));
401 sp = dtrace_fuword64((void *)sp);
402 }
403 }
404
405 /*
406 * This is totally bogus: if we faulted, we're going to clear
407 * the fault and break. This is to deal with the apparently
408 * broken Java stacks on x86.
409 */
410 if (*flags & CPU_DTRACE_FAULT) {
411 *flags &= ~CPU_DTRACE_FAULT;
412 break;
413 }
414 }
415
416 zero:
417 while (pcstack_limit-- > 0)
418 *pcstack++ = 0;
419 }
420
421 /*ARGSUSED*/
422 uint64_t
dtrace_getarg(int arg,int aframes)423 dtrace_getarg(int arg, int aframes)
424 {
425 uintptr_t val;
426 uintptr_t *fp = (uintptr_t *)__builtin_frame_address(0);
427 uintptr_t *stack;
428 int i;
429
430 /*
431 * A total of 8 arguments are passed via registers; any argument with
432 * index of 7 or lower is therefore in a register.
433 */
434 int inreg = 7;
435
436 for (i = 1; i <= aframes; i++) {
437 fp = (uintptr_t *)*fp;
438
439 /*
440 * On ppc32 trapexit() is the immediately following label. On
441 * ppc64 AIM trapexit() follows a nop.
442 */
443 #ifdef __powerpc64__
444 if ((long)(fp[2]) + 4 == (long)trapexit) {
445 #else
446 if ((long)(fp[1]) == (long)trapexit) {
447 #endif
448 /*
449 * In the case of powerpc, we will use the pointer to the regs
450 * structure that was pushed when we took the trap. To get this
451 * structure, we must increment beyond the frame structure. If the
452 * argument that we're seeking is passed on the stack, we'll pull
453 * the true stack pointer out of the saved registers and decrement
454 * our argument by the number of arguments passed in registers; if
455 * the argument we're seeking is passed in regsiters, we can just
456 * load it directly.
457 */
458 #ifdef __powerpc64__
459 struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 48);
460 #else
461 struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 8);
462 #endif
463
464 if (arg <= inreg) {
465 stack = &rp->fixreg[3];
466 } else {
467 stack = (uintptr_t *)(rp->fixreg[1]);
468 arg -= inreg;
469 }
470 goto load;
471 }
472
473 }
474
475 /*
476 * We know that we did not come through a trap to get into
477 * dtrace_probe() -- the provider simply called dtrace_probe()
478 * directly. As this is the case, we need to shift the argument
479 * that we're looking for: the probe ID is the first argument to
480 * dtrace_probe(), so the argument n will actually be found where
481 * one would expect to find argument (n + 1).
482 */
483 arg++;
484
485 if (arg <= inreg) {
486 /*
487 * This shouldn't happen. If the argument is passed in a
488 * register then it should have been, well, passed in a
489 * register...
490 */
491 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
492 return (0);
493 }
494
495 arg -= (inreg + 1);
496 stack = fp + 2;
497
498 load:
499 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
500 val = stack[arg];
501 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
502
503 return (val);
504 }
505
506 int
507 dtrace_getstackdepth(int aframes)
508 {
509 int depth = 0;
510 uintptr_t osp, sp;
511 vm_offset_t callpc;
512
513 osp = PAGE_SIZE;
514 sp = (uintptr_t)__builtin_frame_address(0);
515 for(;;) {
516 if (sp <= osp)
517 break;
518
519 if (!dtrace_sp_inkernel(sp))
520 break;
521
522 depth++;
523 osp = sp;
524 dtrace_next_sp_pc(sp, &sp, NULL, NULL);
525 }
526 if (depth < aframes)
527 return (0);
528
529 return (depth - aframes);
530 }
531
532 ulong_t
533 dtrace_getreg(struct trapframe *frame, uint_t reg)
534 {
535 if (reg < 32)
536 return (frame->fixreg[reg]);
537
538 switch (reg) {
539 case 32:
540 return (frame->lr);
541 case 33:
542 return (frame->cr);
543 case 34:
544 return (frame->xer);
545 case 35:
546 return (frame->ctr);
547 case 36:
548 return (frame->srr0);
549 case 37:
550 return (frame->srr1);
551 case 38:
552 return (frame->exc);
553 default:
554 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
555 return (0);
556 }
557 }
558
559 static int
560 dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
561 {
562 ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr);
563
564 if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) {
565 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
566 cpu_core[curcpu].cpuc_dtrace_illval = uaddr;
567 return (0);
568 }
569
570 return (1);
571 }
572
573 void
574 dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
575 volatile uint16_t *flags)
576 {
577 if (dtrace_copycheck(uaddr, kaddr, size))
578 if (copyin((const void *)uaddr, (void *)kaddr, size)) {
579 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
580 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
581 }
582 }
583
584 void
585 dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
586 volatile uint16_t *flags)
587 {
588 if (dtrace_copycheck(uaddr, kaddr, size)) {
589 if (copyout((const void *)kaddr, (void *)uaddr, size)) {
590 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
591 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
592 }
593 }
594 }
595
596 void
597 dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
598 volatile uint16_t *flags)
599 {
600 size_t actual;
601 int error;
602
603 if (dtrace_copycheck(uaddr, kaddr, size)) {
604 error = copyinstr((const void *)uaddr, (void *)kaddr,
605 size, &actual);
606
607 /* ENAMETOOLONG is not a fault condition. */
608 if (error && error != ENAMETOOLONG) {
609 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
610 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
611 }
612 }
613 }
614
615 /*
616 * The bulk of this function could be replaced to match dtrace_copyinstr()
617 * if we ever implement a copyoutstr().
618 */
619 void
620 dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
621 volatile uint16_t *flags)
622 {
623 size_t len;
624
625 if (dtrace_copycheck(uaddr, kaddr, size)) {
626 len = strlen((const char *)kaddr);
627 if (len > size)
628 len = size;
629
630 if (copyout((const void *)kaddr, (void *)uaddr, len)) {
631 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
632 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
633 }
634 }
635 }
636
637 uint8_t
638 dtrace_fuword8(void *uaddr)
639 {
640 if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
641 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
642 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
643 return (0);
644 }
645 return (fubyte(uaddr));
646 }
647
648 uint16_t
649 dtrace_fuword16(void *uaddr)
650 {
651 uint16_t ret = 0;
652
653 if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) {
654 if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) {
655 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
656 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
657 }
658 }
659 return ret;
660 }
661
662 uint32_t
663 dtrace_fuword32(void *uaddr)
664 {
665 if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
666 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
667 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
668 return (0);
669 }
670 return (fuword32(uaddr));
671 }
672
673 uint64_t
674 dtrace_fuword64(void *uaddr)
675 {
676 uint64_t ret = 0;
677
678 if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) {
679 if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) {
680 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
681 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
682 }
683 }
684 return ret;
685 }
686
687 uintptr_t
688 dtrace_fulword(void *uaddr)
689 {
690 uintptr_t ret = 0;
691
692 if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) {
693 if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) {
694 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
695 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
696 }
697 }
698 return ret;
699 }
700