1cad9931fSMasami Hiramatsu // SPDX-License-Identifier: GPL-2.0
2cad9931fSMasami Hiramatsu /*
3cad9931fSMasami Hiramatsu * fprobe - Simple ftrace probe wrapper for function entry.
4cad9931fSMasami Hiramatsu */
5cad9931fSMasami Hiramatsu #define pr_fmt(fmt) "fprobe: " fmt
6cad9931fSMasami Hiramatsu
7cad9931fSMasami Hiramatsu #include <linux/err.h>
8cad9931fSMasami Hiramatsu #include <linux/fprobe.h>
9cad9931fSMasami Hiramatsu #include <linux/kallsyms.h>
10cad9931fSMasami Hiramatsu #include <linux/kprobes.h>
115b0ab789SMasami Hiramatsu #include <linux/list.h>
12cad9931fSMasami Hiramatsu #include <linux/mutex.h>
13cad9931fSMasami Hiramatsu #include <linux/slab.h>
14cad9931fSMasami Hiramatsu #include <linux/sort.h>
155b0ab789SMasami Hiramatsu
165b0ab789SMasami Hiramatsu #include <asm/fprobe.h>
175b0ab789SMasami Hiramatsu
185b0ab789SMasami Hiramatsu #include "trace.h"
195b0ab789SMasami Hiramatsu
/* fprobe_ip_table has 2^8 buckets, keyed by the ftrace IP address. */
#define FPROBE_IP_HASH_BITS 8
#define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS)

/* fprobe_table has 2^6 buckets, keyed by the fprobe instance address. */
#define FPROBE_HASH_BITS 6
#define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS)

/*
 * Convert a size in bytes to the number of 'long' words needed to hold it
 * (rounded up). The argument is parenthesized so that expressions with
 * low-precedence operators (e.g. 'c ? a : b') expand correctly.
 */
#define SIZE_IN_LONG(x) (((x) + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2))
275b0ab789SMasami Hiramatsu
2876d0de57SMasami Hiramatsu (Google) /*
29cad9931fSMasami Hiramatsu * fprobe_table: hold 'fprobe_hlist::hlist' for checking the fprobe still
3076d0de57SMasami Hiramatsu (Google) * exists. The key is the address of fprobe instance.
313cc4e2c5SZe Gao * fprobe_ip_table: hold 'fprobe_hlist::array[*]' for searching the fprobe
32cad9931fSMasami Hiramatsu * instance related to the funciton address. The key is the ftrace IP
33cad9931fSMasami Hiramatsu * address.
34cad9931fSMasami Hiramatsu *
355b0ab789SMasami Hiramatsu * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp
365b0ab789SMasami Hiramatsu * are set NULL and delete those from both hash tables (by hlist_del_rcu).
375b0ab789SMasami Hiramatsu * After an RCU grace period, the fprobe_hlist itself will be released.
385b0ab789SMasami Hiramatsu *
393cc4e2c5SZe Gao * fprobe_table and fprobe_ip_table can be accessed from either
405b0ab789SMasami Hiramatsu * - Normal hlist traversal and RCU add/del under 'fprobe_mutex' is held.
415b0ab789SMasami Hiramatsu * - RCU hlist traversal under disabling preempt
425b0ab789SMasami Hiramatsu */
4327527410SZe Gao static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
4476d0de57SMasami Hiramatsu (Google) static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
4576d0de57SMasami Hiramatsu (Google) static DEFINE_MUTEX(fprobe_mutex);
465b0ab789SMasami Hiramatsu
475b0ab789SMasami Hiramatsu /*
4876d0de57SMasami Hiramatsu (Google) * Find first fprobe in the hlist. It will be iterated twice in the entry
49cb16330dSMasami Hiramatsu (Google) * probe, once for correcting the total required size, the second time is
5076d0de57SMasami Hiramatsu (Google) * calling back the user handlers.
5139d95420SMasami Hiramatsu (Google) * Thus the hlist in the fprobe_table must be sorted and new probe needs to
5239d95420SMasami Hiramatsu (Google) * be added *before* the first fprobe.
5339d95420SMasami Hiramatsu (Google) */
find_first_fprobe_node(unsigned long ip)5439d95420SMasami Hiramatsu (Google) static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip)
5539d95420SMasami Hiramatsu (Google) {
5676d0de57SMasami Hiramatsu (Google) struct fprobe_hlist_node *node;
5739d95420SMasami Hiramatsu (Google) struct hlist_head *head;
583cc4e2c5SZe Gao
593cc4e2c5SZe Gao head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
603cc4e2c5SZe Gao hlist_for_each_entry_rcu(node, head, hlist,
613cc4e2c5SZe Gao lockdep_is_held(&fprobe_mutex)) {
623cc4e2c5SZe Gao if (node->addr == ip)
633cc4e2c5SZe Gao return node;
643cc4e2c5SZe Gao }
653cc4e2c5SZe Gao return NULL;
663cc4e2c5SZe Gao }
673cc4e2c5SZe Gao NOKPROBE_SYMBOL(find_first_fprobe_node);
683cc4e2c5SZe Gao
693cc4e2c5SZe Gao /* Node insertion and deletion requires the fprobe_mutex */
insert_fprobe_node(struct fprobe_hlist_node * node)703cc4e2c5SZe Gao static void insert_fprobe_node(struct fprobe_hlist_node *node)
713cc4e2c5SZe Gao {
723cc4e2c5SZe Gao unsigned long ip = node->addr;
733cc4e2c5SZe Gao struct fprobe_hlist_node *next;
743cc4e2c5SZe Gao struct hlist_head *head;
753cc4e2c5SZe Gao
763cc4e2c5SZe Gao lockdep_assert_held(&fprobe_mutex);
773cc4e2c5SZe Gao
783cc4e2c5SZe Gao next = find_first_fprobe_node(ip);
79cad9931fSMasami Hiramatsu if (next) {
803cc4e2c5SZe Gao hlist_add_before_rcu(&node->hlist, &next->hlist);
81cad9931fSMasami Hiramatsu return;
82cad9931fSMasami Hiramatsu }
83cad9931fSMasami Hiramatsu head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
84ab51e15dSMasami Hiramatsu hlist_add_head_rcu(&node->hlist, head);
85ab51e15dSMasami Hiramatsu }
86ab51e15dSMasami Hiramatsu
873cc4e2c5SZe Gao /* Return true if there are synonims */
delete_fprobe_node(struct fprobe_hlist_node * node)883cc4e2c5SZe Gao static bool delete_fprobe_node(struct fprobe_hlist_node *node)
893cc4e2c5SZe Gao {
903cc4e2c5SZe Gao lockdep_assert_held(&fprobe_mutex);
913cc4e2c5SZe Gao
923cc4e2c5SZe Gao /* Avoid double deleting */
933cc4e2c5SZe Gao if (READ_ONCE(node->fp) != NULL) {
943cc4e2c5SZe Gao WRITE_ONCE(node->fp, NULL);
953cc4e2c5SZe Gao hlist_del_rcu(&node->hlist);
963cc4e2c5SZe Gao }
973cc4e2c5SZe Gao return !!find_first_fprobe_node(node->addr);
983cc4e2c5SZe Gao }
993cc4e2c5SZe Gao
1003cc4e2c5SZe Gao /* Check existence of the fprobe */
is_fprobe_still_exist(struct fprobe * fp)1013cc4e2c5SZe Gao static bool is_fprobe_still_exist(struct fprobe *fp)
102ab51e15dSMasami Hiramatsu {
103d5f28bb1SMasami Hiramatsu (Google) struct hlist_head *head;
104d5f28bb1SMasami Hiramatsu (Google) struct fprobe_hlist *fph;
105d5f28bb1SMasami Hiramatsu (Google)
106d5f28bb1SMasami Hiramatsu (Google) head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
107d5f28bb1SMasami Hiramatsu (Google) hlist_for_each_entry_rcu(fph, head, hlist,
108d5f28bb1SMasami Hiramatsu (Google) lockdep_is_held(&fprobe_mutex)) {
109ab51e15dSMasami Hiramatsu if (fph->fp == fp)
110ab51e15dSMasami Hiramatsu return true;
1115f0c584dSZe Gao }
112ab51e15dSMasami Hiramatsu return false;
1133cc4e2c5SZe Gao }
114ab51e15dSMasami Hiramatsu NOKPROBE_SYMBOL(is_fprobe_still_exist);
1153cc4e2c5SZe Gao
add_fprobe_hash(struct fprobe * fp)116ab51e15dSMasami Hiramatsu static int add_fprobe_hash(struct fprobe *fp)
1175f0c584dSZe Gao {
1185f0c584dSZe Gao struct fprobe_hlist *fph = fp->hlist_array;
1193cc4e2c5SZe Gao struct hlist_head *head;
120ab51e15dSMasami Hiramatsu
121ab51e15dSMasami Hiramatsu lockdep_assert_held(&fprobe_mutex);
1225b0ab789SMasami Hiramatsu
123cb16330dSMasami Hiramatsu (Google) if (WARN_ON_ONCE(!fph))
1245b0ab789SMasami Hiramatsu return -EINVAL;
1255b0ab789SMasami Hiramatsu
1265b0ab789SMasami Hiramatsu if (is_fprobe_still_exist(fp))
12727527410SZe Gao return -EEXIST;
1285b0ab789SMasami Hiramatsu
1295b0ab789SMasami Hiramatsu head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
1305b0ab789SMasami Hiramatsu hlist_add_head_rcu(&fp->hlist_array->hlist, head);
1315b0ab789SMasami Hiramatsu return 0;
1325b0ab789SMasami Hiramatsu }
1335b0ab789SMasami Hiramatsu
del_fprobe_hash(struct fprobe * fp)13427527410SZe Gao static int del_fprobe_hash(struct fprobe *fp)
13527527410SZe Gao {
13627527410SZe Gao struct fprobe_hlist *fph = fp->hlist_array;
13727527410SZe Gao
13827527410SZe Gao lockdep_assert_held(&fprobe_mutex);
13927527410SZe Gao
14027527410SZe Gao if (WARN_ON_ONCE(!fph))
14127527410SZe Gao return -EINVAL;
14227527410SZe Gao
14327527410SZe Gao if (!is_fprobe_still_exist(fp))
144cb16330dSMasami Hiramatsu (Google) return -ENOENT;
14576d0de57SMasami Hiramatsu (Google)
14627527410SZe Gao fph->fp = NULL;
1475b0ab789SMasami Hiramatsu hlist_del_rcu(&fph->hlist);
1485b0ab789SMasami Hiramatsu return 0;
1495b0ab789SMasami Hiramatsu }
1508be92533SJiri Olsa
1518be92533SJiri Olsa #ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER
1528be92533SJiri Olsa
1538be92533SJiri Olsa /* The arch should encode fprobe_header info into one unsigned long */
1548be92533SJiri Olsa #define FPROBE_HEADER_SIZE_IN_LONG 1
1558be92533SJiri Olsa
write_fprobe_header(unsigned long * stack,struct fprobe * fp,unsigned int size_words)1568be92533SJiri Olsa static inline bool write_fprobe_header(unsigned long *stack,
1578be92533SJiri Olsa struct fprobe *fp, unsigned int size_words)
158cad9931fSMasami Hiramatsu {
159cad9931fSMasami Hiramatsu if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD ||
160cad9931fSMasami Hiramatsu !arch_fprobe_header_encodable(fp)))
161cad9931fSMasami Hiramatsu return false;
162cad9931fSMasami Hiramatsu
163cad9931fSMasami Hiramatsu *stack = arch_encode_fprobe_header(fp, size_words);
164cad9931fSMasami Hiramatsu return true;
165cad9931fSMasami Hiramatsu }
166cad9931fSMasami Hiramatsu
/*
 * Decode the arch-encoded header word at @stack into the fprobe pointer
 * (*@fp) and the entry data size in words (*@size_words).
 */
static inline void read_fprobe_header(unsigned long *stack,
					struct fprobe **fp, unsigned int *size_words)
{
	unsigned long encoded = *stack;

	*fp = arch_decode_fprobe_header_fp(encoded);
	*size_words = arch_decode_fprobe_header_size(encoded);
}
173cad9931fSMasami Hiramatsu
174cad9931fSMasami Hiramatsu #else
175cad9931fSMasami Hiramatsu
/*
 * Generic fprobe_header: used when the arch does not provide a compact
 * single-word encoding.
 */
struct __fprobe_header {
	struct fprobe *fp;		/* owner of the entry data that follows */
	unsigned long size_words;	/* size of that entry data, in longs */
} __packed;

/* Number of longs occupied by one generic header. */
#define FPROBE_HEADER_SIZE_IN_LONG	SIZE_IN_LONG(sizeof(struct __fprobe_header))
183ab51e15dSMasami Hiramatsu
write_fprobe_header(unsigned long * stack,struct fprobe * fp,unsigned int size_words)184cad9931fSMasami Hiramatsu static inline bool write_fprobe_header(unsigned long *stack,
185cad9931fSMasami Hiramatsu struct fprobe *fp, unsigned int size_words)
186cad9931fSMasami Hiramatsu {
187cad9931fSMasami Hiramatsu struct __fprobe_header *fph = (struct __fprobe_header *)stack;
1885b0ab789SMasami Hiramatsu
1895b0ab789SMasami Hiramatsu if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD))
1904bbd9345Swuqiang.matt return false;
1915b0ab789SMasami Hiramatsu
1925b0ab789SMasami Hiramatsu fph->fp = fp;
1935b0ab789SMasami Hiramatsu fph->size_words = size_words;
1945b0ab789SMasami Hiramatsu return true;
1955b0ab789SMasami Hiramatsu }
1965b0ab789SMasami Hiramatsu
read_fprobe_header(unsigned long * stack,struct fprobe ** fp,unsigned int * size_words)1975b0ab789SMasami Hiramatsu static inline void read_fprobe_header(unsigned long *stack,
19859a7a298SMasami Hiramatsu (Google) struct fprobe **fp, unsigned int *size_words)
199*65727860SMasami Hiramatsu (Google) {
20059a7a298SMasami Hiramatsu (Google) struct __fprobe_header *fph = (struct __fprobe_header *)stack;
201*65727860SMasami Hiramatsu (Google)
202*65727860SMasami Hiramatsu (Google) *fp = fph->fp;
203700b2b43SMasami Hiramatsu (Google) *size_words = fph->size_words;
2045b0ab789SMasami Hiramatsu }
205*65727860SMasami Hiramatsu (Google)
206*65727860SMasami Hiramatsu (Google) #endif
2074bbd9345Swuqiang.matt
/*
 * fprobe shadow stack management:
 * Since fprobe shares a single fgraph_ops, it needs to share the stack entry
 * among the probes on the same function exit. Because a new probe can be
 * registered before a target function returns, we cannot use the hash table
 * to find the corresponding probes at exit time. Thus the probe address is
 * stored on the shadow stack together with its entry data size.
 */
__fprobe_handler(unsigned long ip,unsigned long parent_ip,struct fprobe * fp,struct ftrace_regs * fregs,void * data)2174bbd9345Swuqiang.matt static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
2185b0ab789SMasami Hiramatsu struct fprobe *fp, struct ftrace_regs *fregs,
2195b0ab789SMasami Hiramatsu void *data)
2205b0ab789SMasami Hiramatsu {
2215b0ab789SMasami Hiramatsu if (!fp->entry_handler)
2225b0ab789SMasami Hiramatsu return 0;
2235b0ab789SMasami Hiramatsu
2245b0ab789SMasami Hiramatsu return fp->entry_handler(fp, ip, parent_ip, fregs, data);
225cad9931fSMasami Hiramatsu }
226cad9931fSMasami Hiramatsu
__fprobe_kprobe_handler(unsigned long ip,unsigned long parent_ip,struct fprobe * fp,struct ftrace_regs * fregs,void * data)227cad9931fSMasami Hiramatsu static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
228cad9931fSMasami Hiramatsu struct fprobe *fp, struct ftrace_regs *fregs,
229cad9931fSMasami Hiramatsu void *data)
230cad9931fSMasami Hiramatsu {
231cad9931fSMasami Hiramatsu int ret;
232cad9931fSMasami Hiramatsu /*
233cad9931fSMasami Hiramatsu * This user handler is shared with other kprobes and is not expected to be
234cad9931fSMasami Hiramatsu * called recursively. So if any other kprobe handler is running, this will
235cad9931fSMasami Hiramatsu * exit as kprobe does. See the section 'Share the callbacks with kprobes'
236cad9931fSMasami Hiramatsu * in Documentation/trace/fprobe.rst for more information.
237cad9931fSMasami Hiramatsu */
2385b0ab789SMasami Hiramatsu if (unlikely(kprobe_running())) {
239cad9931fSMasami Hiramatsu fp->nmissed++;
240cad9931fSMasami Hiramatsu return 0;
241cad9931fSMasami Hiramatsu }
242cad9931fSMasami Hiramatsu
243cad9931fSMasami Hiramatsu kprobe_busy_begin();
244cad9931fSMasami Hiramatsu ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
245cad9931fSMasami Hiramatsu kprobe_busy_end();
246cad9931fSMasami Hiramatsu return ret;
247cad9931fSMasami Hiramatsu }
248cad9931fSMasami Hiramatsu
fprobe_entry(struct ftrace_graph_ent * trace,struct fgraph_ops * gops,struct ftrace_regs * fregs)249cad9931fSMasami Hiramatsu static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
250cad9931fSMasami Hiramatsu struct ftrace_regs *fregs)
251cad9931fSMasami Hiramatsu {
252cad9931fSMasami Hiramatsu struct fprobe_hlist_node *node, *first;
253cad9931fSMasami Hiramatsu unsigned long *fgraph_data = NULL;
254cad9931fSMasami Hiramatsu unsigned long func = trace->func;
255cad9931fSMasami Hiramatsu unsigned long ret_ip;
256cad9931fSMasami Hiramatsu int reserved_words;
257cad9931fSMasami Hiramatsu struct fprobe *fp;
258cad9931fSMasami Hiramatsu int used, ret;
259cad9931fSMasami Hiramatsu
260cad9931fSMasami Hiramatsu if (WARN_ON_ONCE(!fregs))
261cad9931fSMasami Hiramatsu return 0;
262cad9931fSMasami Hiramatsu
2635b0ab789SMasami Hiramatsu first = node = find_first_fprobe_node(func);
2645b0ab789SMasami Hiramatsu if (unlikely(!first))
2655b0ab789SMasami Hiramatsu return 0;
2665b0ab789SMasami Hiramatsu
267261608f3SMasami Hiramatsu reserved_words = 0;
2685b0ab789SMasami Hiramatsu hlist_for_each_entry_from_rcu(node, hlist) {
2695b0ab789SMasami Hiramatsu if (node->addr != func)
2705b0ab789SMasami Hiramatsu break;
2715b0ab789SMasami Hiramatsu fp = READ_ONCE(node->fp);
2725b0ab789SMasami Hiramatsu if (!fp || !fp->exit_handler)
273cad9931fSMasami Hiramatsu continue;
2745b0ab789SMasami Hiramatsu /*
275cad9931fSMasami Hiramatsu * Since fprobe can be enabled until the next loop, we ignore the
276cad9931fSMasami Hiramatsu * fprobe's disabled flag in this loop.
2775b0ab789SMasami Hiramatsu */
278cad9931fSMasami Hiramatsu reserved_words +=
279cad9931fSMasami Hiramatsu FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
280cad9931fSMasami Hiramatsu }
281cad9931fSMasami Hiramatsu node = first;
282cad9931fSMasami Hiramatsu if (reserved_words) {
283cad9931fSMasami Hiramatsu fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
284cad9931fSMasami Hiramatsu if (unlikely(!fgraph_data)) {
285cad9931fSMasami Hiramatsu hlist_for_each_entry_from_rcu(node, hlist) {
286cad9931fSMasami Hiramatsu if (node->addr != func)
287cad9931fSMasami Hiramatsu break;
288cad9931fSMasami Hiramatsu fp = READ_ONCE(node->fp);
289cad9931fSMasami Hiramatsu if (fp && !fprobe_disabled(fp))
290cad9931fSMasami Hiramatsu fp->nmissed++;
291cad9931fSMasami Hiramatsu }
292cad9931fSMasami Hiramatsu return 0;
293cad9931fSMasami Hiramatsu }
294cad9931fSMasami Hiramatsu }
295cad9931fSMasami Hiramatsu
296cad9931fSMasami Hiramatsu /*
297cad9931fSMasami Hiramatsu * TODO: recursion detection has been done in the fgraph. Thus we need
298cad9931fSMasami Hiramatsu * to add a callback to increment missed counter.
299cad9931fSMasami Hiramatsu */
300cad9931fSMasami Hiramatsu ret_ip = ftrace_regs_get_return_address(fregs);
301cad9931fSMasami Hiramatsu used = 0;
302cad9931fSMasami Hiramatsu hlist_for_each_entry_from_rcu(node, hlist) {
303cad9931fSMasami Hiramatsu int data_size;
304cad9931fSMasami Hiramatsu void *data;
3055b0ab789SMasami Hiramatsu
3065b0ab789SMasami Hiramatsu if (node->addr != func)
3075b0ab789SMasami Hiramatsu break;
3085b0ab789SMasami Hiramatsu fp = READ_ONCE(node->fp);
309cad9931fSMasami Hiramatsu if (!fp || fprobe_disabled(fp))
310cad9931fSMasami Hiramatsu continue;
311cad9931fSMasami Hiramatsu
312cad9931fSMasami Hiramatsu data_size = fp->entry_data_size;
3135b0ab789SMasami Hiramatsu if (data_size && fp->exit_handler)
314cad9931fSMasami Hiramatsu data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
315cad9931fSMasami Hiramatsu else
316cad9931fSMasami Hiramatsu data = NULL;
317cad9931fSMasami Hiramatsu
318cad9931fSMasami Hiramatsu if (fprobe_shared_with_kprobes(fp))
319cad9931fSMasami Hiramatsu ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data);
320cad9931fSMasami Hiramatsu else
321cad9931fSMasami Hiramatsu ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
322cad9931fSMasami Hiramatsu
323cad9931fSMasami Hiramatsu /* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
324cad9931fSMasami Hiramatsu if (!ret && fp->exit_handler) {
325cad9931fSMasami Hiramatsu int size_words = SIZE_IN_LONG(data_size);
326cad9931fSMasami Hiramatsu
327cad9931fSMasami Hiramatsu if (write_fprobe_header(&fgraph_data[used], fp, size_words))
328cad9931fSMasami Hiramatsu used += FPROBE_HEADER_SIZE_IN_LONG + size_words;
329cad9931fSMasami Hiramatsu }
330cad9931fSMasami Hiramatsu }
331cad9931fSMasami Hiramatsu if (used < reserved_words)
332cad9931fSMasami Hiramatsu memset(fgraph_data + used, 0, reserved_words - used);
333cad9931fSMasami Hiramatsu
334cad9931fSMasami Hiramatsu /* If any exit_handler is set, data must be used. */
335cad9931fSMasami Hiramatsu return used != 0;
336cad9931fSMasami Hiramatsu }
337cad9931fSMasami Hiramatsu NOKPROBE_SYMBOL(fprobe_entry);
338cad9931fSMasami Hiramatsu
fprobe_return(struct ftrace_graph_ret * trace,struct fgraph_ops * gops,struct ftrace_regs * fregs)339cad9931fSMasami Hiramatsu static void fprobe_return(struct ftrace_graph_ret *trace,
340cad9931fSMasami Hiramatsu struct fgraph_ops *gops,
341cad9931fSMasami Hiramatsu struct ftrace_regs *fregs)
342cad9931fSMasami Hiramatsu {
343cad9931fSMasami Hiramatsu unsigned long *fgraph_data = NULL;
344cad9931fSMasami Hiramatsu unsigned long ret_ip;
345cad9931fSMasami Hiramatsu struct fprobe *fp;
346cad9931fSMasami Hiramatsu int size, curr;
347cad9931fSMasami Hiramatsu int size_words;
348cad9931fSMasami Hiramatsu
349334e5519SMasami Hiramatsu (Google) fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size);
350334e5519SMasami Hiramatsu (Google) if (WARN_ON_ONCE(!fgraph_data))
351334e5519SMasami Hiramatsu (Google) return;
352334e5519SMasami Hiramatsu (Google) size_words = SIZE_IN_LONG(size);
353334e5519SMasami Hiramatsu (Google) ret_ip = ftrace_regs_get_instruction_pointer(fregs);
354334e5519SMasami Hiramatsu (Google)
355334e5519SMasami Hiramatsu (Google) preempt_disable_notrace();
356334e5519SMasami Hiramatsu (Google)
357cad9931fSMasami Hiramatsu curr = 0;
358cad9931fSMasami Hiramatsu while (size_words > curr) {
359cad9931fSMasami Hiramatsu read_fprobe_header(&fgraph_data[curr], &fp, &size);
360cad9931fSMasami Hiramatsu if (!fp)
361cad9931fSMasami Hiramatsu break;
362cad9931fSMasami Hiramatsu curr += FPROBE_HEADER_SIZE_IN_LONG;
363cad9931fSMasami Hiramatsu if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) {
364cad9931fSMasami Hiramatsu if (WARN_ON_ONCE(curr + size > size_words))
365cad9931fSMasami Hiramatsu break;
366cad9931fSMasami Hiramatsu fp->exit_handler(fp, trace->func, ret_ip, fregs,
367cad9931fSMasami Hiramatsu size ? fgraph_data + curr : NULL);
368cad9931fSMasami Hiramatsu }
369334e5519SMasami Hiramatsu (Google) curr += size;
370cad9931fSMasami Hiramatsu }
371cad9931fSMasami Hiramatsu preempt_enable_notrace();
3724bbd9345Swuqiang.matt }
373195b9cb5SMasami Hiramatsu (Google) NOKPROBE_SYMBOL(fprobe_return);
374cad9931fSMasami Hiramatsu
3755b0ab789SMasami Hiramatsu static struct fgraph_ops fprobe_graph_ops = {
3765b0ab789SMasami Hiramatsu .entryfunc = fprobe_entry,
3775b0ab789SMasami Hiramatsu .retfunc = fprobe_return,
3785b0ab789SMasami Hiramatsu };
3794bbd9345Swuqiang.matt static int fprobe_graph_active;
3805f810187SJiri Olsa
3815f810187SJiri Olsa /* Add @addrs to the ftrace filter and register fgraph if needed. */
fprobe_graph_add_ips(unsigned long * addrs,int num)382cad9931fSMasami Hiramatsu static int fprobe_graph_add_ips(unsigned long *addrs, int num)
383cad9931fSMasami Hiramatsu {
384cad9931fSMasami Hiramatsu int ret;
385cad9931fSMasami Hiramatsu
386cad9931fSMasami Hiramatsu lockdep_assert_held(&fprobe_mutex);
387
388 ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0);
389 if (ret)
390 return ret;
391
392 if (!fprobe_graph_active) {
393 ret = register_ftrace_graph(&fprobe_graph_ops);
394 if (WARN_ON_ONCE(ret)) {
395 ftrace_free_filter(&fprobe_graph_ops.ops);
396 return ret;
397 }
398 }
399 fprobe_graph_active++;
400 return 0;
401 }
402
403 /* Remove @addrs from the ftrace filter and unregister fgraph if possible. */
fprobe_graph_remove_ips(unsigned long * addrs,int num)404 static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
405 {
406 lockdep_assert_held(&fprobe_mutex);
407
408 fprobe_graph_active--;
409 /* Q: should we unregister it ? */
410 if (!fprobe_graph_active)
411 unregister_ftrace_graph(&fprobe_graph_ops);
412
413 if (num)
414 ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
415 }
416
417 #ifdef CONFIG_MODULES
418
/* Initial capacity of the address list built when a module goes away. */
#define FPROBE_IPS_BATCH_INIT 8

/* Growable list of instruction pointer addresses. */
struct fprobe_addr_list {
	int index;		/* number of addresses stored so far */
	int size;		/* capacity of @addrs, in entries */
	unsigned long *addrs;	/* the address array */
};
426
fprobe_addr_list_add(struct fprobe_addr_list * alist,unsigned long addr)427 static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long addr)
428 {
429 unsigned long *addrs;
430
431 /* Previously we failed to expand the list. */
432 if (alist->index == alist->size)
433 return -ENOSPC;
434
435 alist->addrs[alist->index++] = addr;
436 if (alist->index < alist->size)
437 return 0;
438
439 /* Expand the address list */
440 addrs = kcalloc(alist->size * 2, sizeof(*addrs), GFP_KERNEL);
441 if (!addrs)
442 return -ENOMEM;
443
444 memcpy(addrs, alist->addrs, alist->size * sizeof(*addrs));
445 alist->size *= 2;
446 kfree(alist->addrs);
447 alist->addrs = addrs;
448
449 return 0;
450 }
451
fprobe_remove_node_in_module(struct module * mod,struct hlist_head * head,struct fprobe_addr_list * alist)452 static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head,
453 struct fprobe_addr_list *alist)
454 {
455 struct fprobe_hlist_node *node;
456 int ret = 0;
457
458 hlist_for_each_entry_rcu(node, head, hlist,
459 lockdep_is_held(&fprobe_mutex)) {
460 if (!within_module(node->addr, mod))
461 continue;
462 if (delete_fprobe_node(node))
463 continue;
464 /*
465 * If failed to update alist, just continue to update hlist.
466 * Therefore, at list user handler will not hit anymore.
467 */
468 if (!ret)
469 ret = fprobe_addr_list_add(alist, node->addr);
470 }
471 }
472
473 /* Handle module unloading to manage fprobe_ip_table. */
fprobe_module_callback(struct notifier_block * nb,unsigned long val,void * data)474 static int fprobe_module_callback(struct notifier_block *nb,
475 unsigned long val, void *data)
476 {
477 struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
478 struct module *mod = data;
479 int i;
480
481 if (val != MODULE_STATE_GOING)
482 return NOTIFY_DONE;
483
484 alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL);
485 /* If failed to alloc memory, we can not remove ips from hash. */
486 if (!alist.addrs)
487 return NOTIFY_DONE;
488
489 mutex_lock(&fprobe_mutex);
490 for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++)
491 fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist);
492
493 if (alist.index > 0)
494 ftrace_set_filter_ips(&fprobe_graph_ops.ops,
495 alist.addrs, alist.index, 1, 0);
496 mutex_unlock(&fprobe_mutex);
497
498 kfree(alist.addrs);
499
500 return NOTIFY_DONE;
501 }
502
503 static struct notifier_block fprobe_module_nb = {
504 .notifier_call = fprobe_module_callback,
505 .priority = 0,
506 };
507
init_fprobe_module(void)508 static int __init init_fprobe_module(void)
509 {
510 return register_module_notifier(&fprobe_module_nb);
511 }
512 early_initcall(init_fprobe_module);
513 #endif
514
/*
 * sort() comparison callback for an array of C strings: @a and @b point to
 * the array elements (i.e. to 'const char *'), compared with strcmp().
 */
static int symbols_cmp(const void *a, const void *b)
{
	const char *lhs = *(const char * const *)a;
	const char *rhs = *(const char * const *)b;

	return strcmp(lhs, rhs);
}
522
523 /* Convert ftrace location address from symbols */
get_ftrace_locations(const char ** syms,int num)524 static unsigned long *get_ftrace_locations(const char **syms, int num)
525 {
526 unsigned long *addrs;
527
528 /* Convert symbols to symbol address */
529 addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
530 if (!addrs)
531 return ERR_PTR(-ENOMEM);
532
533 /* ftrace_lookup_symbols expects sorted symbols */
534 sort(syms, num, sizeof(*syms), symbols_cmp, NULL);
535
536 if (!ftrace_lookup_symbols(syms, num, addrs))
537 return addrs;
538
539 kfree(addrs);
540 return ERR_PTR(-ENOENT);
541 }
542
543 struct filter_match_data {
544 const char *filter;
545 const char *notfilter;
546 size_t index;
547 size_t size;
548 unsigned long *addrs;
549 struct module **mods;
550 };
551
filter_match_callback(void * data,const char * name,unsigned long addr)552 static int filter_match_callback(void *data, const char *name, unsigned long addr)
553 {
554 struct filter_match_data *match = data;
555
556 if (!glob_match(match->filter, name) ||
557 (match->notfilter && glob_match(match->notfilter, name)))
558 return 0;
559
560 if (!ftrace_location(addr))
561 return 0;
562
563 if (match->addrs) {
564 struct module *mod = __module_text_address(addr);
565
566 if (mod && !try_module_get(mod))
567 return 0;
568
569 match->mods[match->index] = mod;
570 match->addrs[match->index] = addr;
571 }
572 match->index++;
573 return match->index == match->size;
574 }
575
576 /*
577 * Make IP list from the filter/no-filter glob patterns.
578 * Return the number of matched symbols, or errno.
579 * If @addrs == NULL, this just counts the number of matched symbols. If @addrs
580 * is passed with an array, we need to pass the an @mods array of the same size
581 * to increment the module refcount for each symbol.
582 * This means we also need to call `module_put` for each element of @mods after
583 * using the @addrs.
584 */
get_ips_from_filter(const char * filter,const char * notfilter,unsigned long * addrs,struct module ** mods,size_t size)585 static int get_ips_from_filter(const char *filter, const char *notfilter,
586 unsigned long *addrs, struct module **mods,
587 size_t size)
588 {
589 struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
590 .index = 0, .size = size, .addrs = addrs, .mods = mods};
591 int ret;
592
593 if (addrs && !mods)
594 return -EINVAL;
595
596 ret = kallsyms_on_each_symbol(filter_match_callback, &match);
597 if (ret < 0)
598 return ret;
599 if (IS_ENABLED(CONFIG_MODULES)) {
600 ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
601 if (ret < 0)
602 return ret;
603 }
604
605 return match.index ?: -ENOENT;
606 }
607
fprobe_fail_cleanup(struct fprobe * fp)608 static void fprobe_fail_cleanup(struct fprobe *fp)
609 {
610 kfree(fp->hlist_array);
611 fp->hlist_array = NULL;
612 }
613
614 /* Initialize the fprobe data structure. */
fprobe_init(struct fprobe * fp,unsigned long * addrs,int num)615 static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
616 {
617 struct fprobe_hlist *hlist_array;
618 unsigned long addr;
619 int size, i;
620
621 if (!fp || !addrs || num <= 0)
622 return -EINVAL;
623
624 size = ALIGN(fp->entry_data_size, sizeof(long));
625 if (size > MAX_FPROBE_DATA_SIZE)
626 return -E2BIG;
627 fp->entry_data_size = size;
628
629 hlist_array = kzalloc(struct_size(hlist_array, array, num), GFP_KERNEL);
630 if (!hlist_array)
631 return -ENOMEM;
632
633 fp->nmissed = 0;
634
635 hlist_array->size = num;
636 fp->hlist_array = hlist_array;
637 hlist_array->fp = fp;
638 for (i = 0; i < num; i++) {
639 hlist_array->array[i].fp = fp;
640 addr = ftrace_location(addrs[i]);
641 if (!addr) {
642 fprobe_fail_cleanup(fp);
643 return -ENOENT;
644 }
645 hlist_array->array[i].addr = addr;
646 }
647 return 0;
648 }
649
650 #define FPROBE_IPS_MAX INT_MAX
651
fprobe_count_ips_from_filter(const char * filter,const char * notfilter)652 int fprobe_count_ips_from_filter(const char *filter, const char *notfilter)
653 {
654 return get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
655 }
656
657 /**
658 * register_fprobe() - Register fprobe to ftrace by pattern.
659 * @fp: A fprobe data structure to be registered.
660 * @filter: A wildcard pattern of probed symbols.
661 * @notfilter: A wildcard pattern of NOT probed symbols.
662 *
663 * Register @fp to ftrace for enabling the probe on the symbols matched to @filter.
664 * If @notfilter is not NULL, the symbols matched the @notfilter are not probed.
665 *
666 * Return 0 if @fp is registered successfully, -errno if not.
667 */
register_fprobe(struct fprobe * fp,const char * filter,const char * notfilter)668 int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
669 {
670 unsigned long *addrs __free(kfree) = NULL;
671 struct module **mods __free(kfree) = NULL;
672 int ret, num;
673
674 if (!fp || !filter)
675 return -EINVAL;
676
677 num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
678 if (num < 0)
679 return num;
680
681 addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
682 if (!addrs)
683 return -ENOMEM;
684
685 mods = kcalloc(num, sizeof(*mods), GFP_KERNEL);
686 if (!mods)
687 return -ENOMEM;
688
689 ret = get_ips_from_filter(filter, notfilter, addrs, mods, num);
690 if (ret < 0)
691 return ret;
692
693 ret = register_fprobe_ips(fp, addrs, ret);
694
695 for (int i = 0; i < num; i++) {
696 if (mods[i])
697 module_put(mods[i]);
698 }
699 return ret;
700 }
701 EXPORT_SYMBOL_GPL(register_fprobe);
702
703 /**
704 * register_fprobe_ips() - Register fprobe to ftrace by address.
705 * @fp: A fprobe data structure to be registered.
706 * @addrs: An array of target function address.
707 * @num: The number of entries of @addrs.
708 *
709 * Register @fp to ftrace for enabling the probe on the address given by @addrs.
710 * The @addrs must be the addresses of ftrace location address, which may be
711 * the symbol address + arch-dependent offset.
712 * If you unsure what this mean, please use other registration functions.
713 *
714 * Return 0 if @fp is registered successfully, -errno if not.
715 */
register_fprobe_ips(struct fprobe * fp,unsigned long * addrs,int num)716 int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
717 {
718 struct fprobe_hlist *hlist_array;
719 int ret, i;
720
721 ret = fprobe_init(fp, addrs, num);
722 if (ret)
723 return ret;
724
725 mutex_lock(&fprobe_mutex);
726
727 hlist_array = fp->hlist_array;
728 ret = fprobe_graph_add_ips(addrs, num);
729 if (!ret) {
730 add_fprobe_hash(fp);
731 for (i = 0; i < hlist_array->size; i++)
732 insert_fprobe_node(&hlist_array->array[i]);
733 }
734 mutex_unlock(&fprobe_mutex);
735
736 if (ret)
737 fprobe_fail_cleanup(fp);
738
739 return ret;
740 }
741 EXPORT_SYMBOL_GPL(register_fprobe_ips);
742
743 /**
744 * register_fprobe_syms() - Register fprobe to ftrace by symbols.
745 * @fp: A fprobe data structure to be registered.
746 * @syms: An array of target symbols.
747 * @num: The number of entries of @syms.
748 *
749 * Register @fp to the symbols given by @syms array. This will be useful if
750 * you are sure the symbols exist in the kernel.
751 *
752 * Return 0 if @fp is registered successfully, -errno if not.
753 */
register_fprobe_syms(struct fprobe * fp,const char ** syms,int num)754 int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
755 {
756 unsigned long *addrs;
757 int ret;
758
759 if (!fp || !syms || num <= 0)
760 return -EINVAL;
761
762 addrs = get_ftrace_locations(syms, num);
763 if (IS_ERR(addrs))
764 return PTR_ERR(addrs);
765
766 ret = register_fprobe_ips(fp, addrs, num);
767
768 kfree(addrs);
769
770 return ret;
771 }
772 EXPORT_SYMBOL_GPL(register_fprobe_syms);
773
fprobe_is_registered(struct fprobe * fp)774 bool fprobe_is_registered(struct fprobe *fp)
775 {
776 if (!fp || !fp->hlist_array)
777 return false;
778 return true;
779 }
780
781 /**
782 * unregister_fprobe() - Unregister fprobe.
783 * @fp: A fprobe data structure to be unregistered.
784 *
785 * Unregister fprobe (and remove ftrace hooks from the function entries).
786 *
787 * Return 0 if @fp is unregistered successfully, -errno if not.
788 */
unregister_fprobe(struct fprobe * fp)789 int unregister_fprobe(struct fprobe *fp)
790 {
791 struct fprobe_hlist *hlist_array;
792 unsigned long *addrs = NULL;
793 int ret = 0, i, count;
794
795 mutex_lock(&fprobe_mutex);
796 if (!fp || !is_fprobe_still_exist(fp)) {
797 ret = -EINVAL;
798 goto out;
799 }
800
801 hlist_array = fp->hlist_array;
802 addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL);
803 if (!addrs) {
804 ret = -ENOMEM; /* TODO: Fallback to one-by-one loop */
805 goto out;
806 }
807
808 /* Remove non-synonim ips from table and hash */
809 count = 0;
810 for (i = 0; i < hlist_array->size; i++) {
811 if (!delete_fprobe_node(&hlist_array->array[i]))
812 addrs[count++] = hlist_array->array[i].addr;
813 }
814 del_fprobe_hash(fp);
815
816 fprobe_graph_remove_ips(addrs, count);
817
818 kfree_rcu(hlist_array, rcu);
819 fp->hlist_array = NULL;
820
821 out:
822 mutex_unlock(&fprobe_mutex);
823
824 kfree(addrs);
825 return ret;
826 }
827 EXPORT_SYMBOL_GPL(unregister_fprobe);
828