1 /* SPDX-License-Identifier: GPL-2.0 */
2
3 #ifndef _KERNEL_PRINTK_RINGBUFFER_H
4 #define _KERNEL_PRINTK_RINGBUFFER_H
5
6 #include <linux/atomic.h>
7 #include <linux/bits.h>
8 #include <linux/dev_printk.h>
9 #include <linux/stddef.h>
10 #include <linux/types.h>
11
/*
 * Meta information about each stored message.
 *
 * All fields are set by the printk code except for @seq, which is
 * set by the ringbuffer code.
 *
 * @flags and @level are bit-fields of 5 and 3 bits respectively.
 * @caller_id2 and @comm are only present when CONFIG_PRINTK_EXECUTION_CTX
 * is enabled, so the size of this structure depends on the configuration.
 */
struct printk_info {
	u64	seq;		/* sequence number */
	u64	ts_nsec;	/* timestamp in nanoseconds */
	u16	text_len;	/* length of text message */
	u8	facility;	/* syslog facility */
	u8	flags:5;	/* internal record flags */
	u8	level:3;	/* syslog level */
	u32	caller_id;	/* thread id or processor id */
#ifdef CONFIG_PRINTK_EXECUTION_CTX
	u32	caller_id2;	/* caller_id complement */
	/* name of the task that generated the message */
	char	comm[TASK_COMM_LEN];
#endif

	/* device information attached to the message */
	struct dev_printk_info	dev_info;
};
34
/*
 * A structure providing the buffers, used by writers and readers.
 *
 * @info:          Meta-data of the record.
 * @text_buf:      Text data of the record.
 * @text_buf_size: Size of @text_buf.
 *
 * Writers:
 * Using prb_rec_init_wr(), a writer sets @text_buf_size before calling
 * prb_reserve(). On success, prb_reserve() sets @info and @text_buf to
 * buffers reserved for that writer.
 *
 * Readers:
 * Using prb_rec_init_rd(), a reader sets all fields before calling
 * prb_read_valid(). Note that the reader provides the @info and @text_buf
 * buffers. On success, the struct pointed to by @info will be filled and
 * the char array pointed to by @text_buf will be filled with text data.
 */
struct printk_record {
	struct printk_info	*info;
	char			*text_buf;
	unsigned int		text_buf_size;
};
54
/*
 * Specifies the logical position and span of a data block.
 *
 * For a record without a data block, both fields hold one of the special
 * values FAILED_LPOS or EMPTY_LINE_LPOS instead of a real logical position
 * (see FAILED_BLK_LPOS).
 */
struct prb_data_blk_lpos {
	unsigned long	begin;
	unsigned long	next;
};
60
/*
 * A descriptor: the complete meta-data for a record.
 *
 * @state_var:     A bitwise combination of descriptor ID and descriptor state.
 *                 The state occupies the two most significant bits
 *                 (DESC_FLAGS_MASK), the ID the remaining bits (DESC_ID_MASK).
 * @text_blk_lpos: Logical position span of the record's text data block, or
 *                 the special FAILED_LPOS/EMPTY_LINE_LPOS values when the
 *                 record has no data block.
 */
struct prb_desc {
	atomic_long_t			state_var;
	struct prb_data_blk_lpos	text_blk_lpos;
};
70
/*
 * A ringbuffer of "ID + data" elements.
 *
 * @size_bits: Size of the @data buffer as a power-of-2 exponent
 *             (_DEFINE_PRINTKRB() sets it to avgtextbits + descbits).
 * @data:      The data buffer.
 * @head_lpos: Head logical position, statically initialized to BLK0_LPOS().
 * @tail_lpos: Tail logical position, statically initialized to BLK0_LPOS().
 */
struct prb_data_ring {
	unsigned int	size_bits;
	char		*data;
	atomic_long_t	head_lpos;
	atomic_long_t	tail_lpos;
};
78
/*
 * A ringbuffer of "struct prb_desc" elements.
 *
 * @count_bits:         Number of descriptors as a power-of-2 exponent.
 * @descs:              The descriptor array (_DESCS_COUNT(@count_bits)
 *                      elements when set up by _DEFINE_PRINTKRB()).
 * @infos:              The printk_info array, with the same element count
 *                      as @descs when set up by _DEFINE_PRINTKRB().
 * @head_id:            ID of the head descriptor, statically initialized to
 *                      DESC0_ID().
 * @tail_id:            ID of the tail descriptor, statically initialized to
 *                      DESC0_ID().
 * @last_finalized_seq: Statically initialized to 0.
 */
struct prb_desc_ring {
	unsigned int		count_bits;
	struct prb_desc		*descs;
	struct printk_info	*infos;
	atomic_long_t		head_id;
	atomic_long_t		tail_id;
	atomic_long_t		last_finalized_seq;
};
88
/*
 * The high level structure representing the printk ringbuffer.
 *
 * @desc_ring:      The ring of record descriptors and their meta-data.
 * @text_data_ring: The ring holding the text data of the records.
 * @fail:           Count of failed prb_reserve() calls where not even a
 *                  data-less record was created.
 */
struct printk_ringbuffer {
	struct prb_desc_ring	desc_ring;
	struct prb_data_ring	text_data_ring;
	atomic_long_t		fail;
};
100
/*
 * Used by writers as a reserve/commit handle.
 *
 * @rb:         Ringbuffer where the entry is reserved.
 * @irqflags:   Saved irq flags to restore on entry commit.
 * @id:         ID of the reserved descriptor.
 * @text_space: Total occupied buffer space in the text data ring, including
 *              ID, alignment padding, and wrapping data blocks.
 *
 * This structure is an opaque handle for writers. Its contents are only
 * to be used by the ringbuffer implementation. Writers merely pass a pointer
 * to it to prb_reserve()/prb_reserve_in_last() and afterwards to
 * prb_commit()/prb_final_commit().
 */
struct prb_reserved_entry {
	struct printk_ringbuffer	*rb;
	unsigned long			irqflags;
	unsigned long			id;
	unsigned int			text_space;
};
119
/*
 * The possible responses of a descriptor state-query.
 *
 * The values 0x0-0x3 are the ones that fit into the two state bits of a
 * descriptor's state variable (see DESC_STATE() and DESC_SV()). desc_miss
 * is negative and therefore can never be the result of DESC_STATE().
 */
enum desc_state {
	desc_miss	=  -1,	/* ID mismatch (pseudo state) */
	desc_reserved	= 0x0,	/* reserved, in use by writer */
	desc_committed	= 0x1,	/* committed by writer, could get reopened */
	desc_finalized	= 0x2,	/* committed, no further modification allowed */
	desc_reusable	= 0x3,	/* free, not yet used by any writer */
};
128
/*
 * Sizing helpers and the encoding of a descriptor's state variable (sv).
 *
 * _DATA_SIZE():   Byte size of a data ring with the given size exponent.
 * _DESCS_COUNT(): Number of descriptors for the given count exponent.
 *
 * A state variable is a single unsigned long. Its two most significant bits
 * (DESC_FLAGS_MASK) hold the descriptor state, the remaining bits
 * (DESC_ID_MASK) hold the descriptor ID:
 *
 * DESC_STATE(): Extract the state bits from a state variable.
 * DESC_SV():    Build a state variable from an ID and a state.
 * DESC_ID():    Extract the ID from a state variable.
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as "a | b" or "c ? x : y" are evaluated as a unit.
 */
#define _DATA_SIZE(sz_bits)	(1UL << (sz_bits))
#define _DESCS_COUNT(ct_bits)	(1U << (ct_bits))
#define DESC_SV_BITS		BITS_PER_LONG
#define DESC_FLAGS_SHIFT	(DESC_SV_BITS - 2)
#define DESC_FLAGS_MASK		(3UL << DESC_FLAGS_SHIFT)
#define DESC_STATE(sv)		(3UL & ((sv) >> DESC_FLAGS_SHIFT))
#define DESC_SV(id, state)	(((unsigned long)(state) << DESC_FLAGS_SHIFT) | (id))
#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
138
139 /*
140 * Special data block logical position values (for fields of
141 * @prb_desc.text_blk_lpos).
142 *
143 * - Bit0 is used to identify if the record has no data block. (Implemented in
144 * the LPOS_DATALESS() macro.)
145 *
146 * - Bit1 specifies the reason for not having a data block.
147 *
148 * These special values could never be real lpos values because of the
149 * meta data and alignment padding of data blocks. (See to_blk_size() for
150 * details.)
151 */
/* Data-less marker with only bit0 set (bit1 clear). */
#define FAILED_LPOS		0x1
/* Data-less marker with bit0 and bit1 set. */
#define EMPTY_LINE_LPOS		0x3

/*
 * Initializer for a struct prb_data_blk_lpos that has no data block:
 * both @begin and @next are set to FAILED_LPOS. Used by _DEFINE_PRINTKRB()
 * for the initial head/tail descriptor.
 */
#define FAILED_BLK_LPOS	\
{				\
	.begin	= FAILED_LPOS,	\
	.next	= FAILED_LPOS,	\
}
160
161 /*
162 * Descriptor Bootstrap
163 *
164 * The descriptor array is minimally initialized to allow immediate usage
165 * by readers and writers. The requirements that the descriptor array
166 * initialization must satisfy:
167 *
168 * Req1
169 * The tail must point to an existing (committed or reusable) descriptor.
170 * This is required by the implementation of prb_first_seq().
171 *
172 * Req2
173 * Readers must see that the ringbuffer is initially empty.
174 *
175 * Req3
176 * The first record reserved by a writer is assigned sequence number 0.
177 *
178 * To satisfy Req1, the tail initially points to a descriptor that is
179 * minimally initialized (having no data block, i.e. data-less with the
180 * data block's lpos @begin and @next values set to FAILED_LPOS).
181 *
182 * To satisfy Req2, the initial tail descriptor is initialized to the
183 * reusable state. Readers recognize reusable descriptors as existing
184 * records, but skip over them.
185 *
186 * To satisfy Req3, the last descriptor in the array is used as the initial
187 * head (and tail) descriptor. This allows the first record reserved by a
188 * writer (head + 1) to be the first descriptor in the array. (Only the first
189 * descriptor in the array could have a valid sequence number of 0.)
190 *
191 * The first time a descriptor is reserved, it is assigned a sequence number
192 * with the value of the array index. A "first time reserved" descriptor can
193 * be recognized because it has a sequence number of 0 but does not have an
194 * index of 0. (Only the first descriptor in the array could have a valid
195 * sequence number of 0.) After the first reservation, all future reservations
196 * (recycling) simply involve incrementing the sequence number by the array
197 * count.
198 *
199 * Hack #1
200 * Only the first descriptor in the array is allowed to have the sequence
201 * number 0. In this case it is not possible to recognize if it is being
202 * reserved the first time (set to index value) or has been reserved
203 * previously (increment by the array count). This is handled by _always_
204 * incrementing the sequence number by the array count when reserving the
205 * first descriptor in the array. In order to satisfy Req3, the sequence
206 * number of the first descriptor in the array is initialized to minus
207 * the array count. Then, upon the first reservation, it is incremented
208 * to 0, thus satisfying Req3.
209 *
210 * Hack #2
211 * prb_first_seq() can be called at any time by readers to retrieve the
212 * sequence number of the tail descriptor. However, due to Req2 and Req3,
213 * initially there are no records to report the sequence number of
214 * (sequence numbers are u64 and there is nothing less than 0). To handle
215 * this, the sequence number of the initial tail descriptor is initialized
216 * to 0. Technically this is incorrect, because there is no record with
217 * sequence number 0 (yet) and the tail descriptor is not the first
218 * descriptor in the array. But it allows prb_read_valid() to correctly
219 * report the existence of a record for _any_ given sequence number at all
220 * times. Bootstrapping is complete when the tail is pushed the first
221 * time, thus finally pointing to the first descriptor reserved by a
222 * writer, which has the assigned sequence number 0.
223 */
224
225 /*
226 * Initiating Logical Value Overflows
227 *
228 * Both logical position (lpos) and ID values can be mapped to array indexes
229 * but may experience overflows during the lifetime of the system. To ensure
230 * that printk_ringbuffer can handle the overflows for these types, initial
231 * values are chosen that map to the correct initial array indexes, but will
232 * result in overflows soon.
233 *
234 * BLK0_LPOS
235 * The initial @head_lpos and @tail_lpos for data rings. It is at index
236 * 0 and the lpos value is such that it will overflow on the first wrap.
237 *
238 * DESC0_ID
239 * The initial @head_id and @tail_id for the desc ring. It is at the last
240 * index of the descriptor array (see Req3 above) and the ID value is such
241 * that it will overflow on the second wrap.
242 */
/* The negated data ring size for the given size exponent. */
#define BLK0_LPOS(sz_bits)	(-(_DATA_SIZE(sz_bits)))
/* The negated (descriptor count + 1), reduced to the ID bits by DESC_ID(). */
#define DESC0_ID(ct_bits)	DESC_ID(-(_DESCS_COUNT(ct_bits) + 1))
/* The initial state variable: DESC0_ID() combined with the reusable state. */
#define DESC0_SV(ct_bits)	DESC_SV(DESC0_ID(ct_bits), desc_reusable)
246
/*
 * Define a ringbuffer with an external text data buffer. The same as
 * DEFINE_PRINTKRB() but requires specifying an external buffer for the
 * text data.
 *
 * @name:        The name of the ringbuffer variable.
 * @descbits:    The number of descriptors as a power-of-2 value.
 * @avgtextbits: The average text data size per record as a power-of-2 value.
 * @text_buf:    The external buffer used for the text data.
 *
 * Three static objects are defined: the descriptor array _<name>_descs and
 * the info array _<name>_infos (each with 2 ^ descbits elements) and the
 * ringbuffer <name> itself. Only the elements needed for bootstrapping are
 * explicitly initialized (see "Descriptor Bootstrap" in this file).
 *
 * All atomic_long_t members are initialized with ATOMIC_LONG_INIT() so that
 * the initializer always matches the declared type of the member.
 *
 * Note: The specified external buffer must be of the size:
 *       2 ^ (descbits + avgtextbits)
 */
#define _DEFINE_PRINTKRB(name, descbits, avgtextbits, text_buf)			\
static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = {				\
	/* the initial head and tail */								\
	[_DESCS_COUNT(descbits) - 1] = {							\
		/* reusable */									\
		.state_var	= ATOMIC_LONG_INIT(DESC0_SV(descbits)),				\
		/* no associated data block */							\
		.text_blk_lpos	= FAILED_BLK_LPOS,						\
	},											\
};												\
static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = {				\
	/* this will be the first record reserved by a writer */				\
	[0] = {											\
		/* will be incremented to 0 on the first reservation */				\
		.seq = -(u64)_DESCS_COUNT(descbits),						\
	},											\
	/* the initial head and tail */								\
	[_DESCS_COUNT(descbits) - 1] = {							\
		/* reports the first seq value during the bootstrap phase */			\
		.seq = 0,									\
	},											\
};												\
static struct printk_ringbuffer name = {							\
	.desc_ring = {										\
		.count_bits	= (descbits),							\
		.descs		= &_##name##_descs[0],						\
		.infos		= &_##name##_infos[0],						\
		.head_id	= ATOMIC_LONG_INIT(DESC0_ID(descbits)),				\
		.tail_id	= ATOMIC_LONG_INIT(DESC0_ID(descbits)),				\
		.last_finalized_seq = ATOMIC_LONG_INIT(0),					\
	},											\
	.text_data_ring = {									\
		.size_bits	= (avgtextbits) + (descbits),					\
		.data		= (text_buf),							\
		.head_lpos	= ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))),	\
		.tail_lpos	= ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))),	\
	},											\
	.fail			= ATOMIC_LONG_INIT(0),						\
}
294
/**
 * DEFINE_PRINTKRB() - Define a ringbuffer.
 *
 * @name:        The name of the ringbuffer variable.
 * @descbits:    The number of descriptors as a power-of-2 value.
 * @avgtextbits: The average text data size per record as a power-of-2 value.
 *
 * This is a macro for defining a ringbuffer and all internal structures
 * such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a
 * variant where the text data buffer can be specified externally.
 *
 * The text buffer _<name>_text is defined here with a size of
 * 2 ^ (descbits + avgtextbits) bytes, aligned for unsigned long. Its size
 * is computed with _DATA_SIZE(), the same helper that expresses the size of
 * a data ring from its size exponent, so both always agree.
 */
#define DEFINE_PRINTKRB(name, descbits, avgtextbits)				\
static char _##name##_text[_DATA_SIZE((avgtextbits) + (descbits))]		\
			__aligned(__alignof__(unsigned long));			\
_DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0])
310
311 /* Writer Interface */
312
313 /**
314 * prb_rec_init_wr() - Initialize a buffer for writing records.
315 *
316 * @r: The record to initialize.
317 * @text_buf_size: The needed text buffer size.
318 */
prb_rec_init_wr(struct printk_record * r,unsigned int text_buf_size)319 static inline void prb_rec_init_wr(struct printk_record *r,
320 unsigned int text_buf_size)
321 {
322 r->info = NULL;
323 r->text_buf = NULL;
324 r->text_buf_size = text_buf_size;
325 }
326
/*
 * Writer-side functions. This header only declares them.
 *
 * prb_reserve() and prb_reserve_in_last() receive a reserve/commit handle
 * (@e), the ringbuffer (@rb) and a record (@r); prb_commit() and
 * prb_final_commit() only receive the handle.
 */
bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
		 struct printk_record *r);
bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
			 struct printk_record *r, u32 caller_id, unsigned int max_size);
void prb_commit(struct prb_reserved_entry *e);
void prb_final_commit(struct prb_reserved_entry *e);

/*
 * prb_init() receives the text buffer with its size, the descriptor array
 * with its count exponent, and the info array to be used by @rb.
 */
void prb_init(struct printk_ringbuffer *rb,
	      char *text_buf, unsigned int text_buf_size,
	      struct prb_desc *descs, unsigned int descs_count_bits,
	      struct printk_info *infos);
unsigned int prb_record_text_space(struct prb_reserved_entry *e);
339
340 /* Reader Interface */
341
342 /**
343 * prb_rec_init_rd() - Initialize a buffer for reading records.
344 *
345 * @r: The record to initialize.
346 * @info: A buffer to store record meta-data.
347 * @text_buf: A buffer to store text data.
348 * @text_buf_size: The size of @text_buf.
349 *
350 * Initialize all the fields that a reader is interested in. All arguments
351 * (except @r) are optional. Only record data for arguments that are
352 * non-NULL or non-zero will be read.
353 */
prb_rec_init_rd(struct printk_record * r,struct printk_info * info,char * text_buf,unsigned int text_buf_size)354 static inline void prb_rec_init_rd(struct printk_record *r,
355 struct printk_info *info,
356 char *text_buf, unsigned int text_buf_size)
357 {
358 r->info = info;
359 r->text_buf = text_buf;
360 r->text_buf_size = text_buf_size;
361 }
362
/**
 * prb_for_each_record() - Walk the records of a ringbuffer.
 *
 * @from: The sequence number to begin with.
 * @rb:   The ringbuffer to iterate over.
 * @s:    A u64 to store the sequence number on each iteration.
 * @r:    A printk_record to store the record on each iteration.
 *
 * Expands to a for-loop header: @s starts at @from, the loop runs while
 * prb_read_valid() succeeds, and after each pass @s is advanced to the
 * sequence number following the record that was just read.
 *
 * Note that @s may not be the sequence number of the record on each
 * iteration. For the sequence number, @r->info->seq should be checked.
 *
 * Context: Any context.
 */
#define prb_for_each_record(from, rb, s, r)		\
	for ((s) = (from);				\
	     prb_read_valid((rb), (s), (r));		\
	     (s) = (r)->info->seq + 1)
379
/**
 * prb_for_each_info() - Walk the meta data of a ringbuffer.
 *
 * @from: The sequence number to begin with.
 * @rb:   The ringbuffer to iterate over.
 * @s:    A u64 to store the sequence number on each iteration.
 * @i:    A printk_info to store the record meta data on each iteration.
 * @lc:   An unsigned int to store the text line count of each record.
 *
 * Expands to a for-loop header: @s starts at @from, the loop runs while
 * prb_read_valid_info() succeeds, and after each pass @s is advanced to the
 * sequence number following the meta data that was just read.
 *
 * Note that @s may not be the sequence number of the record on each
 * iteration. For the sequence number, @i->seq should be checked.
 *
 * Context: Any context.
 */
#define prb_for_each_info(from, rb, s, i, lc)			\
	for ((s) = (from);					\
	     prb_read_valid_info((rb), (s), (i), (lc));		\
	     (s) = (i)->seq + 1)
397
/*
 * Reader-side functions. This header only declares them.
 *
 * prb_read_valid() and prb_read_valid_info() are the functions used as loop
 * conditions by prb_for_each_record() and prb_for_each_info().
 */
bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
		    struct printk_record *r);
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
			 struct printk_info *info, unsigned int *line_count);

/* Sequence number queries; each returns a u64 sequence number for @rb. */
u64 prb_first_seq(struct printk_ringbuffer *rb);
u64 prb_first_valid_seq(struct printk_ringbuffer *rb);
u64 prb_next_seq(struct printk_ringbuffer *rb);
u64 prb_next_reserve_seq(struct printk_ringbuffer *rb);
407
408 #ifdef CONFIG_64BIT
409
/*
 * CONFIG_64BIT variants: both sequence conversions expand to their
 * sequence argument unchanged (@rb is ignored).
 */
#define __u64seq_to_ulseq(u64seq) (u64seq)
#define __ulseq_to_u64seq(rb, ulseq) (ulseq)
/* Ignores @rb and expands to the constant -1. */
#define ULSEQ_MAX(rb) (-1)
413
414 #else /* CONFIG_64BIT */
415
/*
 * Variants for !CONFIG_64BIT: only the lower 32 bits of a sequence number
 * are kept.
 *
 * The argument of __u64seq_to_ulseq() is parenthesized so that the cast
 * applies to the whole expression. This matters for ULSEQ_MAX(), which
 * passes a sum: without the parentheses only the first operand would be
 * cast and the result would have the type of the addition instead of u32.
 */
#define __u64seq_to_ulseq(u64seq) ((u32)(u64seq))
#define ULSEQ_MAX(rb) __u64seq_to_ulseq(prb_first_seq(rb) + 0x80000000UL)
418
__ulseq_to_u64seq(struct printk_ringbuffer * rb,u32 ulseq)419 static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq)
420 {
421 u64 rb_first_seq = prb_first_seq(rb);
422 u64 seq;
423
424 /*
425 * The provided sequence is only the lower 32 bits of the ringbuffer
426 * sequence. It needs to be expanded to 64bit. Get the first sequence
427 * number from the ringbuffer and fold it.
428 *
429 * Having a 32bit representation in the console is sufficient.
430 * If a console ever gets more than 2^31 records behind
431 * the ringbuffer then this is the least of the problems.
432 *
433 * Also the access to the ring buffer is always safe.
434 */
435 seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq);
436
437 return seq;
438 }
439
440 #endif /* CONFIG_64BIT */
441
442 #endif /* _KERNEL_PRINTK_RINGBUFFER_H */
443