xref: /linux/drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.c (revision aec2f682d47c54ef434b2d440992626d80b1ebdc)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2026 Intel Corporation */
3 #include <linux/errno.h>
4 #include <linux/printk.h>
5 #include <linux/string.h>
6 #include <linux/unaligned.h>
7 #include <linux/zstd.h>
8 
9 #include "qat_comp_zstd_utils.h"
10 
11 #define ML_BITS		4	/* low bits of an LZ4s token: encoded match length */
12 #define ML_MASK		((1U << ML_BITS) - 1)	/* mask for, and escape value of, the match-length field */
13 #define RUN_BITS	(8 - ML_BITS)	/* remaining high bits of the token: literal run length */
14 #define RUN_MASK	((1U << RUN_BITS) - 1)	/* escape value: literal length continues in following bytes */
15 #define LZ4S_MINMATCH	2	/* added to every non-zero encoded match length */
16 
17 /*
18  * ZSTD blocks can decompress to at most min(windowSize, 128KB) bytes.
19  * Insert explicit block delimiters to keep blocks within this limit.
20  */
21 #define QAT_ZSTD_BLOCK_MAX	ZSTD_BLOCKSIZE_MAX
22 
/**
 * emit_delimiter() - append an explicit block delimiter to a sequence array
 * @out_seqs: sequence array being filled
 * @seqs_idx: index of the next free slot; advanced by one on success
 * @out_seqs_capacity: number of entries available in @out_seqs
 * @lz4s_buff_size: size of the LZ4s input, only reported in the debug message
 *
 * A delimiter is a sequence whose offset, litLength and matchLength are all
 * zero. It is only written while at least one more slot remains free after
 * it, so the caller can always store the sequence that follows.
 *
 * Return: 0 on success, -EOVERFLOW if @out_seqs has no room left.
 */
static int emit_delimiter(ZSTD_Sequence *out_seqs, size_t *seqs_idx,
			  size_t out_seqs_capacity, unsigned int lz4s_buff_size)
{
	/*
	 * Compare "idx + 1 >= capacity" instead of "idx >= capacity - 1":
	 * both agree for any non-zero capacity, but the latter wraps around
	 * for a zero capacity and would let the write below go out of bounds.
	 */
	if (*seqs_idx + 1 >= out_seqs_capacity) {
		pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n",
			 *seqs_idx, out_seqs_capacity, lz4s_buff_size);
		return -EOVERFLOW;
	}

	out_seqs[*seqs_idx].offset = 0;
	out_seqs[*seqs_idx].litLength = 0;
	out_seqs[*seqs_idx].matchLength = 0;
	(*seqs_idx)++;

	return 0;
}
39 
/**
 * qat_alg_dec_lz4s() - split an LZ4s stream into ZSTD sequences and literals
 * @out_seqs: array receiving the decoded sequences
 * @out_seqs_capacity: number of entries available in @out_seqs
 * @lz4s_buff: LZ4s encoded input
 * @lz4s_buff_size: size of @lz4s_buff in bytes
 * @literals: buffer receiving the concatenated literal bytes
 * @lit_len: set to the total number of literal bytes stored in @literals
 *
 * Each LZ4s token yields a literal run and, when its match length is
 * non-zero, one ZSTD sequence. Literal runs of tokens without a match are
 * accumulated and attached to the next emitted sequence. An all-zero
 * delimiter sequence is inserted whenever the next sequence would grow the
 * current block beyond QAT_ZSTD_BLOCK_MAX. The array always ends with one
 * sequence that has no match and carries the remaining literals.
 *
 * NOTE(review): literals are copied in chunks of QAT_ZSTD_LIT_COPY_LEN bytes,
 * so up to one chunk may be read past the end of a run and written past the
 * end of the stored literals. Presumably both buffers are allocated with
 * that much slack - confirm against the callers.
 *
 * Return: number of sequences written to @out_seqs (0 for an empty input),
 * -EOVERFLOW if @out_seqs is too small, -EINVAL if the stream is truncated.
 */
int qat_alg_dec_lz4s(ZSTD_Sequence *out_seqs, size_t out_seqs_capacity,
		     unsigned char *lz4s_buff, unsigned int lz4s_buff_size,
		     unsigned char *literals, unsigned int *lit_len)
{
	unsigned char *end_ip = lz4s_buff + lz4s_buff_size;
	unsigned char *ip = lz4s_buff;
	/* Literals of match-less tokens, not yet attached to a sequence. */
	size_t pending_lits = 0;
	size_t block_decomp_size = 0;
	size_t seqs_idx = 0;
	int ret;

	*lit_len = 0;

	if (!lz4s_buff_size)
		return 0;

	/* Even a literal-only stream needs one slot for its final sequence. */
	if (!out_seqs_capacity) {
		pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n",
			 seqs_idx, out_seqs_capacity, lz4s_buff_size);
		return -EOVERFLOW;
	}

	while (ip < end_ip) {
		const unsigned int token = *ip++;
		unsigned char *start, *dest, *dest_end;
		size_t literal_len, match_len;
		size_t length, offset;

		/* Literal length: high token bits, extended while bytes are 255 */
		length = token >> ML_BITS;
		if (length == RUN_MASK) {
			unsigned int s;

			do {
				if (ip >= end_ip)
					return -EINVAL;
				s = *ip++;
				length += s;
			} while (s == 255);
		}

		/* The literal run must lie entirely inside the input. */
		if (length > (size_t)(end_ip - ip))
			return -EINVAL;

		literal_len = length;

		start = ip;
		dest = literals;
		dest_end = literals + length;

		/* Chunked copy; runs at least once, even for an empty run. */
		do {
			memcpy(dest, start, QAT_ZSTD_LIT_COPY_LEN);
			dest += QAT_ZSTD_LIT_COPY_LEN;
			start += QAT_ZSTD_LIT_COPY_LEN;
		} while (dest < dest_end);

		literals += length;
		*lit_len += length;
		ip += length;

		/* Input ends with this literal run: it has no match part. */
		if (ip == end_ip) {
			pending_lits += literal_len;
			break;
		}

		if (end_ip - ip < 2)
			return -EINVAL;

		offset = get_unaligned_le16(ip);
		ip += 2;

		/* Match length: low token bits, extended while bytes are 255 */
		length = token & ML_MASK;
		if (length == ML_MASK) {
			unsigned int s;

			do {
				if (ip >= end_ip)
					return -EINVAL;
				s = *ip++;
				length += s;
			} while (s == 255);
		}

		if (!length) {
			/*
			 * No match in this token: keep its literals and hand
			 * them to the next sequence that gets emitted.
			 */
			pending_lits += literal_len;
			continue;
		}

		length += LZ4S_MINMATCH;
		/*
		 * NOTE(review): the match length is truncated to 16 bits here;
		 * presumably LZ4s never encodes longer matches - confirm.
		 */
		match_len = (unsigned short)length;
		literal_len += pending_lits;

		/*
		 * If this sequence would push the current block past the
		 * ZSTD maximum, close the block first.
		 */
		if (block_decomp_size + literal_len + match_len > QAT_ZSTD_BLOCK_MAX) {
			ret = emit_delimiter(out_seqs, &seqs_idx,
					     out_seqs_capacity,
					     lz4s_buff_size);
			if (ret)
				return ret;

			block_decomp_size = 0;
		}

		out_seqs[seqs_idx].offset = offset;
		out_seqs[seqs_idx].litLength = literal_len;
		out_seqs[seqs_idx].matchLength = match_len;
		pending_lits = 0;
		seqs_idx++;

		/* Keep the last slot free for the final sequence. */
		if (seqs_idx + 1 >= out_seqs_capacity) {
			pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n",
				 seqs_idx, out_seqs_capacity, lz4s_buff_size);
			return -EOVERFLOW;
		}

		block_decomp_size += literal_len + match_len;
	}

	/*
	 * If adding the trailing literals would overflow the current block,
	 * close it first.
	 */
	if (block_decomp_size + pending_lits > QAT_ZSTD_BLOCK_MAX) {
		ret = emit_delimiter(out_seqs, &seqs_idx, out_seqs_capacity,
				     lz4s_buff_size);
		if (ret)
			return ret;
	}

	/*
	 * Always write the final, match-less sequence, also when the input
	 * ended right after a match, so that the slot counted in the return
	 * value is never left uninitialized.
	 */
	out_seqs[seqs_idx].litLength = pending_lits;
	out_seqs[seqs_idx].offset = 0;
	out_seqs[seqs_idx].matchLength = 0;

	return seqs_idx + 1;
}
166