1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2026 Intel Corporation */
3 #include <linux/errno.h>
4 #include <linux/printk.h>
5 #include <linux/string.h>
6 #include <linux/unaligned.h>
7 #include <linux/zstd.h>
8
9 #include "qat_comp_zstd_utils.h"
10
/*
 * LZ4s token layout, as decoded in this file: the low ML_BITS bits of the
 * token byte carry the match length and the remaining high RUN_BITS bits
 * carry the literal length. A field equal to its mask is continued by
 * extension bytes, each added to the length, until a byte other than 255
 * is read.
 */
#define ML_BITS 4
#define ML_MASK ((1U << ML_BITS) - 1)
#define RUN_BITS (8 - ML_BITS)
#define RUN_MASK ((1U << RUN_BITS) - 1)
/* Bias added to every non-zero encoded match length. */
#define LZ4S_MINMATCH 2

/*
 * ZSTD blocks can decompress to at most min(windowSize, 128KB) bytes.
 * Insert explicit block delimiters to keep blocks within this limit.
 */
#define QAT_ZSTD_BLOCK_MAX ZSTD_BLOCKSIZE_MAX
22
/*
 * emit_delimiter() - append an all-zero delimiter entry to the sequence array.
 * @out_seqs: sequence array being filled
 * @seqs_idx: in/out index of the next free entry; advanced by one on success
 * @out_seqs_capacity: number of entries available in @out_seqs
 * @lz4s_buff_size: size of the LZ4s input, used only in the debug message
 *
 * The entry written has offset, litLength and matchLength all set to zero.
 * The last slot of the array is never used for a delimiter, so the caller
 * can still store one more entry at the updated index.
 *
 * Return: 0 on success, -EOVERFLOW when the delimiter would not leave a
 * free slot behind it.
 */
static int emit_delimiter(ZSTD_Sequence *out_seqs, size_t *seqs_idx,
			  size_t out_seqs_capacity, unsigned int lz4s_buff_size)
{
	/*
	 * Test for a zero capacity explicitly: "capacity - 1" on an unsigned
	 * zero wraps to SIZE_MAX and would let the write below go through.
	 */
	if (!out_seqs_capacity || *seqs_idx >= out_seqs_capacity - 1) {
		pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n",
			 *seqs_idx, out_seqs_capacity, lz4s_buff_size);
		return -EOVERFLOW;
	}

	out_seqs[*seqs_idx].offset = 0;
	out_seqs[*seqs_idx].litLength = 0;
	out_seqs[*seqs_idx].matchLength = 0;
	(*seqs_idx)++;

	return 0;
}
39
/**
 * qat_alg_dec_lz4s() - convert an LZ4s stream into ZSTD sequences and literals
 * @out_seqs: array receiving the generated sequences
 * @out_seqs_capacity: number of entries available in @out_seqs
 * @lz4s_buff: LZ4s encoded input
 * @lz4s_buff_size: size of @lz4s_buff in bytes
 * @literals: buffer receiving the concatenated literal bytes
 * @lit_len: set to the total number of literal bytes stored in @literals
 *
 * Every LZ4s token with a non-zero match length becomes one sequence. Tokens
 * with a zero match length only contribute literals, which are carried over
 * and attached to the next sequence. The output always ends with an entry
 * whose offset and matchLength are zero and whose litLength holds the
 * literals that were not attached to any match. Additional all-zero
 * delimiter entries are inserted so that no block exceeds
 * QAT_ZSTD_BLOCK_MAX decompressed bytes.
 *
 * Literals are copied in QAT_ZSTD_LIT_COPY_LEN sized chunks, so a copy may
 * read past the end of a literal run and write past the end of the stored
 * literals by up to one chunk.
 * NOTE(review): this assumes both @lz4s_buff and @literals are allocated
 * with at least QAT_ZSTD_LIT_COPY_LEN bytes of slack - confirm with callers.
 *
 * Return: number of entries written to @out_seqs (including the final one),
 * 0 if @lz4s_buff_size is 0, -EOVERFLOW if @out_seqs is too small, or
 * -EINVAL if the stream ends in the middle of a token.
 */
int qat_alg_dec_lz4s(ZSTD_Sequence *out_seqs, size_t out_seqs_capacity,
		     unsigned char *lz4s_buff, unsigned int lz4s_buff_size,
		     unsigned char *literals, unsigned int *lit_len)
{
	unsigned char *end_ip = lz4s_buff + lz4s_buff_size;
	unsigned char *start, *dest, *dest_end;
	unsigned char *ip = lz4s_buff;
	size_t block_decomp_size = 0;
	/* Literals seen since the last emitted match sequence. */
	size_t pending_lits = 0;
	size_t seqs_idx = 0;
	int ret;

	*lit_len = 0;

	if (!lz4s_buff_size)
		return 0;

	/*
	 * Without this, "out_seqs_capacity - 1" below wraps around and the
	 * first store to out_seqs[] lands outside the array.
	 */
	if (!out_seqs_capacity) {
		pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n",
			 seqs_idx, out_seqs_capacity, lz4s_buff_size);
		return -EOVERFLOW;
	}

	while (ip < end_ip) {
		const unsigned int token = *ip++;
		size_t literal_len, match_len;
		size_t length;
		size_t offset;

		/* Get literal length */
		length = token >> ML_BITS;
		if (length == RUN_MASK) {
			unsigned int s;

			do {
				if (ip >= end_ip)
					goto err_truncated;
				s = *ip++;
				length += s;
			} while (s == 255);
		}

		/* The literal run must lie entirely inside the input. */
		if (length > (size_t)(end_ip - ip))
			goto err_truncated;

		literal_len = length;

		start = ip;
		dest = literals;
		dest_end = literals + length;

		/* Chunked copy; runs at least once even for length == 0. */
		do {
			memcpy(dest, start, QAT_ZSTD_LIT_COPY_LEN);
			dest += QAT_ZSTD_LIT_COPY_LEN;
			start += QAT_ZSTD_LIT_COPY_LEN;
		} while (dest < dest_end);

		literals += length;
		*lit_len += length;

		ip += length;
		if (ip == end_ip) {
			/* Stream ends with literals only; finish below. */
			pending_lits += literal_len;
			break;
		}

		if (end_ip - ip < 2)
			goto err_truncated;

		offset = get_unaligned_le16(ip);
		ip += 2;

		length = token & ML_MASK;
		if (length == ML_MASK) {
			unsigned int s;

			do {
				if (ip >= end_ip)
					goto err_truncated;
				s = *ip++;
				length += s;
			} while (s == 255);
		}

		if (!length) {
			/*
			 * When match length is 0, the literal length needs
			 * to be temporarily stored and processed together
			 * with the next data block.
			 */
			pending_lits += literal_len;
			continue;
		}

		length += LZ4S_MINMATCH;
		/*
		 * NOTE(review): the match length is deliberately kept at 16
		 * bits as before; confirm the producer never exceeds that.
		 */
		match_len = (unsigned short)length;
		literal_len += pending_lits;
		pending_lits = 0;

		/*
		 * If this sequence would push the current block past
		 * the ZSTD maximum, close the block first.
		 */
		if (block_decomp_size + literal_len + match_len > QAT_ZSTD_BLOCK_MAX) {
			ret = emit_delimiter(out_seqs, &seqs_idx,
					     out_seqs_capacity,
					     lz4s_buff_size);
			if (ret)
				return ret;

			block_decomp_size = 0;
		}

		out_seqs[seqs_idx].offset = offset;
		out_seqs[seqs_idx].litLength = literal_len;
		out_seqs[seqs_idx].matchLength = match_len;
		seqs_idx++;
		/* Always keep room for the final entry written below. */
		if (seqs_idx >= out_seqs_capacity - 1) {
			pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n",
				 seqs_idx, out_seqs_capacity, lz4s_buff_size);
			return -EOVERFLOW;
		}

		block_decomp_size += literal_len + match_len;
	}

	/*
	 * Write the final entry on every exit from the loop, including a
	 * stream that ends right after a match, so that the returned count
	 * never covers an entry that was left unwritten.
	 *
	 * If adding trailing literals would overflow the current block,
	 * close it first.
	 */
	if (block_decomp_size + pending_lits > QAT_ZSTD_BLOCK_MAX) {
		ret = emit_delimiter(out_seqs, &seqs_idx, out_seqs_capacity,
				     lz4s_buff_size);
		if (ret)
			return ret;
	}

	out_seqs[seqs_idx].litLength = pending_lits;
	out_seqs[seqs_idx].offset = 0;
	out_seqs[seqs_idx].matchLength = 0;

	return seqs_idx + 1;

err_truncated:
	pr_debug("QAT ZSTD: truncated lz4s stream (pos:%td, lz4s_size:%u)\n",
		 ip - lz4s_buff, lz4s_buff_size);
	return -EINVAL;
}
166