1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
2 /*
3  * Copyright (c) Meta Platforms, Inc. and affiliates.
4  * All rights reserved.
5  *
6  * This source code is licensed under both the BSD-style license (found in the
7  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8  * in the COPYING file in the root directory of this source tree).
9  * You may select, at your option, one of the above-listed licenses.
10  */
11 
12 /* zstd_ddict.c :
13  * concentrates all logic that needs to know the internals of ZSTD_DDict object */
14 
15 /*-*******************************************************
16 *  Dependencies
17 *********************************************************/
18 #include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */
19 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
20 #include "../common/cpu.h"         /* bmi2 */
21 #include "../common/mem.h"         /* low level memory routines */
22 #define FSE_STATIC_LINKING_ONLY
23 #include "../common/fse.h"
24 #include "../common/huf.h"
25 #include "zstd_decompress_internal.h"
26 #include "zstd_ddict.h"
27 
28 
29 
30 
31 /*-*******************************************************
32 *  Types
33 *********************************************************/
/* Digested dictionary used for decompression.
 * Fields are populated by ZSTD_initDDict_internal() and ZSTD_loadEntropy_intoDDict(). */
struct ZSTD_DDict_s {
    /* Internal copy of the dictionary, allocated with cMem;
     * NULL when the content is only referenced (or when there is no content). */
    void* dictBuffer;
    /* Start of the dictionary content : either dictBuffer, or memory provided by the caller. */
    const void* dictContent;
    /* Size of dictContent, in bytes. */
    size_t dictSize;
    /* Entropy tables and repcodes loaded from the dictionary;
     * ZSTD_copyDDictParameters() only forwards them when entropyPresent is set. */
    ZSTD_entropyDTables_t entropy;
    /* Dictionary ID read from the dictionary header; 0 when no ID was read. */
    U32 dictID;
    /* 1 once entropy tables were successfully loaded from the dictionary, 0 otherwise. */
    U32 entropyPresent;
    /* Allocator, used by ZSTD_freeDDict() to release dictBuffer and the DDict itself. */
    ZSTD_customMem cMem;
};  /* typedef'd to ZSTD_DDict within "zstd.h" */
43 
44 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
45 {
46     assert(ddict != NULL);
47     return ddict->dictContent;
48 }
49 
50 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
51 {
52     assert(ddict != NULL);
53     return ddict->dictSize;
54 }
55 
56 void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
57 {
58     DEBUGLOG(4, "ZSTD_copyDDictParameters");
59     assert(dctx != NULL);
60     assert(ddict != NULL);
61     dctx->dictID = ddict->dictID;
62     dctx->prefixStart = ddict->dictContent;
63     dctx->virtualStart = ddict->dictContent;
64     dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
65     dctx->previousDstEnd = dctx->dictEnd;
66 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
67     dctx->dictContentBeginForFuzzing = dctx->prefixStart;
68     dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
69 #endif
70     if (ddict->entropyPresent) {
71         dctx->litEntropy = 1;
72         dctx->fseEntropy = 1;
73         dctx->LLTptr = ddict->entropy.LLTable;
74         dctx->MLTptr = ddict->entropy.MLTable;
75         dctx->OFTptr = ddict->entropy.OFTable;
76         dctx->HUFptr = ddict->entropy.hufTable;
77         dctx->entropy.rep[0] = ddict->entropy.rep[0];
78         dctx->entropy.rep[1] = ddict->entropy.rep[1];
79         dctx->entropy.rep[2] = ddict->entropy.rep[2];
80     } else {
81         dctx->litEntropy = 0;
82         dctx->fseEntropy = 0;
83     }
84 }
85 
86 
87 static size_t
88 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
89                            ZSTD_dictContentType_e dictContentType)
90 {
91     ddict->dictID = 0;
92     ddict->entropyPresent = 0;
93     if (dictContentType == ZSTD_dct_rawContent) return 0;
94 
95     if (ddict->dictSize < 8) {
96         if (dictContentType == ZSTD_dct_fullDict)
97             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
98         return 0;   /* pure content mode */
99     }
100     {   U32 const magic = MEM_readLE32(ddict->dictContent);
101         if (magic != ZSTD_MAGIC_DICTIONARY) {
102             if (dictContentType == ZSTD_dct_fullDict)
103                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
104             return 0;   /* pure content mode */
105         }
106     }
107     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
108 
109     /* load entropy tables */
110     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
111             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
112         dictionary_corrupted, "");
113     ddict->entropyPresent = 1;
114     return 0;
115 }
116 
117 
118 static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
119                                       const void* dict, size_t dictSize,
120                                       ZSTD_dictLoadMethod_e dictLoadMethod,
121                                       ZSTD_dictContentType_e dictContentType)
122 {
123     if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
124         ddict->dictBuffer = NULL;
125         ddict->dictContent = dict;
126         if (!dict) dictSize = 0;
127     } else {
128         void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
129         ddict->dictBuffer = internalBuffer;
130         ddict->dictContent = internalBuffer;
131         if (!internalBuffer) return ERROR(memory_allocation);
132         ZSTD_memcpy(internalBuffer, dict, dictSize);
133     }
134     ddict->dictSize = dictSize;
135     ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */
136 
137     /* parse dictionary content */
138     FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
139 
140     return 0;
141 }
142 
143 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
144                                       ZSTD_dictLoadMethod_e dictLoadMethod,
145                                       ZSTD_dictContentType_e dictContentType,
146                                       ZSTD_customMem customMem)
147 {
148     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
149 
150     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
151         if (ddict == NULL) return NULL;
152         ddict->cMem = customMem;
153         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
154                                             dict, dictSize,
155                                             dictLoadMethod, dictContentType);
156             if (ZSTD_isError(initResult)) {
157                 ZSTD_freeDDict(ddict);
158                 return NULL;
159         }   }
160         return ddict;
161     }
162 }
163 
164 /*! ZSTD_createDDict() :
165 *   Create a digested dictionary, to start decompression without startup delay.
166 *   `dict` content is copied inside DDict.
167 *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
168 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
169 {
170     ZSTD_customMem const allocator = { NULL, NULL, NULL };
171     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
172 }
173 
174 /*! ZSTD_createDDict_byReference() :
175  *  Create a digested dictionary, to start decompression without startup delay.
176  *  Dictionary content is simply referenced, it will be accessed during decompression.
177  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
178 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
179 {
180     ZSTD_customMem const allocator = { NULL, NULL, NULL };
181     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
182 }
183 
184 
185 const ZSTD_DDict* ZSTD_initStaticDDict(
186                                 void* sBuffer, size_t sBufferSize,
187                                 const void* dict, size_t dictSize,
188                                 ZSTD_dictLoadMethod_e dictLoadMethod,
189                                 ZSTD_dictContentType_e dictContentType)
190 {
191     size_t const neededSpace = sizeof(ZSTD_DDict)
192                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
193     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
194     assert(sBuffer != NULL);
195     assert(dict != NULL);
196     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
197     if (sBufferSize < neededSpace) return NULL;
198     if (dictLoadMethod == ZSTD_dlm_byCopy) {
199         ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
200         dict = ddict+1;
201     }
202     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
203                                               dict, dictSize,
204                                               ZSTD_dlm_byRef, dictContentType) ))
205         return NULL;
206     return ddict;
207 }
208 
209 
210 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
211 {
212     if (ddict==NULL) return 0;   /* support free on NULL */
213     {   ZSTD_customMem const cMem = ddict->cMem;
214         ZSTD_customFree(ddict->dictBuffer, cMem);
215         ZSTD_customFree(ddict, cMem);
216         return 0;
217     }
218 }
219 
220 /*! ZSTD_estimateDDictSize() :
221  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
222  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
223 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
224 {
225     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
226 }
227 
228 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
229 {
230     if (ddict==NULL) return 0;   /* support sizeof on NULL */
231     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
232 }
233 
234 /*! ZSTD_getDictID_fromDDict() :
235  *  Provides the dictID of the dictionary loaded into `ddict`.
236  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
237  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
238 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
239 {
240     if (ddict==NULL) return 0;
241     return ddict->dictID;
242 }
243