1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * linux/fs/hfsplus/unicode.c
4 *
5 * Copyright (C) 2001
6 * Brad Boyer (flar@allandria.com)
7 * (C) 2003 Ardis Technologies <roman@ardistech.com>
8 *
9 * Handler routines for unicode strings
10 */
11
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 #include "hfsplus_fs.h"
15 #include "hfsplus_raw.h"
16
17 /* Fold the case of a unicode char, given the 16 bit value */
18 /* Returns folded char, or 0 if ignorable */
case_fold(u16 c)19 static inline u16 case_fold(u16 c)
20 {
21 u16 tmp;
22
23 tmp = hfsplus_case_fold_table[c >> 8];
24 if (tmp)
25 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
26 else
27 tmp = c;
28 return tmp;
29 }
30
31 /* Compare unicode strings, return values like normal strcmp */
hfsplus_strcasecmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)32 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
33 const struct hfsplus_unistr *s2)
34 {
35 u16 len1, len2, c1, c2;
36 const hfsplus_unichr *p1, *p2;
37
38 len1 = be16_to_cpu(s1->length);
39 len2 = be16_to_cpu(s2->length);
40 p1 = s1->unicode;
41 p2 = s2->unicode;
42
43 while (1) {
44 c1 = c2 = 0;
45
46 while (len1 && !c1) {
47 c1 = case_fold(be16_to_cpu(*p1));
48 p1++;
49 len1--;
50 }
51 while (len2 && !c2) {
52 c2 = case_fold(be16_to_cpu(*p2));
53 p2++;
54 len2--;
55 }
56
57 if (c1 != c2)
58 return (c1 < c2) ? -1 : 1;
59 if (!c1 && !c2)
60 return 0;
61 }
62 }
63
64 /* Compare names as a sequence of 16-bit unsigned integers */
hfsplus_strcmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)65 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
66 const struct hfsplus_unistr *s2)
67 {
68 u16 len1, len2, c1, c2;
69 const hfsplus_unichr *p1, *p2;
70 int len;
71
72 len1 = be16_to_cpu(s1->length);
73 len2 = be16_to_cpu(s2->length);
74 p1 = s1->unicode;
75 p2 = s2->unicode;
76
77 for (len = min(len1, len2); len > 0; len--) {
78 c1 = be16_to_cpu(*p1);
79 c2 = be16_to_cpu(*p2);
80 if (c1 != c2)
81 return c1 < c2 ? -1 : 1;
82 p1++;
83 p2++;
84 }
85
86 return len1 < len2 ? -1 :
87 len1 > len2 ? 1 : 0;
88 }
89
90
/*
 * Constants for arithmetic Hangul (de)composition: base code points and
 * counts of the precomposed syllable block (S) and of the leading (L),
 * vowel (V) and trailing (T) jamo ranges.
 */
enum {
	Hangul_SBase	= 0xac00,
	Hangul_LBase	= 0x1100,
	Hangul_VBase	= 0x1161,
	Hangul_TBase	= 0x11a7,
	Hangul_SCount	= 11172,
	Hangul_LCount	= 19,
	Hangul_VCount	= 21,
	Hangul_TCount	= 28,
	/* number of syllables sharing one leading jamo */
	Hangul_NCount	= Hangul_VCount * Hangul_TCount,
};
100
101
/*
 * Look up @cc among the entries of the composition node at @p.
 *
 * As read here, p[1] holds the number of entries and entry i (1-based)
 * occupies p[2 * i] (key) and p[2 * i + 1] (offset of the matching node
 * inside hfsplus_compose_table). The keys are searched by bisection.
 *
 * Returns a pointer to the matching node, or NULL if the node has no
 * entries or @cc is not among its keys.
 */
static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
	int lo = 1;
	int hi = p[1];

	if (!hi)
		return NULL;
	/* quick reject when @cc lies outside the first..last key range */
	if (cc < p[2 * lo] || cc > p[2 * hi])
		return NULL;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		u16 key = p[2 * mid];

		if (key == cc)
			return hfsplus_compose_table + p[2 * mid + 1];
		if (key < cc)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return NULL;
}
121
hfsplus_uni2asc(struct super_block * sb,const struct hfsplus_unistr * ustr,char * astr,int * len_p)122 int hfsplus_uni2asc(struct super_block *sb,
123 const struct hfsplus_unistr *ustr,
124 char *astr, int *len_p)
125 {
126 const hfsplus_unichr *ip;
127 struct nls_table *nls = HFSPLUS_SB(sb)->nls;
128 u8 *op;
129 u16 cc, c0, c1;
130 u16 *ce1, *ce2;
131 int i, len, ustrlen, res, compose;
132
133 op = astr;
134 ip = ustr->unicode;
135
136 ustrlen = be16_to_cpu(ustr->length);
137 if (ustrlen > HFSPLUS_MAX_STRLEN) {
138 ustrlen = HFSPLUS_MAX_STRLEN;
139 pr_err("invalid length %u has been corrected to %d\n",
140 be16_to_cpu(ustr->length), ustrlen);
141 }
142
143 len = *len_p;
144 ce1 = NULL;
145 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
146
147 while (ustrlen > 0) {
148 c0 = be16_to_cpu(*ip++);
149 ustrlen--;
150 /* search for single decomposed char */
151 if (likely(compose))
152 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
153 if (ce1)
154 cc = ce1[0];
155 else
156 cc = 0;
157 if (cc) {
158 /* start of a possibly decomposed Hangul char */
159 if (cc != 0xffff)
160 goto done;
161 if (!ustrlen)
162 goto same;
163 c1 = be16_to_cpu(*ip) - Hangul_VBase;
164 if (c1 < Hangul_VCount) {
165 /* compose the Hangul char */
166 cc = (c0 - Hangul_LBase) * Hangul_VCount;
167 cc = (cc + c1) * Hangul_TCount;
168 cc += Hangul_SBase;
169 ip++;
170 ustrlen--;
171 if (!ustrlen)
172 goto done;
173 c1 = be16_to_cpu(*ip) - Hangul_TBase;
174 if (c1 > 0 && c1 < Hangul_TCount) {
175 cc += c1;
176 ip++;
177 ustrlen--;
178 }
179 goto done;
180 }
181 }
182 while (1) {
183 /* main loop for common case of not composed chars */
184 if (!ustrlen)
185 goto same;
186 c1 = be16_to_cpu(*ip);
187 if (likely(compose))
188 ce1 = hfsplus_compose_lookup(
189 hfsplus_compose_table, c1);
190 if (ce1)
191 break;
192 switch (c0) {
193 case 0:
194 c0 = 0x2400;
195 break;
196 case '/':
197 c0 = ':';
198 break;
199 }
200 res = nls->uni2char(c0, op, len);
201 if (res < 0) {
202 if (res == -ENAMETOOLONG)
203 goto out;
204 *op = '?';
205 res = 1;
206 }
207 op += res;
208 len -= res;
209 c0 = c1;
210 ip++;
211 ustrlen--;
212 }
213 ce2 = hfsplus_compose_lookup(ce1, c0);
214 if (ce2) {
215 i = 1;
216 while (i < ustrlen) {
217 ce1 = hfsplus_compose_lookup(ce2,
218 be16_to_cpu(ip[i]));
219 if (!ce1)
220 break;
221 i++;
222 ce2 = ce1;
223 }
224 cc = ce2[0];
225 if (cc) {
226 ip += i;
227 ustrlen -= i;
228 goto done;
229 }
230 }
231 same:
232 switch (c0) {
233 case 0:
234 cc = 0x2400;
235 break;
236 case '/':
237 cc = ':';
238 break;
239 default:
240 cc = c0;
241 }
242 done:
243 res = nls->uni2char(cc, op, len);
244 if (res < 0) {
245 if (res == -ENAMETOOLONG)
246 goto out;
247 *op = '?';
248 res = 1;
249 }
250 op += res;
251 len -= res;
252 }
253 res = 0;
254 out:
255 *len_p = (char *)op - astr;
256 return res;
257 }
258
259 /*
260 * Convert one or more ASCII characters into a single unicode character.
261 * Returns the number of ASCII characters corresponding to the unicode char.
262 */
asc2unichar(struct super_block * sb,const char * astr,int len,wchar_t * uc)263 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
264 wchar_t *uc)
265 {
266 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
267 if (size <= 0) {
268 *uc = '?';
269 size = 1;
270 }
271 switch (*uc) {
272 case 0x2400:
273 *uc = 0;
274 break;
275 case ':':
276 *uc = '/';
277 break;
278 }
279 return size;
280 }
281
282 /* Decomposes a non-Hangul unicode character. */
hfsplus_decompose_nonhangul(wchar_t uc,int * size)283 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
284 {
285 int off;
286
287 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
288 if (off == 0 || off == 0xffff)
289 return NULL;
290
291 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
292 if (!off)
293 return NULL;
294
295 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
296 if (!off)
297 return NULL;
298
299 off = hfsplus_decompose_table[off + (uc & 0xf)];
300 *size = off & 3;
301 if (*size == 0)
302 return NULL;
303 return hfsplus_decompose_table + (off / 4);
304 }
305
306 /*
307 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
308 * precomposed Hangul, otherwise return the length of the decomposition.
309 *
310 * This function was adapted from sample code from the Unicode Standard
311 * Annex #15: Unicode Normalization Forms, version 3.2.0.
312 *
313 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
314 * under the Terms of Use in http://www.unicode.org/copyright.html.
315 */
hfsplus_try_decompose_hangul(wchar_t uc,u16 * result)316 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
317 {
318 int index;
319 int l, v, t;
320
321 index = uc - Hangul_SBase;
322 if (index < 0 || index >= Hangul_SCount)
323 return 0;
324
325 l = Hangul_LBase + index / Hangul_NCount;
326 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
327 t = Hangul_TBase + index % Hangul_TCount;
328
329 result[0] = l;
330 result[1] = v;
331 if (t != Hangul_TBase) {
332 result[2] = t;
333 return 3;
334 }
335 return 2;
336 }
337
338 /* Decomposes a single unicode character. */
decompose_unichar(wchar_t uc,int * size,u16 * hangul_buffer)339 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
340 {
341 u16 *result;
342
343 /* Hangul is handled separately */
344 result = hangul_buffer;
345 *size = hfsplus_try_decompose_hangul(uc, result);
346 if (*size == 0)
347 result = hfsplus_decompose_nonhangul(uc, size);
348 return result;
349 }
350
hfsplus_asc2uni(struct super_block * sb,struct hfsplus_unistr * ustr,int max_unistr_len,const char * astr,int len)351 int hfsplus_asc2uni(struct super_block *sb,
352 struct hfsplus_unistr *ustr, int max_unistr_len,
353 const char *astr, int len)
354 {
355 int size, dsize, decompose;
356 u16 *dstr, outlen = 0;
357 wchar_t c;
358 u16 dhangul[3];
359
360 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
361 while (outlen < max_unistr_len && len > 0) {
362 size = asc2unichar(sb, astr, len, &c);
363
364 if (decompose)
365 dstr = decompose_unichar(c, &dsize, dhangul);
366 else
367 dstr = NULL;
368 if (dstr) {
369 if (outlen + dsize > max_unistr_len)
370 break;
371 do {
372 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
373 } while (--dsize > 0);
374 } else
375 ustr->unicode[outlen++] = cpu_to_be16(c);
376
377 astr += size;
378 len -= size;
379 }
380 ustr->length = cpu_to_be16(outlen);
381 if (len > 0)
382 return -ENAMETOOLONG;
383 return 0;
384 }
385
386 /*
387 * Hash a string to an integer as appropriate for the HFS+ filesystem.
388 * Composed unicode characters are decomposed and case-folding is performed
389 * if the appropriate bits are (un)set on the superblock.
390 */
hfsplus_hash_dentry(const struct dentry * dentry,struct qstr * str)391 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
392 {
393 struct super_block *sb = dentry->d_sb;
394 const char *astr;
395 const u16 *dstr;
396 int casefold, decompose, size, len;
397 unsigned long hash;
398 wchar_t c;
399 u16 c2;
400 u16 dhangul[3];
401
402 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
403 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
404 hash = init_name_hash(dentry);
405 astr = str->name;
406 len = str->len;
407 while (len > 0) {
408 int dsize;
409 size = asc2unichar(sb, astr, len, &c);
410 astr += size;
411 len -= size;
412
413 if (decompose)
414 dstr = decompose_unichar(c, &dsize, dhangul);
415 else
416 dstr = NULL;
417 if (dstr) {
418 do {
419 c2 = *dstr++;
420 if (casefold)
421 c2 = case_fold(c2);
422 if (!casefold || c2)
423 hash = partial_name_hash(c2, hash);
424 } while (--dsize > 0);
425 } else {
426 c2 = c;
427 if (casefold)
428 c2 = case_fold(c2);
429 if (!casefold || c2)
430 hash = partial_name_hash(c2, hash);
431 }
432 }
433 str->hash = end_name_hash(hash);
434
435 return 0;
436 }
437
438 /*
439 * Compare strings with HFS+ filename ordering.
440 * Composed unicode characters are decomposed and case-folding is performed
441 * if the appropriate bits are (un)set on the superblock.
442 */
hfsplus_compare_dentry(const struct dentry * dentry,unsigned int len,const char * str,const struct qstr * name)443 int hfsplus_compare_dentry(const struct dentry *dentry,
444 unsigned int len, const char *str, const struct qstr *name)
445 {
446 struct super_block *sb = dentry->d_sb;
447 int casefold, decompose, size;
448 int dsize1, dsize2, len1, len2;
449 const u16 *dstr1, *dstr2;
450 const char *astr1, *astr2;
451 u16 c1, c2;
452 wchar_t c;
453 u16 dhangul_1[3], dhangul_2[3];
454
455 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
456 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
457 astr1 = str;
458 len1 = len;
459 astr2 = name->name;
460 len2 = name->len;
461 dsize1 = dsize2 = 0;
462 dstr1 = dstr2 = NULL;
463
464 while (len1 > 0 && len2 > 0) {
465 if (!dsize1) {
466 size = asc2unichar(sb, astr1, len1, &c);
467 astr1 += size;
468 len1 -= size;
469
470 if (decompose)
471 dstr1 = decompose_unichar(c, &dsize1,
472 dhangul_1);
473 if (!decompose || !dstr1) {
474 c1 = c;
475 dstr1 = &c1;
476 dsize1 = 1;
477 }
478 }
479
480 if (!dsize2) {
481 size = asc2unichar(sb, astr2, len2, &c);
482 astr2 += size;
483 len2 -= size;
484
485 if (decompose)
486 dstr2 = decompose_unichar(c, &dsize2,
487 dhangul_2);
488 if (!decompose || !dstr2) {
489 c2 = c;
490 dstr2 = &c2;
491 dsize2 = 1;
492 }
493 }
494
495 c1 = *dstr1;
496 c2 = *dstr2;
497 if (casefold) {
498 c1 = case_fold(c1);
499 if (!c1) {
500 dstr1++;
501 dsize1--;
502 continue;
503 }
504 c2 = case_fold(c2);
505 if (!c2) {
506 dstr2++;
507 dsize2--;
508 continue;
509 }
510 }
511 if (c1 < c2)
512 return -1;
513 else if (c1 > c2)
514 return 1;
515
516 dstr1++;
517 dsize1--;
518 dstr2++;
519 dsize2--;
520 }
521
522 if (len1 < len2)
523 return -1;
524 if (len1 > len2)
525 return 1;
526 return 0;
527 }
528