1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * linux/fs/hfsplus/unicode.c
4 *
5 * Copyright (C) 2001
6 * Brad Boyer (flar@allandria.com)
7 * (C) 2003 Ardis Technologies <roman@ardistech.com>
8 *
9 * Handler routines for unicode strings
10 */
11
12 #include <linux/types.h>
13 #include <linux/nls.h>
14
15 #include <kunit/visibility.h>
16
17 #include "hfsplus_fs.h"
18 #include "hfsplus_raw.h"
19
20 /* Fold the case of a unicode char, given the 16 bit value */
21 /* Returns folded char, or 0 if ignorable */
case_fold(u16 c)22 static inline u16 case_fold(u16 c)
23 {
24 u16 tmp;
25
26 tmp = hfsplus_case_fold_table[c >> 8];
27 if (tmp)
28 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
29 else
30 tmp = c;
31 return tmp;
32 }
33
34 /* Compare unicode strings, return values like normal strcmp */
hfsplus_strcasecmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)35 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
36 const struct hfsplus_unistr *s2)
37 {
38 u16 len1, len2, c1, c2;
39 const hfsplus_unichr *p1, *p2;
40
41 len1 = be16_to_cpu(s1->length);
42 len2 = be16_to_cpu(s2->length);
43 p1 = s1->unicode;
44 p2 = s2->unicode;
45
46 if (len1 > HFSPLUS_MAX_STRLEN) {
47 len1 = HFSPLUS_MAX_STRLEN;
48 pr_err("invalid length %u has been corrected to %d\n",
49 be16_to_cpu(s1->length), len1);
50 }
51
52 if (len2 > HFSPLUS_MAX_STRLEN) {
53 len2 = HFSPLUS_MAX_STRLEN;
54 pr_err("invalid length %u has been corrected to %d\n",
55 be16_to_cpu(s2->length), len2);
56 }
57
58 while (1) {
59 c1 = c2 = 0;
60
61 while (len1 && !c1) {
62 c1 = case_fold(be16_to_cpu(*p1));
63 p1++;
64 len1--;
65 }
66 while (len2 && !c2) {
67 c2 = case_fold(be16_to_cpu(*p2));
68 p2++;
69 len2--;
70 }
71
72 if (c1 != c2)
73 return (c1 < c2) ? -1 : 1;
74 if (!c1 && !c2)
75 return 0;
76 }
77 }
78 EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcasecmp);
79
80 /* Compare names as a sequence of 16-bit unsigned integers */
hfsplus_strcmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)81 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
82 const struct hfsplus_unistr *s2)
83 {
84 u16 len1, len2, c1, c2;
85 const hfsplus_unichr *p1, *p2;
86 int len;
87
88 len1 = be16_to_cpu(s1->length);
89 len2 = be16_to_cpu(s2->length);
90 p1 = s1->unicode;
91 p2 = s2->unicode;
92
93 if (len1 > HFSPLUS_MAX_STRLEN) {
94 len1 = HFSPLUS_MAX_STRLEN;
95 pr_err("invalid length %u has been corrected to %d\n",
96 be16_to_cpu(s1->length), len1);
97 }
98
99 if (len2 > HFSPLUS_MAX_STRLEN) {
100 len2 = HFSPLUS_MAX_STRLEN;
101 pr_err("invalid length %u has been corrected to %d\n",
102 be16_to_cpu(s2->length), len2);
103 }
104
105 for (len = min(len1, len2); len > 0; len--) {
106 c1 = be16_to_cpu(*p1);
107 c2 = be16_to_cpu(*p2);
108 if (c1 != c2)
109 return c1 < c2 ? -1 : 1;
110 p1++;
111 p2++;
112 }
113
114 return len1 < len2 ? -1 :
115 len1 > len2 ? 1 : 0;
116 }
117 EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcmp);
118
/*
 * Constants for the arithmetic Hangul composition/decomposition done in
 * hfsplus_uni2asc() and hfsplus_try_decompose_hangul().
 *
 * A precomposed syllable is addressed by its index from Hangul_SBase; that
 * index is split into a leading (L), vowel (V) and trailing (T) index, each
 * added to its own base.  A trailing index of 0 (i.e. the value Hangul_TBase
 * itself) means the syllable has no trailing unit.
 */
#define Hangul_SBase	0xac00	/* base of the precomposed syllable range */
#define Hangul_LBase	0x1100	/* base for the leading unit */
#define Hangul_VBase	0x1161	/* base for the vowel unit */
#define Hangul_TBase	0x11a7	/* base for the trailing unit */
#define Hangul_SCount	11172	/* number of precomposed syllables */
#define Hangul_LCount	19	/* number of leading units */
#define Hangul_VCount	21	/* number of vowel units */
#define Hangul_TCount	28	/* number of trailing indexes, incl. "none" */
/* syllables sharing one leading unit */
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
128
129
/*
 * Binary-search one node of the composition table for @cc.
 *
 * As read here, p[1] is the number of entries in the node and entry i
 * (counted from 1) is the pair p[2 * i] (key) and p[2 * i + 1] (offset into
 * hfsplus_compose_table).  The search relies on the keys being sorted in
 * ascending order.
 *
 * Returns the position in hfsplus_compose_table recorded for @cc, or NULL
 * when the node is empty or has no entry for @cc.
 */
static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
	int lo = 1;
	int hi = p[1];

	/* Empty node, or @cc outside the [first key, last key] range. */
	if (!hi || cc < p[lo * 2] || cc > p[hi * 2])
		return NULL;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		u16 key = p[mid * 2];

		if (cc == key)
			return hfsplus_compose_table + p[mid * 2 + 1];

		if (cc > key)
			lo = mid + 1;
		else
			hi = mid - 1;
	}

	return NULL;
}
149
150 /*
151 * In HFS+, a filename can contain / because : is the separator.
152 * The slash is a valid filename character on macOS.
153 * But on Linux, / is the path separator and
154 * it cannot appear in a filename component.
155 * There's a parallel mapping for the NUL character (0 -> U+2400).
156 * NUL terminates strings in C/POSIX but is valid in HFS+ filenames.
157 */
158 static inline
hfsplus_mac2linux_compatibility_check(u16 symbol,u16 * conversion,int name_type)159 void hfsplus_mac2linux_compatibility_check(u16 symbol, u16 *conversion,
160 int name_type)
161 {
162 *conversion = symbol;
163
164 switch (name_type) {
165 case HFS_XATTR_NAME:
166 /* ignore conversion */
167 return;
168
169 default:
170 /* continue logic */
171 break;
172 }
173
174 switch (symbol) {
175 case 0:
176 *conversion = 0x2400;
177 break;
178 case '/':
179 *conversion = ':';
180 break;
181 }
182 }
183
hfsplus_uni2asc(struct super_block * sb,const struct hfsplus_unistr * ustr,int max_len,char * astr,int * len_p,int name_type)184 static int hfsplus_uni2asc(struct super_block *sb,
185 const struct hfsplus_unistr *ustr,
186 int max_len, char *astr, int *len_p,
187 int name_type)
188 {
189 const hfsplus_unichr *ip;
190 struct nls_table *nls = HFSPLUS_SB(sb)->nls;
191 u8 *op;
192 u16 cc, c0, c1;
193 u16 *ce1, *ce2;
194 int i, len, ustrlen, res, compose;
195
196 op = astr;
197 ip = ustr->unicode;
198
199 ustrlen = be16_to_cpu(ustr->length);
200 if (ustrlen > max_len) {
201 ustrlen = max_len;
202 pr_err("invalid length %u has been corrected to %d\n",
203 be16_to_cpu(ustr->length), ustrlen);
204 }
205
206 len = *len_p;
207 ce1 = NULL;
208 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
209
210 while (ustrlen > 0) {
211 c0 = be16_to_cpu(*ip++);
212 ustrlen--;
213 /* search for single decomposed char */
214 if (likely(compose))
215 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
216 if (ce1)
217 cc = ce1[0];
218 else
219 cc = 0;
220 if (cc) {
221 /* start of a possibly decomposed Hangul char */
222 if (cc != 0xffff)
223 goto done;
224 if (!ustrlen)
225 goto same;
226 c1 = be16_to_cpu(*ip) - Hangul_VBase;
227 if (c1 < Hangul_VCount) {
228 /* compose the Hangul char */
229 cc = (c0 - Hangul_LBase) * Hangul_VCount;
230 cc = (cc + c1) * Hangul_TCount;
231 cc += Hangul_SBase;
232 ip++;
233 ustrlen--;
234 if (!ustrlen)
235 goto done;
236 c1 = be16_to_cpu(*ip) - Hangul_TBase;
237 if (c1 > 0 && c1 < Hangul_TCount) {
238 cc += c1;
239 ip++;
240 ustrlen--;
241 }
242 goto done;
243 }
244 }
245 while (1) {
246 /* main loop for common case of not composed chars */
247 if (!ustrlen)
248 goto same;
249 c1 = be16_to_cpu(*ip);
250 if (likely(compose))
251 ce1 = hfsplus_compose_lookup(
252 hfsplus_compose_table, c1);
253 if (ce1)
254 break;
255 hfsplus_mac2linux_compatibility_check(c0, &c0,
256 name_type);
257 res = nls->uni2char(c0, op, len);
258 if (res < 0) {
259 if (res == -ENAMETOOLONG)
260 goto out;
261 *op = '?';
262 res = 1;
263 }
264 op += res;
265 len -= res;
266 c0 = c1;
267 ip++;
268 ustrlen--;
269 }
270 ce2 = hfsplus_compose_lookup(ce1, c0);
271 if (ce2) {
272 i = 1;
273 while (i < ustrlen) {
274 ce1 = hfsplus_compose_lookup(ce2,
275 be16_to_cpu(ip[i]));
276 if (!ce1)
277 break;
278 i++;
279 ce2 = ce1;
280 }
281 cc = ce2[0];
282 if (cc) {
283 ip += i;
284 ustrlen -= i;
285 goto done;
286 }
287 }
288 same:
289 hfsplus_mac2linux_compatibility_check(c0, &cc,
290 name_type);
291 done:
292 res = nls->uni2char(cc, op, len);
293 if (res < 0) {
294 if (res == -ENAMETOOLONG)
295 goto out;
296 *op = '?';
297 res = 1;
298 }
299 op += res;
300 len -= res;
301 }
302 res = 0;
303 out:
304 *len_p = (char *)op - astr;
305 return res;
306 }
307
hfsplus_uni2asc_str(struct super_block * sb,const struct hfsplus_unistr * ustr,char * astr,int * len_p)308 inline int hfsplus_uni2asc_str(struct super_block *sb,
309 const struct hfsplus_unistr *ustr, char *astr,
310 int *len_p)
311 {
312 return hfsplus_uni2asc(sb,
313 ustr, HFSPLUS_MAX_STRLEN,
314 astr, len_p,
315 HFS_REGULAR_NAME);
316 }
317 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_str);
318
hfsplus_uni2asc_xattr_str(struct super_block * sb,const struct hfsplus_attr_unistr * ustr,char * astr,int * len_p)319 inline int hfsplus_uni2asc_xattr_str(struct super_block *sb,
320 const struct hfsplus_attr_unistr *ustr,
321 char *astr, int *len_p)
322 {
323 return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr,
324 HFSPLUS_ATTR_MAX_STRLEN, astr, len_p,
325 HFS_XATTR_NAME);
326 }
327 EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_xattr_str);
328
329 /*
330 * In HFS+, a filename can contain / because : is the separator.
331 * The slash is a valid filename character on macOS.
332 * But on Linux, / is the path separator and
333 * it cannot appear in a filename component.
334 * There's a parallel mapping for the NUL character (0 -> U+2400).
335 * NUL terminates strings in C/POSIX but is valid in HFS+ filenames.
336 */
337 static inline
hfsplus_linux2mac_compatibility_check(wchar_t * uc,int name_type)338 void hfsplus_linux2mac_compatibility_check(wchar_t *uc, int name_type)
339 {
340 switch (name_type) {
341 case HFS_XATTR_NAME:
342 /* ignore conversion */
343 return;
344
345 default:
346 /* continue logic */
347 break;
348 }
349
350 switch (*uc) {
351 case 0x2400:
352 *uc = 0;
353 break;
354 case ':':
355 *uc = '/';
356 break;
357 }
358 }
359
360 /*
361 * Convert one or more ASCII characters into a single unicode character.
362 * Returns the number of ASCII characters corresponding to the unicode char.
363 */
asc2unichar(struct super_block * sb,const char * astr,int len,wchar_t * uc,int name_type)364 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
365 wchar_t *uc, int name_type)
366 {
367 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
368
369 if (size <= 0) {
370 *uc = '?';
371 size = 1;
372 }
373
374 hfsplus_linux2mac_compatibility_check(uc, name_type);
375 return size;
376 }
377
378 /* Decomposes a non-Hangul unicode character. */
hfsplus_decompose_nonhangul(wchar_t uc,int * size)379 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
380 {
381 int off;
382
383 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
384 if (off == 0 || off == 0xffff)
385 return NULL;
386
387 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
388 if (!off)
389 return NULL;
390
391 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
392 if (!off)
393 return NULL;
394
395 off = hfsplus_decompose_table[off + (uc & 0xf)];
396 *size = off & 3;
397 if (*size == 0)
398 return NULL;
399 return hfsplus_decompose_table + (off / 4);
400 }
401
402 /*
403 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
404 * precomposed Hangul, otherwise return the length of the decomposition.
405 *
406 * This function was adapted from sample code from the Unicode Standard
407 * Annex #15: Unicode Normalization Forms, version 3.2.0.
408 *
409 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
410 * under the Terms of Use in http://www.unicode.org/copyright.html.
411 */
hfsplus_try_decompose_hangul(wchar_t uc,u16 * result)412 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
413 {
414 int index;
415 int l, v, t;
416
417 index = uc - Hangul_SBase;
418 if (index < 0 || index >= Hangul_SCount)
419 return 0;
420
421 l = Hangul_LBase + index / Hangul_NCount;
422 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
423 t = Hangul_TBase + index % Hangul_TCount;
424
425 result[0] = l;
426 result[1] = v;
427 if (t != Hangul_TBase) {
428 result[2] = t;
429 return 3;
430 }
431 return 2;
432 }
433
434 /* Decomposes a single unicode character. */
decompose_unichar(wchar_t uc,int * size,u16 * hangul_buffer)435 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
436 {
437 u16 *result;
438
439 /* Hangul is handled separately */
440 result = hangul_buffer;
441 *size = hfsplus_try_decompose_hangul(uc, result);
442 if (*size == 0)
443 result = hfsplus_decompose_nonhangul(uc, size);
444 return result;
445 }
446
hfsplus_asc2uni(struct super_block * sb,struct hfsplus_unistr * ustr,int max_unistr_len,const char * astr,int len,int name_type)447 int hfsplus_asc2uni(struct super_block *sb,
448 struct hfsplus_unistr *ustr, int max_unistr_len,
449 const char *astr, int len, int name_type)
450 {
451 int size, dsize, decompose;
452 u16 *dstr, outlen = 0;
453 wchar_t c;
454 u16 dhangul[3];
455
456 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
457 while (outlen < max_unistr_len && len > 0) {
458 size = asc2unichar(sb, astr, len, &c, name_type);
459
460 if (decompose)
461 dstr = decompose_unichar(c, &dsize, dhangul);
462 else
463 dstr = NULL;
464 if (dstr) {
465 if (outlen + dsize > max_unistr_len)
466 break;
467 do {
468 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
469 } while (--dsize > 0);
470 } else
471 ustr->unicode[outlen++] = cpu_to_be16(c);
472
473 astr += size;
474 len -= size;
475 }
476 ustr->length = cpu_to_be16(outlen);
477 if (len > 0)
478 return -ENAMETOOLONG;
479 return 0;
480 }
481 EXPORT_SYMBOL_IF_KUNIT(hfsplus_asc2uni);
482
483 /*
484 * Hash a string to an integer as appropriate for the HFS+ filesystem.
485 * Composed unicode characters are decomposed and case-folding is performed
486 * if the appropriate bits are (un)set on the superblock.
487 */
hfsplus_hash_dentry(const struct dentry * dentry,struct qstr * str)488 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
489 {
490 struct super_block *sb = dentry->d_sb;
491 const char *astr;
492 const u16 *dstr;
493 int casefold, decompose, size, len;
494 unsigned long hash;
495 wchar_t c;
496 u16 c2;
497 u16 dhangul[3];
498
499 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
500 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
501 hash = init_name_hash(dentry);
502 astr = str->name;
503 len = str->len;
504 while (len > 0) {
505 int dsize;
506 size = asc2unichar(sb, astr, len, &c, HFS_REGULAR_NAME);
507 astr += size;
508 len -= size;
509
510 if (decompose)
511 dstr = decompose_unichar(c, &dsize, dhangul);
512 else
513 dstr = NULL;
514 if (dstr) {
515 do {
516 c2 = *dstr++;
517 if (casefold)
518 c2 = case_fold(c2);
519 if (!casefold || c2)
520 hash = partial_name_hash(c2, hash);
521 } while (--dsize > 0);
522 } else {
523 c2 = c;
524 if (casefold)
525 c2 = case_fold(c2);
526 if (!casefold || c2)
527 hash = partial_name_hash(c2, hash);
528 }
529 }
530 str->hash = end_name_hash(hash);
531
532 return 0;
533 }
534 EXPORT_SYMBOL_IF_KUNIT(hfsplus_hash_dentry);
535
536 /*
537 * Compare strings with HFS+ filename ordering.
538 * Composed unicode characters are decomposed and case-folding is performed
539 * if the appropriate bits are (un)set on the superblock.
540 */
hfsplus_compare_dentry(const struct dentry * dentry,unsigned int len,const char * str,const struct qstr * name)541 int hfsplus_compare_dentry(const struct dentry *dentry,
542 unsigned int len, const char *str, const struct qstr *name)
543 {
544 struct super_block *sb = dentry->d_sb;
545 int casefold, decompose, size;
546 int dsize1, dsize2, len1, len2;
547 const u16 *dstr1, *dstr2;
548 const char *astr1, *astr2;
549 u16 c1, c2;
550 wchar_t c;
551 u16 dhangul_1[3], dhangul_2[3];
552
553 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
554 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
555 astr1 = str;
556 len1 = len;
557 astr2 = name->name;
558 len2 = name->len;
559 dsize1 = dsize2 = 0;
560 dstr1 = dstr2 = NULL;
561
562 while (len1 > 0 && len2 > 0) {
563 if (!dsize1) {
564 size = asc2unichar(sb, astr1, len1, &c,
565 HFS_REGULAR_NAME);
566 astr1 += size;
567 len1 -= size;
568
569 if (decompose)
570 dstr1 = decompose_unichar(c, &dsize1,
571 dhangul_1);
572 if (!decompose || !dstr1) {
573 c1 = c;
574 dstr1 = &c1;
575 dsize1 = 1;
576 }
577 }
578
579 if (!dsize2) {
580 size = asc2unichar(sb, astr2, len2, &c,
581 HFS_REGULAR_NAME);
582 astr2 += size;
583 len2 -= size;
584
585 if (decompose)
586 dstr2 = decompose_unichar(c, &dsize2,
587 dhangul_2);
588 if (!decompose || !dstr2) {
589 c2 = c;
590 dstr2 = &c2;
591 dsize2 = 1;
592 }
593 }
594
595 c1 = *dstr1;
596 c2 = *dstr2;
597 if (casefold) {
598 c1 = case_fold(c1);
599 if (!c1) {
600 dstr1++;
601 dsize1--;
602 continue;
603 }
604 c2 = case_fold(c2);
605 if (!c2) {
606 dstr2++;
607 dsize2--;
608 continue;
609 }
610 }
611 if (c1 < c2)
612 return -1;
613 else if (c1 > c2)
614 return 1;
615
616 dstr1++;
617 dsize1--;
618 dstr2++;
619 dsize2--;
620 }
621
622 if (len1 < len2)
623 return -1;
624 if (len1 > len2)
625 return 1;
626 return 0;
627 }
628 EXPORT_SYMBOL_IF_KUNIT(hfsplus_compare_dentry);
629