xref: /linux/fs/hfsplus/unicode.c (revision cb6bbff7e6fb263dd739514b3f5dfdcd8eaa9836)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/hfsplus/unicode.c
4  *
5  * Copyright (C) 2001
6  * Brad Boyer (flar@allandria.com)
7  * (C) 2003 Ardis Technologies <roman@ardistech.com>
8  *
9  * Handler routines for unicode strings
10  */
11 
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 #include "hfsplus_fs.h"
15 #include "hfsplus_raw.h"
16 
17 /* Fold the case of a unicode char, given the 16 bit value */
18 /* Returns folded char, or 0 if ignorable */
case_fold(u16 c)19 static inline u16 case_fold(u16 c)
20 {
21 	u16 tmp;
22 
23 	tmp = hfsplus_case_fold_table[c >> 8];
24 	if (tmp)
25 		tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
26 	else
27 		tmp = c;
28 	return tmp;
29 }
30 
31 /* Compare unicode strings, return values like normal strcmp */
hfsplus_strcasecmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)32 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
33 		       const struct hfsplus_unistr *s2)
34 {
35 	u16 len1, len2, c1, c2;
36 	const hfsplus_unichr *p1, *p2;
37 
38 	len1 = be16_to_cpu(s1->length);
39 	len2 = be16_to_cpu(s2->length);
40 	p1 = s1->unicode;
41 	p2 = s2->unicode;
42 
43 	while (1) {
44 		c1 = c2 = 0;
45 
46 		while (len1 && !c1) {
47 			c1 = case_fold(be16_to_cpu(*p1));
48 			p1++;
49 			len1--;
50 		}
51 		while (len2 && !c2) {
52 			c2 = case_fold(be16_to_cpu(*p2));
53 			p2++;
54 			len2--;
55 		}
56 
57 		if (c1 != c2)
58 			return (c1 < c2) ? -1 : 1;
59 		if (!c1 && !c2)
60 			return 0;
61 	}
62 }
63 
64 /* Compare names as a sequence of 16-bit unsigned integers */
hfsplus_strcmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)65 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
66 		   const struct hfsplus_unistr *s2)
67 {
68 	u16 len1, len2, c1, c2;
69 	const hfsplus_unichr *p1, *p2;
70 	int len;
71 
72 	len1 = be16_to_cpu(s1->length);
73 	len2 = be16_to_cpu(s2->length);
74 	p1 = s1->unicode;
75 	p2 = s2->unicode;
76 
77 	for (len = min(len1, len2); len > 0; len--) {
78 		c1 = be16_to_cpu(*p1);
79 		c2 = be16_to_cpu(*p2);
80 		if (c1 != c2)
81 			return c1 < c2 ? -1 : 1;
82 		p1++;
83 		p2++;
84 	}
85 
86 	return len1 < len2 ? -1 :
87 	       len1 > len2 ? 1 : 0;
88 }
89 
90 
/*
 * Constants for the arithmetic composition/decomposition of Hangul
 * syllables (see hfsplus_uni2asc() and hfsplus_try_decompose_hangul()).
 * S = precomposed syllable, L/V/T = the first/second/third unit of the
 * decomposed form.  Hangul_TBase is the value that stands for "no third
 * unit": a T index of 0 produces a two-unit decomposition.
 */
enum {
	Hangul_SBase	= 0xac00,
	Hangul_LBase	= 0x1100,
	Hangul_VBase	= 0x1161,
	Hangul_TBase	= 0x11a7,
	Hangul_SCount	= 11172,
	Hangul_LCount	= 19,
	Hangul_VCount	= 21,
	Hangul_TCount	= 28,
	Hangul_NCount	= Hangul_VCount * Hangul_TCount
};
100 
101 
/*
 * Binary-search one node of the composition table for the unit @cc.
 *
 * @p points at a node whose entry count is stored in p[1] and whose
 * entries are (key, offset) pairs located at p[2 * i] and p[2 * i + 1]
 * for i = 1..count.  The search assumes the keys are in ascending order.
 *
 * Returns hfsplus_compose_table plus the offset stored with the matching
 * key, or NULL if the node is empty or does not contain @cc.
 */
static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
	int lo = 1;
	int hi = p[1];

	if (hi == 0)
		return NULL;
	/* quick reject when @cc is outside the node's key range */
	if (cc < p[2 * lo] || cc > p[2 * hi])
		return NULL;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		u16 key = p[2 * mid];

		if (key == cc)
			return hfsplus_compose_table + p[2 * mid + 1];
		if (key < cc)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return NULL;
}
121 
hfsplus_uni2asc(struct super_block * sb,const struct hfsplus_unistr * ustr,char * astr,int * len_p)122 int hfsplus_uni2asc(struct super_block *sb,
123 		const struct hfsplus_unistr *ustr,
124 		char *astr, int *len_p)
125 {
126 	const hfsplus_unichr *ip;
127 	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
128 	u8 *op;
129 	u16 cc, c0, c1;
130 	u16 *ce1, *ce2;
131 	int i, len, ustrlen, res, compose;
132 
133 	op = astr;
134 	ip = ustr->unicode;
135 
136 	ustrlen = be16_to_cpu(ustr->length);
137 	if (ustrlen > HFSPLUS_MAX_STRLEN) {
138 		ustrlen = HFSPLUS_MAX_STRLEN;
139 		pr_err("invalid length %u has been corrected to %d\n",
140 			be16_to_cpu(ustr->length), ustrlen);
141 	}
142 
143 	len = *len_p;
144 	ce1 = NULL;
145 	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
146 
147 	while (ustrlen > 0) {
148 		c0 = be16_to_cpu(*ip++);
149 		ustrlen--;
150 		/* search for single decomposed char */
151 		if (likely(compose))
152 			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
153 		if (ce1)
154 			cc = ce1[0];
155 		else
156 			cc = 0;
157 		if (cc) {
158 			/* start of a possibly decomposed Hangul char */
159 			if (cc != 0xffff)
160 				goto done;
161 			if (!ustrlen)
162 				goto same;
163 			c1 = be16_to_cpu(*ip) - Hangul_VBase;
164 			if (c1 < Hangul_VCount) {
165 				/* compose the Hangul char */
166 				cc = (c0 - Hangul_LBase) * Hangul_VCount;
167 				cc = (cc + c1) * Hangul_TCount;
168 				cc += Hangul_SBase;
169 				ip++;
170 				ustrlen--;
171 				if (!ustrlen)
172 					goto done;
173 				c1 = be16_to_cpu(*ip) - Hangul_TBase;
174 				if (c1 > 0 && c1 < Hangul_TCount) {
175 					cc += c1;
176 					ip++;
177 					ustrlen--;
178 				}
179 				goto done;
180 			}
181 		}
182 		while (1) {
183 			/* main loop for common case of not composed chars */
184 			if (!ustrlen)
185 				goto same;
186 			c1 = be16_to_cpu(*ip);
187 			if (likely(compose))
188 				ce1 = hfsplus_compose_lookup(
189 					hfsplus_compose_table, c1);
190 			if (ce1)
191 				break;
192 			switch (c0) {
193 			case 0:
194 				c0 = 0x2400;
195 				break;
196 			case '/':
197 				c0 = ':';
198 				break;
199 			}
200 			res = nls->uni2char(c0, op, len);
201 			if (res < 0) {
202 				if (res == -ENAMETOOLONG)
203 					goto out;
204 				*op = '?';
205 				res = 1;
206 			}
207 			op += res;
208 			len -= res;
209 			c0 = c1;
210 			ip++;
211 			ustrlen--;
212 		}
213 		ce2 = hfsplus_compose_lookup(ce1, c0);
214 		if (ce2) {
215 			i = 1;
216 			while (i < ustrlen) {
217 				ce1 = hfsplus_compose_lookup(ce2,
218 					be16_to_cpu(ip[i]));
219 				if (!ce1)
220 					break;
221 				i++;
222 				ce2 = ce1;
223 			}
224 			cc = ce2[0];
225 			if (cc) {
226 				ip += i;
227 				ustrlen -= i;
228 				goto done;
229 			}
230 		}
231 same:
232 		switch (c0) {
233 		case 0:
234 			cc = 0x2400;
235 			break;
236 		case '/':
237 			cc = ':';
238 			break;
239 		default:
240 			cc = c0;
241 		}
242 done:
243 		res = nls->uni2char(cc, op, len);
244 		if (res < 0) {
245 			if (res == -ENAMETOOLONG)
246 				goto out;
247 			*op = '?';
248 			res = 1;
249 		}
250 		op += res;
251 		len -= res;
252 	}
253 	res = 0;
254 out:
255 	*len_p = (char *)op - astr;
256 	return res;
257 }
258 
259 /*
260  * Convert one or more ASCII characters into a single unicode character.
261  * Returns the number of ASCII characters corresponding to the unicode char.
262  */
asc2unichar(struct super_block * sb,const char * astr,int len,wchar_t * uc)263 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
264 			      wchar_t *uc)
265 {
266 	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
267 	if (size <= 0) {
268 		*uc = '?';
269 		size = 1;
270 	}
271 	switch (*uc) {
272 	case 0x2400:
273 		*uc = 0;
274 		break;
275 	case ':':
276 		*uc = '/';
277 		break;
278 	}
279 	return size;
280 }
281 
282 /* Decomposes a non-Hangul unicode character. */
hfsplus_decompose_nonhangul(wchar_t uc,int * size)283 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
284 {
285 	int off;
286 
287 	off = hfsplus_decompose_table[(uc >> 12) & 0xf];
288 	if (off == 0 || off == 0xffff)
289 		return NULL;
290 
291 	off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
292 	if (!off)
293 		return NULL;
294 
295 	off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
296 	if (!off)
297 		return NULL;
298 
299 	off = hfsplus_decompose_table[off + (uc & 0xf)];
300 	*size = off & 3;
301 	if (*size == 0)
302 		return NULL;
303 	return hfsplus_decompose_table + (off / 4);
304 }
305 
306 /*
307  * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
308  * precomposed Hangul, otherwise return the length of the decomposition.
309  *
310  * This function was adapted from sample code from the Unicode Standard
311  * Annex #15: Unicode Normalization Forms, version 3.2.0.
312  *
313  * Copyright (C) 1991-2018 Unicode, Inc.  All rights reserved.  Distributed
314  * under the Terms of Use in http://www.unicode.org/copyright.html.
315  */
hfsplus_try_decompose_hangul(wchar_t uc,u16 * result)316 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
317 {
318 	int index;
319 	int l, v, t;
320 
321 	index = uc - Hangul_SBase;
322 	if (index < 0 || index >= Hangul_SCount)
323 		return 0;
324 
325 	l = Hangul_LBase + index / Hangul_NCount;
326 	v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
327 	t = Hangul_TBase + index % Hangul_TCount;
328 
329 	result[0] = l;
330 	result[1] = v;
331 	if (t != Hangul_TBase) {
332 		result[2] = t;
333 		return 3;
334 	}
335 	return 2;
336 }
337 
338 /* Decomposes a single unicode character. */
decompose_unichar(wchar_t uc,int * size,u16 * hangul_buffer)339 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
340 {
341 	u16 *result;
342 
343 	/* Hangul is handled separately */
344 	result = hangul_buffer;
345 	*size = hfsplus_try_decompose_hangul(uc, result);
346 	if (*size == 0)
347 		result = hfsplus_decompose_nonhangul(uc, size);
348 	return result;
349 }
350 
hfsplus_asc2uni(struct super_block * sb,struct hfsplus_unistr * ustr,int max_unistr_len,const char * astr,int len)351 int hfsplus_asc2uni(struct super_block *sb,
352 		    struct hfsplus_unistr *ustr, int max_unistr_len,
353 		    const char *astr, int len)
354 {
355 	int size, dsize, decompose;
356 	u16 *dstr, outlen = 0;
357 	wchar_t c;
358 	u16 dhangul[3];
359 
360 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
361 	while (outlen < max_unistr_len && len > 0) {
362 		size = asc2unichar(sb, astr, len, &c);
363 
364 		if (decompose)
365 			dstr = decompose_unichar(c, &dsize, dhangul);
366 		else
367 			dstr = NULL;
368 		if (dstr) {
369 			if (outlen + dsize > max_unistr_len)
370 				break;
371 			do {
372 				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
373 			} while (--dsize > 0);
374 		} else
375 			ustr->unicode[outlen++] = cpu_to_be16(c);
376 
377 		astr += size;
378 		len -= size;
379 	}
380 	ustr->length = cpu_to_be16(outlen);
381 	if (len > 0)
382 		return -ENAMETOOLONG;
383 	return 0;
384 }
385 
386 /*
387  * Hash a string to an integer as appropriate for the HFS+ filesystem.
388  * Composed unicode characters are decomposed and case-folding is performed
389  * if the appropriate bits are (un)set on the superblock.
390  */
hfsplus_hash_dentry(const struct dentry * dentry,struct qstr * str)391 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
392 {
393 	struct super_block *sb = dentry->d_sb;
394 	const char *astr;
395 	const u16 *dstr;
396 	int casefold, decompose, size, len;
397 	unsigned long hash;
398 	wchar_t c;
399 	u16 c2;
400 	u16 dhangul[3];
401 
402 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
403 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
404 	hash = init_name_hash(dentry);
405 	astr = str->name;
406 	len = str->len;
407 	while (len > 0) {
408 		int dsize;
409 		size = asc2unichar(sb, astr, len, &c);
410 		astr += size;
411 		len -= size;
412 
413 		if (decompose)
414 			dstr = decompose_unichar(c, &dsize, dhangul);
415 		else
416 			dstr = NULL;
417 		if (dstr) {
418 			do {
419 				c2 = *dstr++;
420 				if (casefold)
421 					c2 = case_fold(c2);
422 				if (!casefold || c2)
423 					hash = partial_name_hash(c2, hash);
424 			} while (--dsize > 0);
425 		} else {
426 			c2 = c;
427 			if (casefold)
428 				c2 = case_fold(c2);
429 			if (!casefold || c2)
430 				hash = partial_name_hash(c2, hash);
431 		}
432 	}
433 	str->hash = end_name_hash(hash);
434 
435 	return 0;
436 }
437 
438 /*
439  * Compare strings with HFS+ filename ordering.
440  * Composed unicode characters are decomposed and case-folding is performed
441  * if the appropriate bits are (un)set on the superblock.
442  */
hfsplus_compare_dentry(const struct dentry * dentry,unsigned int len,const char * str,const struct qstr * name)443 int hfsplus_compare_dentry(const struct dentry *dentry,
444 		unsigned int len, const char *str, const struct qstr *name)
445 {
446 	struct super_block *sb = dentry->d_sb;
447 	int casefold, decompose, size;
448 	int dsize1, dsize2, len1, len2;
449 	const u16 *dstr1, *dstr2;
450 	const char *astr1, *astr2;
451 	u16 c1, c2;
452 	wchar_t c;
453 	u16 dhangul_1[3], dhangul_2[3];
454 
455 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
456 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
457 	astr1 = str;
458 	len1 = len;
459 	astr2 = name->name;
460 	len2 = name->len;
461 	dsize1 = dsize2 = 0;
462 	dstr1 = dstr2 = NULL;
463 
464 	while (len1 > 0 && len2 > 0) {
465 		if (!dsize1) {
466 			size = asc2unichar(sb, astr1, len1, &c);
467 			astr1 += size;
468 			len1 -= size;
469 
470 			if (decompose)
471 				dstr1 = decompose_unichar(c, &dsize1,
472 							  dhangul_1);
473 			if (!decompose || !dstr1) {
474 				c1 = c;
475 				dstr1 = &c1;
476 				dsize1 = 1;
477 			}
478 		}
479 
480 		if (!dsize2) {
481 			size = asc2unichar(sb, astr2, len2, &c);
482 			astr2 += size;
483 			len2 -= size;
484 
485 			if (decompose)
486 				dstr2 = decompose_unichar(c, &dsize2,
487 							  dhangul_2);
488 			if (!decompose || !dstr2) {
489 				c2 = c;
490 				dstr2 = &c2;
491 				dsize2 = 1;
492 			}
493 		}
494 
495 		c1 = *dstr1;
496 		c2 = *dstr2;
497 		if (casefold) {
498 			c1 = case_fold(c1);
499 			if (!c1) {
500 				dstr1++;
501 				dsize1--;
502 				continue;
503 			}
504 			c2 = case_fold(c2);
505 			if (!c2) {
506 				dstr2++;
507 				dsize2--;
508 				continue;
509 			}
510 		}
511 		if (c1 < c2)
512 			return -1;
513 		else if (c1 > c2)
514 			return 1;
515 
516 		dstr1++;
517 		dsize1--;
518 		dstr2++;
519 		dsize2--;
520 	}
521 
522 	if (len1 < len2)
523 		return -1;
524 	if (len1 > len2)
525 		return 1;
526 	return 0;
527 }
528