xref: /linux/lib/xz/xz_dec_bcj.c (revision e5451c8f8330e03ad3cfa16048b4daf961af434f)
124fa0402SLasse Collin /*
224fa0402SLasse Collin  * Branch/Call/Jump (BCJ) filter decoders
324fa0402SLasse Collin  *
424fa0402SLasse Collin  * Authors: Lasse Collin <lasse.collin@tukaani.org>
524fa0402SLasse Collin  *          Igor Pavlov <http://7-zip.org/>
624fa0402SLasse Collin  *
724fa0402SLasse Collin  * This file has been put into the public domain.
824fa0402SLasse Collin  * You can do whatever you want with this file.
924fa0402SLasse Collin  */
1024fa0402SLasse Collin 
1124fa0402SLasse Collin #include "xz_private.h"
1224fa0402SLasse Collin 
1324fa0402SLasse Collin /*
1424fa0402SLasse Collin  * The rest of the file is inside this ifdef. It makes things a little more
1524fa0402SLasse Collin  * convenient when building without support for any BCJ filters.
1624fa0402SLasse Collin  */
1724fa0402SLasse Collin #ifdef XZ_DEC_BCJ
1824fa0402SLasse Collin 
1924fa0402SLasse Collin struct xz_dec_bcj {
2024fa0402SLasse Collin 	/* Type of the BCJ filter being used */
2124fa0402SLasse Collin 	enum {
2224fa0402SLasse Collin 		BCJ_X86 = 4,        /* x86 or x86-64 */
2324fa0402SLasse Collin 		BCJ_POWERPC = 5,    /* Big endian only */
2424fa0402SLasse Collin 		BCJ_IA64 = 6,       /* Big or little endian */
2524fa0402SLasse Collin 		BCJ_ARM = 7,        /* Little endian only */
2624fa0402SLasse Collin 		BCJ_ARMTHUMB = 8,   /* Little endian only */
2724fa0402SLasse Collin 		BCJ_SPARC = 9       /* Big or little endian */
2824fa0402SLasse Collin 	} type;
2924fa0402SLasse Collin 
3024fa0402SLasse Collin 	/*
3124fa0402SLasse Collin 	 * Return value of the next filter in the chain. We need to preserve
3224fa0402SLasse Collin 	 * this information across calls, because we must not call the next
3324fa0402SLasse Collin 	 * filter anymore once it has returned XZ_STREAM_END.
3424fa0402SLasse Collin 	 */
3524fa0402SLasse Collin 	enum xz_ret ret;
3624fa0402SLasse Collin 
3724fa0402SLasse Collin 	/* True if we are operating in single-call mode. */
3824fa0402SLasse Collin 	bool single_call;
3924fa0402SLasse Collin 
4024fa0402SLasse Collin 	/*
4124fa0402SLasse Collin 	 * Absolute position relative to the beginning of the uncompressed
4224fa0402SLasse Collin 	 * data (in a single .xz Block). We care only about the lowest 32
4324fa0402SLasse Collin 	 * bits so this doesn't need to be uint64_t even with big files.
4424fa0402SLasse Collin 	 */
4524fa0402SLasse Collin 	uint32_t pos;
4624fa0402SLasse Collin 
4724fa0402SLasse Collin 	/* x86 filter state */
4824fa0402SLasse Collin 	uint32_t x86_prev_mask;
4924fa0402SLasse Collin 
5024fa0402SLasse Collin 	/* Temporary space to hold the variables from struct xz_buf */
5124fa0402SLasse Collin 	uint8_t *out;
5224fa0402SLasse Collin 	size_t out_pos;
5324fa0402SLasse Collin 	size_t out_size;
5424fa0402SLasse Collin 
5524fa0402SLasse Collin 	struct {
5624fa0402SLasse Collin 		/* Amount of already filtered data in the beginning of buf */
5724fa0402SLasse Collin 		size_t filtered;
5824fa0402SLasse Collin 
5924fa0402SLasse Collin 		/* Total amount of data currently stored in buf  */
6024fa0402SLasse Collin 		size_t size;
6124fa0402SLasse Collin 
6224fa0402SLasse Collin 		/*
6324fa0402SLasse Collin 		 * Buffer to hold a mix of filtered and unfiltered data. This
6424fa0402SLasse Collin 		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
6524fa0402SLasse Collin 		 *
6624fa0402SLasse Collin 		 * Type         Alignment   Look-ahead
6724fa0402SLasse Collin 		 * x86              1           4
6824fa0402SLasse Collin 		 * PowerPC          4           0
6924fa0402SLasse Collin 		 * IA-64           16           0
7024fa0402SLasse Collin 		 * ARM              4           0
7124fa0402SLasse Collin 		 * ARM-Thumb        2           2
7224fa0402SLasse Collin 		 * SPARC            4           0
7324fa0402SLasse Collin 		 */
7424fa0402SLasse Collin 		uint8_t buf[16];
7524fa0402SLasse Collin 	} temp;
7624fa0402SLasse Collin };
7724fa0402SLasse Collin 
7824fa0402SLasse Collin #ifdef XZ_DEC_X86
7924fa0402SLasse Collin /*
8024fa0402SLasse Collin  * This is used to test the most significant byte of a memory address
8124fa0402SLasse Collin  * in an x86 instruction.
8224fa0402SLasse Collin  */
8324fa0402SLasse Collin static inline int bcj_x86_test_msbyte(uint8_t b)
8424fa0402SLasse Collin {
8524fa0402SLasse Collin 	return b == 0x00 || b == 0xFF;
8624fa0402SLasse Collin }
8724fa0402SLasse Collin 
8824fa0402SLasse Collin static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
8924fa0402SLasse Collin {
9024fa0402SLasse Collin 	static const bool mask_to_allowed_status[8]
9124fa0402SLasse Collin 		= { true, true, true, false, true, false, false, false };
9224fa0402SLasse Collin 
9324fa0402SLasse Collin 	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
9424fa0402SLasse Collin 
9524fa0402SLasse Collin 	size_t i;
9624fa0402SLasse Collin 	size_t prev_pos = (size_t)-1;
9724fa0402SLasse Collin 	uint32_t prev_mask = s->x86_prev_mask;
9824fa0402SLasse Collin 	uint32_t src;
9924fa0402SLasse Collin 	uint32_t dest;
10024fa0402SLasse Collin 	uint32_t j;
10124fa0402SLasse Collin 	uint8_t b;
10224fa0402SLasse Collin 
10324fa0402SLasse Collin 	if (size <= 4)
10424fa0402SLasse Collin 		return 0;
10524fa0402SLasse Collin 
10624fa0402SLasse Collin 	size -= 4;
10724fa0402SLasse Collin 	for (i = 0; i < size; ++i) {
10824fa0402SLasse Collin 		if ((buf[i] & 0xFE) != 0xE8)
10924fa0402SLasse Collin 			continue;
11024fa0402SLasse Collin 
11124fa0402SLasse Collin 		prev_pos = i - prev_pos;
11224fa0402SLasse Collin 		if (prev_pos > 3) {
11324fa0402SLasse Collin 			prev_mask = 0;
11424fa0402SLasse Collin 		} else {
11524fa0402SLasse Collin 			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
11624fa0402SLasse Collin 			if (prev_mask != 0) {
11724fa0402SLasse Collin 				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
11824fa0402SLasse Collin 				if (!mask_to_allowed_status[prev_mask]
11924fa0402SLasse Collin 						|| bcj_x86_test_msbyte(b)) {
12024fa0402SLasse Collin 					prev_pos = i;
12124fa0402SLasse Collin 					prev_mask = (prev_mask << 1) | 1;
12224fa0402SLasse Collin 					continue;
12324fa0402SLasse Collin 				}
12424fa0402SLasse Collin 			}
12524fa0402SLasse Collin 		}
12624fa0402SLasse Collin 
12724fa0402SLasse Collin 		prev_pos = i;
12824fa0402SLasse Collin 
12924fa0402SLasse Collin 		if (bcj_x86_test_msbyte(buf[i + 4])) {
13024fa0402SLasse Collin 			src = get_unaligned_le32(buf + i + 1);
13124fa0402SLasse Collin 			while (true) {
13224fa0402SLasse Collin 				dest = src - (s->pos + (uint32_t)i + 5);
13324fa0402SLasse Collin 				if (prev_mask == 0)
13424fa0402SLasse Collin 					break;
13524fa0402SLasse Collin 
13624fa0402SLasse Collin 				j = mask_to_bit_num[prev_mask] * 8;
13724fa0402SLasse Collin 				b = (uint8_t)(dest >> (24 - j));
13824fa0402SLasse Collin 				if (!bcj_x86_test_msbyte(b))
13924fa0402SLasse Collin 					break;
14024fa0402SLasse Collin 
14124fa0402SLasse Collin 				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
14224fa0402SLasse Collin 			}
14324fa0402SLasse Collin 
14424fa0402SLasse Collin 			dest &= 0x01FFFFFF;
14524fa0402SLasse Collin 			dest |= (uint32_t)0 - (dest & 0x01000000);
14624fa0402SLasse Collin 			put_unaligned_le32(dest, buf + i + 1);
14724fa0402SLasse Collin 			i += 4;
14824fa0402SLasse Collin 		} else {
14924fa0402SLasse Collin 			prev_mask = (prev_mask << 1) | 1;
15024fa0402SLasse Collin 		}
15124fa0402SLasse Collin 	}
15224fa0402SLasse Collin 
15324fa0402SLasse Collin 	prev_pos = i - prev_pos;
15424fa0402SLasse Collin 	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
15524fa0402SLasse Collin 	return i;
15624fa0402SLasse Collin }
15724fa0402SLasse Collin #endif
15824fa0402SLasse Collin 
15924fa0402SLasse Collin #ifdef XZ_DEC_POWERPC
16024fa0402SLasse Collin static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
16124fa0402SLasse Collin {
16224fa0402SLasse Collin 	size_t i;
16324fa0402SLasse Collin 	uint32_t instr;
16424fa0402SLasse Collin 
16524fa0402SLasse Collin 	for (i = 0; i + 4 <= size; i += 4) {
16624fa0402SLasse Collin 		instr = get_unaligned_be32(buf + i);
16724fa0402SLasse Collin 		if ((instr & 0xFC000003) == 0x48000001) {
16824fa0402SLasse Collin 			instr &= 0x03FFFFFC;
16924fa0402SLasse Collin 			instr -= s->pos + (uint32_t)i;
17024fa0402SLasse Collin 			instr &= 0x03FFFFFC;
17124fa0402SLasse Collin 			instr |= 0x48000001;
17224fa0402SLasse Collin 			put_unaligned_be32(instr, buf + i);
17324fa0402SLasse Collin 		}
17424fa0402SLasse Collin 	}
17524fa0402SLasse Collin 
17624fa0402SLasse Collin 	return i;
17724fa0402SLasse Collin }
17824fa0402SLasse Collin #endif
17924fa0402SLasse Collin 
18024fa0402SLasse Collin #ifdef XZ_DEC_IA64
18124fa0402SLasse Collin static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
18224fa0402SLasse Collin {
18324fa0402SLasse Collin 	static const uint8_t branch_table[32] = {
18424fa0402SLasse Collin 		0, 0, 0, 0, 0, 0, 0, 0,
18524fa0402SLasse Collin 		0, 0, 0, 0, 0, 0, 0, 0,
18624fa0402SLasse Collin 		4, 4, 6, 6, 0, 0, 7, 7,
18724fa0402SLasse Collin 		4, 4, 0, 0, 4, 4, 0, 0
18824fa0402SLasse Collin 	};
18924fa0402SLasse Collin 
19024fa0402SLasse Collin 	/*
19124fa0402SLasse Collin 	 * The local variables take a little bit stack space, but it's less
19224fa0402SLasse Collin 	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
19324fa0402SLasse Collin 	 * stack usage here without doing that for the LZMA2 decoder too.
19424fa0402SLasse Collin 	 */
19524fa0402SLasse Collin 
19624fa0402SLasse Collin 	/* Loop counters */
19724fa0402SLasse Collin 	size_t i;
19824fa0402SLasse Collin 	size_t j;
19924fa0402SLasse Collin 
20024fa0402SLasse Collin 	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
20124fa0402SLasse Collin 	uint32_t slot;
20224fa0402SLasse Collin 
20324fa0402SLasse Collin 	/* Bitwise offset of the instruction indicated by slot */
20424fa0402SLasse Collin 	uint32_t bit_pos;
20524fa0402SLasse Collin 
20624fa0402SLasse Collin 	/* bit_pos split into byte and bit parts */
20724fa0402SLasse Collin 	uint32_t byte_pos;
20824fa0402SLasse Collin 	uint32_t bit_res;
20924fa0402SLasse Collin 
21024fa0402SLasse Collin 	/* Address part of an instruction */
21124fa0402SLasse Collin 	uint32_t addr;
21224fa0402SLasse Collin 
21324fa0402SLasse Collin 	/* Mask used to detect which instructions to convert */
21424fa0402SLasse Collin 	uint32_t mask;
21524fa0402SLasse Collin 
21624fa0402SLasse Collin 	/* 41-bit instruction stored somewhere in the lowest 48 bits */
21724fa0402SLasse Collin 	uint64_t instr;
21824fa0402SLasse Collin 
21924fa0402SLasse Collin 	/* Instruction normalized with bit_res for easier manipulation */
22024fa0402SLasse Collin 	uint64_t norm;
22124fa0402SLasse Collin 
22224fa0402SLasse Collin 	for (i = 0; i + 16 <= size; i += 16) {
22324fa0402SLasse Collin 		mask = branch_table[buf[i] & 0x1F];
22424fa0402SLasse Collin 		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
22524fa0402SLasse Collin 			if (((mask >> slot) & 1) == 0)
22624fa0402SLasse Collin 				continue;
22724fa0402SLasse Collin 
22824fa0402SLasse Collin 			byte_pos = bit_pos >> 3;
22924fa0402SLasse Collin 			bit_res = bit_pos & 7;
23024fa0402SLasse Collin 			instr = 0;
23124fa0402SLasse Collin 			for (j = 0; j < 6; ++j)
23224fa0402SLasse Collin 				instr |= (uint64_t)(buf[i + j + byte_pos])
23324fa0402SLasse Collin 						<< (8 * j);
23424fa0402SLasse Collin 
23524fa0402SLasse Collin 			norm = instr >> bit_res;
23624fa0402SLasse Collin 
23724fa0402SLasse Collin 			if (((norm >> 37) & 0x0F) == 0x05
23824fa0402SLasse Collin 					&& ((norm >> 9) & 0x07) == 0) {
23924fa0402SLasse Collin 				addr = (norm >> 13) & 0x0FFFFF;
24024fa0402SLasse Collin 				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
24124fa0402SLasse Collin 				addr <<= 4;
24224fa0402SLasse Collin 				addr -= s->pos + (uint32_t)i;
24324fa0402SLasse Collin 				addr >>= 4;
24424fa0402SLasse Collin 
24524fa0402SLasse Collin 				norm &= ~((uint64_t)0x8FFFFF << 13);
24624fa0402SLasse Collin 				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
24724fa0402SLasse Collin 				norm |= (uint64_t)(addr & 0x100000)
24824fa0402SLasse Collin 						<< (36 - 20);
24924fa0402SLasse Collin 
25024fa0402SLasse Collin 				instr &= (1 << bit_res) - 1;
25124fa0402SLasse Collin 				instr |= norm << bit_res;
25224fa0402SLasse Collin 
25324fa0402SLasse Collin 				for (j = 0; j < 6; j++)
25424fa0402SLasse Collin 					buf[i + j + byte_pos]
25524fa0402SLasse Collin 						= (uint8_t)(instr >> (8 * j));
25624fa0402SLasse Collin 			}
25724fa0402SLasse Collin 		}
25824fa0402SLasse Collin 	}
25924fa0402SLasse Collin 
26024fa0402SLasse Collin 	return i;
26124fa0402SLasse Collin }
26224fa0402SLasse Collin #endif
26324fa0402SLasse Collin 
26424fa0402SLasse Collin #ifdef XZ_DEC_ARM
26524fa0402SLasse Collin static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
26624fa0402SLasse Collin {
26724fa0402SLasse Collin 	size_t i;
26824fa0402SLasse Collin 	uint32_t addr;
26924fa0402SLasse Collin 
27024fa0402SLasse Collin 	for (i = 0; i + 4 <= size; i += 4) {
27124fa0402SLasse Collin 		if (buf[i + 3] == 0xEB) {
27224fa0402SLasse Collin 			addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
27324fa0402SLasse Collin 					| ((uint32_t)buf[i + 2] << 16);
27424fa0402SLasse Collin 			addr <<= 2;
27524fa0402SLasse Collin 			addr -= s->pos + (uint32_t)i + 8;
27624fa0402SLasse Collin 			addr >>= 2;
27724fa0402SLasse Collin 			buf[i] = (uint8_t)addr;
27824fa0402SLasse Collin 			buf[i + 1] = (uint8_t)(addr >> 8);
27924fa0402SLasse Collin 			buf[i + 2] = (uint8_t)(addr >> 16);
28024fa0402SLasse Collin 		}
28124fa0402SLasse Collin 	}
28224fa0402SLasse Collin 
28324fa0402SLasse Collin 	return i;
28424fa0402SLasse Collin }
28524fa0402SLasse Collin #endif
28624fa0402SLasse Collin 
28724fa0402SLasse Collin #ifdef XZ_DEC_ARMTHUMB
28824fa0402SLasse Collin static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
28924fa0402SLasse Collin {
29024fa0402SLasse Collin 	size_t i;
29124fa0402SLasse Collin 	uint32_t addr;
29224fa0402SLasse Collin 
29324fa0402SLasse Collin 	for (i = 0; i + 4 <= size; i += 2) {
29424fa0402SLasse Collin 		if ((buf[i + 1] & 0xF8) == 0xF0
29524fa0402SLasse Collin 				&& (buf[i + 3] & 0xF8) == 0xF8) {
29624fa0402SLasse Collin 			addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
29724fa0402SLasse Collin 					| ((uint32_t)buf[i] << 11)
29824fa0402SLasse Collin 					| (((uint32_t)buf[i + 3] & 0x07) << 8)
29924fa0402SLasse Collin 					| (uint32_t)buf[i + 2];
30024fa0402SLasse Collin 			addr <<= 1;
30124fa0402SLasse Collin 			addr -= s->pos + (uint32_t)i + 4;
30224fa0402SLasse Collin 			addr >>= 1;
30324fa0402SLasse Collin 			buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
30424fa0402SLasse Collin 			buf[i] = (uint8_t)(addr >> 11);
30524fa0402SLasse Collin 			buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
30624fa0402SLasse Collin 			buf[i + 2] = (uint8_t)addr;
30724fa0402SLasse Collin 			i += 2;
30824fa0402SLasse Collin 		}
30924fa0402SLasse Collin 	}
31024fa0402SLasse Collin 
31124fa0402SLasse Collin 	return i;
31224fa0402SLasse Collin }
31324fa0402SLasse Collin #endif
31424fa0402SLasse Collin 
31524fa0402SLasse Collin #ifdef XZ_DEC_SPARC
31624fa0402SLasse Collin static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
31724fa0402SLasse Collin {
31824fa0402SLasse Collin 	size_t i;
31924fa0402SLasse Collin 	uint32_t instr;
32024fa0402SLasse Collin 
32124fa0402SLasse Collin 	for (i = 0; i + 4 <= size; i += 4) {
32224fa0402SLasse Collin 		instr = get_unaligned_be32(buf + i);
32324fa0402SLasse Collin 		if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
32424fa0402SLasse Collin 			instr <<= 2;
32524fa0402SLasse Collin 			instr -= s->pos + (uint32_t)i;
32624fa0402SLasse Collin 			instr >>= 2;
32724fa0402SLasse Collin 			instr = ((uint32_t)0x40000000 - (instr & 0x400000))
32824fa0402SLasse Collin 					| 0x40000000 | (instr & 0x3FFFFF);
32924fa0402SLasse Collin 			put_unaligned_be32(instr, buf + i);
33024fa0402SLasse Collin 		}
33124fa0402SLasse Collin 	}
33224fa0402SLasse Collin 
33324fa0402SLasse Collin 	return i;
33424fa0402SLasse Collin }
33524fa0402SLasse Collin #endif
33624fa0402SLasse Collin 
33724fa0402SLasse Collin /*
33824fa0402SLasse Collin  * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
33924fa0402SLasse Collin  * of data that got filtered.
34024fa0402SLasse Collin  *
34124fa0402SLasse Collin  * NOTE: This is implemented as a switch statement to avoid using function
34224fa0402SLasse Collin  * pointers, which could be problematic in the kernel boot code, which must
34324fa0402SLasse Collin  * avoid pointers to static data (at least on x86).
34424fa0402SLasse Collin  */
34524fa0402SLasse Collin static void bcj_apply(struct xz_dec_bcj *s,
34624fa0402SLasse Collin 		      uint8_t *buf, size_t *pos, size_t size)
34724fa0402SLasse Collin {
34824fa0402SLasse Collin 	size_t filtered;
34924fa0402SLasse Collin 
35024fa0402SLasse Collin 	buf += *pos;
35124fa0402SLasse Collin 	size -= *pos;
35224fa0402SLasse Collin 
35324fa0402SLasse Collin 	switch (s->type) {
35424fa0402SLasse Collin #ifdef XZ_DEC_X86
35524fa0402SLasse Collin 	case BCJ_X86:
35624fa0402SLasse Collin 		filtered = bcj_x86(s, buf, size);
35724fa0402SLasse Collin 		break;
35824fa0402SLasse Collin #endif
35924fa0402SLasse Collin #ifdef XZ_DEC_POWERPC
36024fa0402SLasse Collin 	case BCJ_POWERPC:
36124fa0402SLasse Collin 		filtered = bcj_powerpc(s, buf, size);
36224fa0402SLasse Collin 		break;
36324fa0402SLasse Collin #endif
36424fa0402SLasse Collin #ifdef XZ_DEC_IA64
36524fa0402SLasse Collin 	case BCJ_IA64:
36624fa0402SLasse Collin 		filtered = bcj_ia64(s, buf, size);
36724fa0402SLasse Collin 		break;
36824fa0402SLasse Collin #endif
36924fa0402SLasse Collin #ifdef XZ_DEC_ARM
37024fa0402SLasse Collin 	case BCJ_ARM:
37124fa0402SLasse Collin 		filtered = bcj_arm(s, buf, size);
37224fa0402SLasse Collin 		break;
37324fa0402SLasse Collin #endif
37424fa0402SLasse Collin #ifdef XZ_DEC_ARMTHUMB
37524fa0402SLasse Collin 	case BCJ_ARMTHUMB:
37624fa0402SLasse Collin 		filtered = bcj_armthumb(s, buf, size);
37724fa0402SLasse Collin 		break;
37824fa0402SLasse Collin #endif
37924fa0402SLasse Collin #ifdef XZ_DEC_SPARC
38024fa0402SLasse Collin 	case BCJ_SPARC:
38124fa0402SLasse Collin 		filtered = bcj_sparc(s, buf, size);
38224fa0402SLasse Collin 		break;
38324fa0402SLasse Collin #endif
38424fa0402SLasse Collin 	default:
38524fa0402SLasse Collin 		/* Never reached but silence compiler warnings. */
38624fa0402SLasse Collin 		filtered = 0;
38724fa0402SLasse Collin 		break;
38824fa0402SLasse Collin 	}
38924fa0402SLasse Collin 
39024fa0402SLasse Collin 	*pos += filtered;
39124fa0402SLasse Collin 	s->pos += filtered;
39224fa0402SLasse Collin }
39324fa0402SLasse Collin 
39424fa0402SLasse Collin /*
39524fa0402SLasse Collin  * Flush pending filtered data from temp to the output buffer.
39624fa0402SLasse Collin  * Move the remaining mixture of possibly filtered and unfiltered
39724fa0402SLasse Collin  * data to the beginning of temp.
39824fa0402SLasse Collin  */
39924fa0402SLasse Collin static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
40024fa0402SLasse Collin {
40124fa0402SLasse Collin 	size_t copy_size;
40224fa0402SLasse Collin 
40324fa0402SLasse Collin 	copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
40424fa0402SLasse Collin 	memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
40524fa0402SLasse Collin 	b->out_pos += copy_size;
40624fa0402SLasse Collin 
40724fa0402SLasse Collin 	s->temp.filtered -= copy_size;
40824fa0402SLasse Collin 	s->temp.size -= copy_size;
40924fa0402SLasse Collin 	memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
41024fa0402SLasse Collin }
41124fa0402SLasse Collin 
41224fa0402SLasse Collin /*
41324fa0402SLasse Collin  * The BCJ filter functions are primitive in sense that they process the
41424fa0402SLasse Collin  * data in chunks of 1-16 bytes. To hide this issue, this function does
41524fa0402SLasse Collin  * some buffering.
41624fa0402SLasse Collin  */
41724fa0402SLasse Collin XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
41824fa0402SLasse Collin 				     struct xz_dec_lzma2 *lzma2,
41924fa0402SLasse Collin 				     struct xz_buf *b)
42024fa0402SLasse Collin {
42124fa0402SLasse Collin 	size_t out_start;
42224fa0402SLasse Collin 
42324fa0402SLasse Collin 	/*
42424fa0402SLasse Collin 	 * Flush pending already filtered data to the output buffer. Return
42524fa0402SLasse Collin 	 * immediatelly if we couldn't flush everything, or if the next
42624fa0402SLasse Collin 	 * filter in the chain had already returned XZ_STREAM_END.
42724fa0402SLasse Collin 	 */
42824fa0402SLasse Collin 	if (s->temp.filtered > 0) {
42924fa0402SLasse Collin 		bcj_flush(s, b);
43024fa0402SLasse Collin 		if (s->temp.filtered > 0)
43124fa0402SLasse Collin 			return XZ_OK;
43224fa0402SLasse Collin 
43324fa0402SLasse Collin 		if (s->ret == XZ_STREAM_END)
43424fa0402SLasse Collin 			return XZ_STREAM_END;
43524fa0402SLasse Collin 	}
43624fa0402SLasse Collin 
43724fa0402SLasse Collin 	/*
43824fa0402SLasse Collin 	 * If we have more output space than what is currently pending in
43924fa0402SLasse Collin 	 * temp, copy the unfiltered data from temp to the output buffer
44024fa0402SLasse Collin 	 * and try to fill the output buffer by decoding more data from the
44124fa0402SLasse Collin 	 * next filter in the chain. Apply the BCJ filter on the new data
44224fa0402SLasse Collin 	 * in the output buffer. If everything cannot be filtered, copy it
44324fa0402SLasse Collin 	 * to temp and rewind the output buffer position accordingly.
444*9c1f8594SLasse Collin 	 *
445*9c1f8594SLasse Collin 	 * This needs to be always run when temp.size == 0 to handle a special
446*9c1f8594SLasse Collin 	 * case where the output buffer is full and the next filter has no
447*9c1f8594SLasse Collin 	 * more output coming but hasn't returned XZ_STREAM_END yet.
44824fa0402SLasse Collin 	 */
449*9c1f8594SLasse Collin 	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
45024fa0402SLasse Collin 		out_start = b->out_pos;
45124fa0402SLasse Collin 		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
45224fa0402SLasse Collin 		b->out_pos += s->temp.size;
45324fa0402SLasse Collin 
45424fa0402SLasse Collin 		s->ret = xz_dec_lzma2_run(lzma2, b);
45524fa0402SLasse Collin 		if (s->ret != XZ_STREAM_END
45624fa0402SLasse Collin 				&& (s->ret != XZ_OK || s->single_call))
45724fa0402SLasse Collin 			return s->ret;
45824fa0402SLasse Collin 
45924fa0402SLasse Collin 		bcj_apply(s, b->out, &out_start, b->out_pos);
46024fa0402SLasse Collin 
46124fa0402SLasse Collin 		/*
46224fa0402SLasse Collin 		 * As an exception, if the next filter returned XZ_STREAM_END,
46324fa0402SLasse Collin 		 * we can do that too, since the last few bytes that remain
46424fa0402SLasse Collin 		 * unfiltered are meant to remain unfiltered.
46524fa0402SLasse Collin 		 */
46624fa0402SLasse Collin 		if (s->ret == XZ_STREAM_END)
46724fa0402SLasse Collin 			return XZ_STREAM_END;
46824fa0402SLasse Collin 
46924fa0402SLasse Collin 		s->temp.size = b->out_pos - out_start;
47024fa0402SLasse Collin 		b->out_pos -= s->temp.size;
47124fa0402SLasse Collin 		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
472*9c1f8594SLasse Collin 
473*9c1f8594SLasse Collin 		/*
474*9c1f8594SLasse Collin 		 * If there wasn't enough input to the next filter to fill
475*9c1f8594SLasse Collin 		 * the output buffer with unfiltered data, there's no point
476*9c1f8594SLasse Collin 		 * to try decoding more data to temp.
477*9c1f8594SLasse Collin 		 */
478*9c1f8594SLasse Collin 		if (b->out_pos + s->temp.size < b->out_size)
479*9c1f8594SLasse Collin 			return XZ_OK;
48024fa0402SLasse Collin 	}
48124fa0402SLasse Collin 
48224fa0402SLasse Collin 	/*
483*9c1f8594SLasse Collin 	 * We have unfiltered data in temp. If the output buffer isn't full
484*9c1f8594SLasse Collin 	 * yet, try to fill the temp buffer by decoding more data from the
485*9c1f8594SLasse Collin 	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
486*9c1f8594SLasse Collin 	 * fill the actual output buffer by copying filtered data from temp.
487*9c1f8594SLasse Collin 	 * A mix of filtered and unfiltered data may be left in temp; it will
488*9c1f8594SLasse Collin 	 * be taken care on the next call to this function.
48924fa0402SLasse Collin 	 */
490*9c1f8594SLasse Collin 	if (b->out_pos < b->out_size) {
49124fa0402SLasse Collin 		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
49224fa0402SLasse Collin 		s->out = b->out;
49324fa0402SLasse Collin 		s->out_pos = b->out_pos;
49424fa0402SLasse Collin 		s->out_size = b->out_size;
49524fa0402SLasse Collin 		b->out = s->temp.buf;
49624fa0402SLasse Collin 		b->out_pos = s->temp.size;
49724fa0402SLasse Collin 		b->out_size = sizeof(s->temp.buf);
49824fa0402SLasse Collin 
49924fa0402SLasse Collin 		s->ret = xz_dec_lzma2_run(lzma2, b);
50024fa0402SLasse Collin 
50124fa0402SLasse Collin 		s->temp.size = b->out_pos;
50224fa0402SLasse Collin 		b->out = s->out;
50324fa0402SLasse Collin 		b->out_pos = s->out_pos;
50424fa0402SLasse Collin 		b->out_size = s->out_size;
50524fa0402SLasse Collin 
50624fa0402SLasse Collin 		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
50724fa0402SLasse Collin 			return s->ret;
50824fa0402SLasse Collin 
50924fa0402SLasse Collin 		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
51024fa0402SLasse Collin 
51124fa0402SLasse Collin 		/*
51224fa0402SLasse Collin 		 * If the next filter returned XZ_STREAM_END, we mark that
51324fa0402SLasse Collin 		 * everything is filtered, since the last unfiltered bytes
51424fa0402SLasse Collin 		 * of the stream are meant to be left as is.
51524fa0402SLasse Collin 		 */
51624fa0402SLasse Collin 		if (s->ret == XZ_STREAM_END)
51724fa0402SLasse Collin 			s->temp.filtered = s->temp.size;
51824fa0402SLasse Collin 
51924fa0402SLasse Collin 		bcj_flush(s, b);
52024fa0402SLasse Collin 		if (s->temp.filtered > 0)
52124fa0402SLasse Collin 			return XZ_OK;
52224fa0402SLasse Collin 	}
52324fa0402SLasse Collin 
52424fa0402SLasse Collin 	return s->ret;
52524fa0402SLasse Collin }
52624fa0402SLasse Collin 
52724fa0402SLasse Collin XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
52824fa0402SLasse Collin {
52924fa0402SLasse Collin 	struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
53024fa0402SLasse Collin 	if (s != NULL)
53124fa0402SLasse Collin 		s->single_call = single_call;
53224fa0402SLasse Collin 
53324fa0402SLasse Collin 	return s;
53424fa0402SLasse Collin }
53524fa0402SLasse Collin 
53624fa0402SLasse Collin XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
53724fa0402SLasse Collin {
53824fa0402SLasse Collin 	switch (id) {
53924fa0402SLasse Collin #ifdef XZ_DEC_X86
54024fa0402SLasse Collin 	case BCJ_X86:
54124fa0402SLasse Collin #endif
54224fa0402SLasse Collin #ifdef XZ_DEC_POWERPC
54324fa0402SLasse Collin 	case BCJ_POWERPC:
54424fa0402SLasse Collin #endif
54524fa0402SLasse Collin #ifdef XZ_DEC_IA64
54624fa0402SLasse Collin 	case BCJ_IA64:
54724fa0402SLasse Collin #endif
54824fa0402SLasse Collin #ifdef XZ_DEC_ARM
54924fa0402SLasse Collin 	case BCJ_ARM:
55024fa0402SLasse Collin #endif
55124fa0402SLasse Collin #ifdef XZ_DEC_ARMTHUMB
55224fa0402SLasse Collin 	case BCJ_ARMTHUMB:
55324fa0402SLasse Collin #endif
55424fa0402SLasse Collin #ifdef XZ_DEC_SPARC
55524fa0402SLasse Collin 	case BCJ_SPARC:
55624fa0402SLasse Collin #endif
55724fa0402SLasse Collin 		break;
55824fa0402SLasse Collin 
55924fa0402SLasse Collin 	default:
56024fa0402SLasse Collin 		/* Unsupported Filter ID */
56124fa0402SLasse Collin 		return XZ_OPTIONS_ERROR;
56224fa0402SLasse Collin 	}
56324fa0402SLasse Collin 
56424fa0402SLasse Collin 	s->type = id;
56524fa0402SLasse Collin 	s->ret = XZ_OK;
56624fa0402SLasse Collin 	s->pos = 0;
56724fa0402SLasse Collin 	s->x86_prev_mask = 0;
56824fa0402SLasse Collin 	s->temp.filtered = 0;
56924fa0402SLasse Collin 	s->temp.size = 0;
57024fa0402SLasse Collin 
57124fa0402SLasse Collin 	return XZ_OK;
57224fa0402SLasse Collin }
57324fa0402SLasse Collin 
57424fa0402SLasse Collin #endif
575