196f47d61SGeert Uytterhoeven /*
296f47d61SGeert Uytterhoeven * Fast C2P (Chunky-to-Planar) Conversion
396f47d61SGeert Uytterhoeven *
496f47d61SGeert Uytterhoeven * Copyright (C) 2003-2008 Geert Uytterhoeven
596f47d61SGeert Uytterhoeven *
696f47d61SGeert Uytterhoeven * This file is subject to the terms and conditions of the GNU General Public
796f47d61SGeert Uytterhoeven * License. See the file COPYING in the main directory of this archive
896f47d61SGeert Uytterhoeven * for more details.
996f47d61SGeert Uytterhoeven */
1096f47d61SGeert Uytterhoeven
1196f47d61SGeert Uytterhoeven #include <linux/export.h>
1296f47d61SGeert Uytterhoeven #include <linux/module.h>
1396f47d61SGeert Uytterhoeven #include <linux/string.h>
14*5f60d5f6SAl Viro
1596f47d61SGeert Uytterhoeven #include <linux/unaligned.h>
1696f47d61SGeert Uytterhoeven
1796f47d61SGeert Uytterhoeven #include "c2p.h"
1896f47d61SGeert Uytterhoeven #include "c2p_core.h"
1996f47d61SGeert Uytterhoeven
2096f47d61SGeert Uytterhoeven
/*
 * Perform a full C2P step on 16 8-bit pixels, stored in 4 32-bit words
 * containing
 *   - 16 8-bit chunky pixels on input
 *   - permutated planar data (2 planes per 32-bit word) on output
 *
 * The conversion happens in place in @d.  The output words are not in plane
 * order; perm_c2p_16x8[] is what the store helpers use to pick them.
 */

static void c2p_16x8(u32 d[4])
{
	/*
	 * transp4() and transp4x() are provided by c2p_core.h (not shown in
	 * this file).
	 * NOTE(review): the steps are presumably order-dependent, and the
	 * arguments look like (data, block size in bits, words per block) --
	 * confirm against c2p_core.h before reordering or changing them.
	 */
	transp4(d, 8, 2);
	transp4(d, 1, 2);
	transp4x(d, 16, 2);
	transp4x(d, 2, 2);
	transp4(d, 4, 1);
}
3696f47d61SGeert Uytterhoeven
3796f47d61SGeert Uytterhoeven
/*
 * Array containing the permutation indices of the planar data after c2p
 *
 * Entry i is the index into the 4-word array processed by c2p_16x8() of the
 * word that store_iplan2() and store_iplan2_masked() write as the i-th
 * 32-bit word of the destination.
 */

static const int perm_c2p_16x8[4] = { 1, 3, 0, 2 };
4396f47d61SGeert Uytterhoeven
4496f47d61SGeert Uytterhoeven
4596f47d61SGeert Uytterhoeven /*
4696f47d61SGeert Uytterhoeven * Store a full block of iplan2 data after c2p conversion
4796f47d61SGeert Uytterhoeven */
4896f47d61SGeert Uytterhoeven
store_iplan2(void * dst,u32 bpp,u32 d[4])4996f47d61SGeert Uytterhoeven static inline void store_iplan2(void *dst, u32 bpp, u32 d[4])
5096f47d61SGeert Uytterhoeven {
5196f47d61SGeert Uytterhoeven int i;
5296f47d61SGeert Uytterhoeven
5396f47d61SGeert Uytterhoeven for (i = 0; i < bpp/2; i++, dst += 4)
5496f47d61SGeert Uytterhoeven put_unaligned_be32(d[perm_c2p_16x8[i]], dst);
5596f47d61SGeert Uytterhoeven }
5696f47d61SGeert Uytterhoeven
5796f47d61SGeert Uytterhoeven
5896f47d61SGeert Uytterhoeven /*
5996f47d61SGeert Uytterhoeven * Store a partial block of iplan2 data after c2p conversion
6096f47d61SGeert Uytterhoeven */
6196f47d61SGeert Uytterhoeven
store_iplan2_masked(void * dst,u32 bpp,u32 d[4],u32 mask)6296f47d61SGeert Uytterhoeven static inline void store_iplan2_masked(void *dst, u32 bpp, u32 d[4], u32 mask)
6396f47d61SGeert Uytterhoeven {
6496f47d61SGeert Uytterhoeven int i;
6596f47d61SGeert Uytterhoeven
6696f47d61SGeert Uytterhoeven for (i = 0; i < bpp/2; i++, dst += 4)
6796f47d61SGeert Uytterhoeven put_unaligned_be32(comp(d[perm_c2p_16x8[i]],
6896f47d61SGeert Uytterhoeven get_unaligned_be32(dst), mask),
6996f47d61SGeert Uytterhoeven dst);
7096f47d61SGeert Uytterhoeven }
7196f47d61SGeert Uytterhoeven
7296f47d61SGeert Uytterhoeven
7396f47d61SGeert Uytterhoeven /*
7496f47d61SGeert Uytterhoeven * c2p_iplan2 - Copy 8-bit chunky image data to an interleaved planar
7596f47d61SGeert Uytterhoeven * frame buffer with 2 bytes of interleave
7696f47d61SGeert Uytterhoeven * @dst: Starting address of the planar frame buffer
7796f47d61SGeert Uytterhoeven * @dx: Horizontal destination offset (in pixels)
7896f47d61SGeert Uytterhoeven * @dy: Vertical destination offset (in pixels)
7996f47d61SGeert Uytterhoeven * @width: Image width (in pixels)
8096f47d61SGeert Uytterhoeven * @height: Image height (in pixels)
8196f47d61SGeert Uytterhoeven * @dst_nextline: Frame buffer offset to the next line (in bytes)
8296f47d61SGeert Uytterhoeven * @src_nextline: Image offset to the next line (in bytes)
8396f47d61SGeert Uytterhoeven * @bpp: Bits per pixel of the planar frame buffer (2, 4, or 8)
8496f47d61SGeert Uytterhoeven */
8596f47d61SGeert Uytterhoeven
c2p_iplan2(void * dst,const void * src,u32 dx,u32 dy,u32 width,u32 height,u32 dst_nextline,u32 src_nextline,u32 bpp)8696f47d61SGeert Uytterhoeven void c2p_iplan2(void *dst, const void *src, u32 dx, u32 dy, u32 width,
8796f47d61SGeert Uytterhoeven u32 height, u32 dst_nextline, u32 src_nextline, u32 bpp)
8896f47d61SGeert Uytterhoeven {
8996f47d61SGeert Uytterhoeven union {
9096f47d61SGeert Uytterhoeven u8 pixels[16];
9196f47d61SGeert Uytterhoeven u32 words[4];
9296f47d61SGeert Uytterhoeven } d;
9396f47d61SGeert Uytterhoeven u32 dst_idx, first, last, w;
9496f47d61SGeert Uytterhoeven const u8 *c;
9596f47d61SGeert Uytterhoeven void *p;
9696f47d61SGeert Uytterhoeven
9796f47d61SGeert Uytterhoeven dst += dy*dst_nextline+(dx & ~15)*bpp;
9896f47d61SGeert Uytterhoeven dst_idx = dx % 16;
9996f47d61SGeert Uytterhoeven first = 0xffffU >> dst_idx;
10096f47d61SGeert Uytterhoeven first |= first << 16;
10196f47d61SGeert Uytterhoeven last = 0xffffU ^ (0xffffU >> ((dst_idx+width) % 16));
10296f47d61SGeert Uytterhoeven last |= last << 16;
10396f47d61SGeert Uytterhoeven while (height--) {
10496f47d61SGeert Uytterhoeven c = src;
10596f47d61SGeert Uytterhoeven p = dst;
10696f47d61SGeert Uytterhoeven w = width;
10796f47d61SGeert Uytterhoeven if (dst_idx+width <= 16) {
10896f47d61SGeert Uytterhoeven /* Single destination word */
10996f47d61SGeert Uytterhoeven first &= last;
11096f47d61SGeert Uytterhoeven memset(d.pixels, 0, sizeof(d));
11196f47d61SGeert Uytterhoeven memcpy(d.pixels+dst_idx, c, width);
11296f47d61SGeert Uytterhoeven c += width;
11396f47d61SGeert Uytterhoeven c2p_16x8(d.words);
11496f47d61SGeert Uytterhoeven store_iplan2_masked(p, bpp, d.words, first);
11596f47d61SGeert Uytterhoeven p += bpp*2;
11696f47d61SGeert Uytterhoeven } else {
11796f47d61SGeert Uytterhoeven /* Multiple destination words */
11896f47d61SGeert Uytterhoeven w = width;
11996f47d61SGeert Uytterhoeven /* Leading bits */
12096f47d61SGeert Uytterhoeven if (dst_idx) {
12196f47d61SGeert Uytterhoeven w = 16 - dst_idx;
12296f47d61SGeert Uytterhoeven memset(d.pixels, 0, dst_idx);
12396f47d61SGeert Uytterhoeven memcpy(d.pixels+dst_idx, c, w);
12496f47d61SGeert Uytterhoeven c += w;
12596f47d61SGeert Uytterhoeven c2p_16x8(d.words);
12696f47d61SGeert Uytterhoeven store_iplan2_masked(p, bpp, d.words, first);
12796f47d61SGeert Uytterhoeven p += bpp*2;
12896f47d61SGeert Uytterhoeven w = width-w;
12996f47d61SGeert Uytterhoeven }
13096f47d61SGeert Uytterhoeven /* Main chunk */
13196f47d61SGeert Uytterhoeven while (w >= 16) {
13296f47d61SGeert Uytterhoeven memcpy(d.pixels, c, 16);
13396f47d61SGeert Uytterhoeven c += 16;
13496f47d61SGeert Uytterhoeven c2p_16x8(d.words);
13596f47d61SGeert Uytterhoeven store_iplan2(p, bpp, d.words);
13696f47d61SGeert Uytterhoeven p += bpp*2;
13796f47d61SGeert Uytterhoeven w -= 16;
13896f47d61SGeert Uytterhoeven }
13996f47d61SGeert Uytterhoeven /* Trailing bits */
14096f47d61SGeert Uytterhoeven w %= 16;
14196f47d61SGeert Uytterhoeven if (w > 0) {
14296f47d61SGeert Uytterhoeven memcpy(d.pixels, c, w);
14396f47d61SGeert Uytterhoeven memset(d.pixels+w, 0, 16-w);
14496f47d61SGeert Uytterhoeven c2p_16x8(d.words);
14596f47d61SGeert Uytterhoeven store_iplan2_masked(p, bpp, d.words, last);
14696f47d61SGeert Uytterhoeven }
14796f47d61SGeert Uytterhoeven }
14896f47d61SGeert Uytterhoeven src += src_nextline;
14996f47d61SGeert Uytterhoeven dst += dst_nextline;
15096f47d61SGeert Uytterhoeven }
15196f47d61SGeert Uytterhoeven }
15296f47d61SGeert Uytterhoeven EXPORT_SYMBOL_GPL(c2p_iplan2);
15396f47d61SGeert Uytterhoeven
154 MODULE_LICENSE("GPL");
155