/** * Copyright (C) 2001, 2003 Billy Biggs . * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include "diffcomp.h" /** * This source file implements a simple form of information-preserving * differential coding, that is, predictive coding without quantization. * The basic idea is to predict the value of a pixel and encode the * error. * * I first got interested in the idea from the 'Huffyuv' codec by Ben * Rudiak-Gould and released under the GPL. See * http://www.math.berkeley.edu/~benrg/huffyuv.html for more information * about 'Huffyuv'. * * There are a few ways of making a good prediction. My reference is * 'Image and Video Compression for Multimedia Engineering' by Yun Q. * Shi and Huifang Sun. One good reference they seemed to use was: * Haskell, B. G., Frame replenishment coding of television, in 'Image * Transmission Techniques', W.K. Pratt (Ed.), Academic Press, New York, * 1979. * * Here are some ideas: Say we have the following samples: * * a b c m n o * d e f p q r * g h i s t u * j k l v w x * | +----current frame * previous frame * * So, say we're trying to encode sample t. There are a few easy * prediction schemes. The few I played with were: * * Element difference: E = t - s * Field-based interpolation: E = t - ((n + s) / 2) * or E = t - ((n + s + m) / 3) * Median (field based): E = med(n, s, (n + s) / 2) * or E = med(n, s, n + s - m) * Frame difference: E = (t - s) - (h - g) * (E = error) * * Some other neat suggested ones were: * Field difference: * E = (t - ((q + w) / 2)) - (h - ((e + k) / 2)) * Line difference: * E = (t - n) - (h - b) * * Some of the above names are from Shi and Sun. Also, some were based * on Ben Rudiak-Gould's page. Basically though, what I found was that * the frame difference idea of using the previous frame did not produce * a code with much less entropy (<.1 bit). As well, using anything * from 2 scanlines above is another bad idea. This leaves only the * element difference, that is, using the previous pixel value, as the * best option. * * I seem to get an average compression of about 55%-65% of the original * uncompressed size. * * The code below is optimized for codewords < 16 bits. I first * generated a code with a max wordlength of 12 bits based on * 'Transformers: The Movie' VHS sampled at 352x480. I generated the * new code below from 'Austin Powers' VHS, and I think I like it * better. Sampling TV at 352x480 gave me longer codes without much * better entropy values, so I'm keeping this code for now. * * I use the source code found at * http://www.cs.mu.oz.au/~alistair/abstracts/wads95.html * to generate the word lengths for my code. The code I build at * runtime based on these lenghts. I generate a 'canonical huffman * code'. I got help for coding this (and optimizing it) from the * website 'Practical Huffman Coding' at * http://www.compressconsult.com/huffman/ * * You'll note that I use the same compression table for both the luma * and the chroma. I know, I could probably do even better by building * different codes for each, but I didn't. Since I'm only sampling at * 4:2:0 rates in my app, I'm ok with this situation, for now. * * - Billy Biggs */ /** * Updates: * * Sat Apr 12 16:19:03 ADT 2003 * - Updated to have a first-8-bits lookup for decoding speed. Works well. * - Updated to include a YUY2 compressor and decompressor. * - Note the code generation source we used is now here: * http://www.cs.mu.oz.au/~alistair/inplace.c */ /** * Everything I do needs to hold a number of bits and a value. Kinda * cool, eh? */ typedef struct mytuple_s { int numbits; unsigned int value; } mytuple_t; /** * This table holds the number of bits used by each value in the huffman * code. Since we're using canonical huffman codes, this is sufficient * to uniquely describe the code. The max size variable holds the * maximum length of any codeword. */ static int maxsize = 15; static mytuple_t codesizetable[] = { { 15, 120 }, { 15, 121 }, { 15, 122 }, { 15, 118 }, { 15, 124 }, { 15, 119 }, { 15, 123 }, { 15, 125 }, { 15, 117 }, { 15, 128 }, { 15, 127 }, { 15, 116 }, { 15, 126 }, { 15, 129 }, { 15, 130 }, { 15, 114 }, { 15, 115 }, { 15, 131 }, { 15, 132 }, { 15, 133 }, { 15, 134 }, { 15, 135 }, { 15, 113 }, { 15, 112 }, { 15, 111 }, { 15, 137 }, { 15, 136 }, { 15, 110 }, { 14, 139 }, { 14, 138 }, { 14, 109 }, { 14, 140 }, { 14, 108 }, { 14, 141 }, { 14, 107 }, { 14, 142 }, { 14, 143 }, { 14, 106 }, { 14, 144 }, { 14, 145 }, { 14, 105 }, { 14, 104 }, { 14, 146 }, { 14, 103 }, { 14, 147 }, { 14, 148 }, { 14, 102 }, { 14, 149 }, { 14, 150 }, { 14, 101 }, { 14, 151 }, { 14, 100 }, { 14, 152 }, { 14, 99 }, { 14, 153 }, { 14, 154 }, { 14, 98 }, { 14, 97 }, { 14, 155 }, { 14, 156 }, { 14, 96 }, { 14, 157 }, { 14, 95 }, { 14, 94 }, { 14, 158 }, { 14, 159 }, { 14, 93 }, { 14, 160 }, { 14, 92 }, { 14, 161 }, { 14, 91 }, { 14, 162 }, { 14, 163 }, { 14, 90 }, { 14, 164 }, { 14, 89 }, { 14, 165 }, { 14, 88 }, { 13, 166 }, { 13, 87 }, { 13, 167 }, { 13, 86 }, { 13, 168 }, { 13, 85 }, { 13, 169 }, { 13, 84 }, { 13, 170 }, { 13, 83 }, { 13, 171 }, { 13, 82 }, { 13, 172 }, { 13, 81 }, { 13, 173 }, { 13, 80 }, { 13, 174 }, { 13, 175 }, { 13, 79 }, { 13, 176 }, { 13, 78 }, { 13, 177 }, { 13, 77 }, { 13, 178 }, { 13, 76 }, { 13, 75 }, { 13, 179 }, { 13, 180 }, { 13, 74 }, { 13, 181 }, { 13, 73 }, { 13, 182 }, { 13, 72 }, { 13, 183 }, { 13, 71 }, { 13, 184 }, { 13, 70 }, { 13, 185 }, { 13, 186 }, { 13, 69 }, { 12, 68 }, { 12, 187 }, { 12, 67 }, { 12, 188 }, { 12, 66 }, { 12, 189 }, { 12, 65 }, { 12, 190 }, { 12, 64 }, { 12, 191 }, { 12, 63 }, { 12, 192 }, { 12, 62 }, { 12, 193 }, { 12, 194 }, { 12, 61 }, { 12, 195 }, { 12, 60 }, { 12, 196 }, { 12, 59 }, { 12, 197 }, { 12, 58 }, { 12, 198 }, { 12, 57 }, { 12, 199 }, { 12, 56 }, { 12, 200 }, { 12, 55 }, { 12, 201 }, { 12, 54 }, { 12, 202 }, { 12, 53 }, { 12, 203 }, { 12, 52 }, { 12, 204 }, { 12, 51 }, { 12, 205 }, { 12, 50 }, { 11, 206 }, { 11, 49 }, { 11, 207 }, { 11, 48 }, { 11, 208 }, { 11, 47 }, { 11, 209 }, { 11, 46 }, { 11, 210 }, { 11, 45 }, { 11, 211 }, { 11, 212 }, { 11, 44 }, { 11, 213 }, { 11, 43 }, { 11, 214 }, { 11, 42 }, { 11, 215 }, { 11, 41 }, { 11, 216 }, { 11, 40 }, { 11, 217 }, { 11, 39 }, { 11, 218 }, { 11, 38 }, { 11, 219 }, { 11, 37 }, { 11, 220 }, { 11, 36 }, { 11, 221 }, { 11, 35 }, { 10, 222 }, { 10, 34 }, { 10, 223 }, { 10, 33 }, { 10, 224 }, { 10, 32 }, { 10, 225 }, { 10, 31 }, { 10, 226 }, { 10, 30 }, { 10, 227 }, { 10, 29 }, { 10, 228 }, { 10, 28 }, { 10, 27 }, { 10, 229 }, { 10, 26 }, { 10, 230 }, { 10, 25 }, { 10, 231 }, { 10, 24 }, { 10, 232 }, { 9, 23 }, { 9, 233 }, { 9, 22 }, { 9, 234 }, { 9, 21 }, { 9, 235 }, { 9, 20 }, { 9, 236 }, { 9, 19 }, { 9, 237 }, { 9, 18 }, { 9, 238 }, { 9, 17 }, { 9, 239 }, { 9, 16 }, { 9, 240 }, { 8, 15 }, { 8, 241 }, { 8, 14 }, { 8, 242 }, { 8, 13 }, { 8, 243 }, { 8, 244 }, { 8, 12 }, { 8, 245 }, { 8, 11 }, { 7, 246 }, { 7, 10 }, { 7, 247 }, { 7, 9 }, { 7, 248 }, { 7, 8 }, { 7, 249 }, { 7, 7 }, { 6, 250 }, { 6, 6 }, { 6, 251 }, { 6, 5 }, { 5, 252 }, { 5, 4 }, { 5, 253 }, { 5, 3 }, { 4, 254 }, { 4, 2 }, { 3, 255 }, { 3, 1 }, { 2, 0 }, }; /** * This holds the code table used for encoding. */ static mytuple_t codetable[ 256 ]; static int codesgenerated = 0; /** * Here is where I generate the huffman codewords based on the table * above. Real simple. See the website on 'Practical Huffman Coding'. */ static void generate_codes( void ) { mytuple_t *curtuple = codesizetable; int lastsize = maxsize; int curcode = 0; while( curtuple->numbits > 0 ) { /** * Basically, I just go through each value in the codesizetable * and assign a codeword. */ if( curtuple->numbits != lastsize ) { for(; lastsize > curtuple->numbits; lastsize-- ) { curcode >>= 1; } } codetable[ curtuple->value ].numbits = curtuple->numbits; codetable[ curtuple->value ].value = curcode; curcode++; curtuple++; } codesgenerated = 1; } /** * This is the table I use for decompression. Basically, I just make a * table of every possible 16bit value I could see in the input and what * code word is first in those 16 bits. Not very fast. */ static mytuple_t decompressiontable[ 65536 ]; static int table_generated = 0; static mytuple_t first_decompressiontable[ 256 ]; static int first_cutoff = 0; static void generate_decompression_table( void ) { int num_hits_first[ 256 ]; int i; if( !codesgenerated ) generate_codes(); /** * For sanity checking our table. Only bother if you change the * code */ /* for( i = 0; i < 65536; i++ ) { decompressiontable[ i ].numbits = -1; decompressiontable[ i ].value = -1; } */ for( i = 0; i < 256; i++ ) { first_decompressiontable[ i ].numbits = -1; num_hits_first[ i ] = 0; } /** * We need to know the cut off point in our table where our first lookup table * will fail. */ for( i = 0; i < 256; i++ ) { mytuple_t *codeword = &(codetable[ i ]); int startword = codeword->value << ( 16 - codeword->numbits ); int firstbyte = (startword >> 8) & 0xff; num_hits_first[ firstbyte ]++; } for( i = 0; i < 256; i++ ) { if( num_hits_first[ i ] < 2 ) { first_cutoff = i - 1; break; } } /** * So, for each codeword, we just fill up every entry which * corresponds to it in the decode table. Pretty simple. */ for( i = 0; i < 256; i++ ) { mytuple_t *codeword = &(codetable[ i ]); int startword = codeword->value << ( 16 - codeword->numbits ); int nextword = startword; int mask = 0; int firstbyte = (startword >> 8) & 0xff; int j; for( j = 0; j < codeword->numbits; j++ ) { mask = ( mask >> 1 ) | 0x8000; } if( firstbyte > first_cutoff ) { int temp = firstbyte; while( ( temp & (mask >> 8) ) == firstbyte ) { first_decompressiontable[ temp & 0xff ].numbits = codeword->numbits; first_decompressiontable[ temp & 0xff ].value = i; temp++; } first_decompressiontable[ firstbyte ].numbits = codeword->numbits; first_decompressiontable[ firstbyte ].value = i; } while( ( nextword & mask ) == startword ) { decompressiontable[ nextword & 0xffff ].numbits = codeword->numbits; decompressiontable[ nextword & 0xffff ].value = i; nextword++; } } /* Sanity check our table. */ /* for( i = 0; i < 65536; i++ ) { if( decompressiontable[ i ].numbits == -1 ) { fprintf( stderr, "diffcomp: Didn't fill value %d\n", i ); } } */ table_generated = 1; } static inline __attribute__ ((always_inline,const)) mytuple_t *decompress_next_byte( const uint32_t curstuff ) { /** * Since we have that ereet table, all we need do is look up * which value we just read! */ int firstbyte = (curstuff >> 24) & 0xff; if( firstbyte > first_cutoff ) { return &(first_decompressiontable[ firstbyte ]); } else { return &(decompressiontable[ ( curstuff >> 16 ) & 0xffff ]); } } /** * Code for keeping statistics useful for building a new code and * analyzing the entropy of our codes. Currently unused. */ FILE *statsfile = 0; static unsigned int frequency[ 256 ]; static unsigned int values[ 256 ]; static void initialize_frequencies( void ) { int i; for( i = 0; i < 256; i++ ) frequency[ i ] = 0; for( i = 0; i < 256; i++ ) values[ i ] = i; } /** * This is a bad sort, but cheap to code, and it's only 256 values. */ static void sort_frequencies( void ) { int i, j; for( i = 0; i < 256; i++ ) { for( j = i + 1; j < 256; j++ ) { if( frequency[ j ] < frequency[ i ] ) { int tmp = frequency[ j ]; frequency[ j ] = frequency[ i ]; frequency[ i ] = tmp; tmp = values[ j ]; values[ j ] = values[ i ]; values[ i ] = tmp; } } } } /** * Prints out the frequencies in a form suitable for the app I'm using * to generate my code lengths. */ static void print_frequencies( void ) { int i; fprintf( statsfile, "256\n" ); for( i = 0; i < 256; i++ ) { fprintf( statsfile, "%d %d\n", frequency[ i ], values[ i ] ); } } int diffcomp_compress_plane( unsigned char *dst, unsigned char *src, int width, int height ) { unsigned char lastcode = 0; uint32_t curout = 0; int bitsused = 0; int outsize = 0; int i; if( !codesgenerated ) generate_codes(); for( i = 0; i < width * height; i++ ) { unsigned char diff = ( *src - lastcode ) & 0xff; mytuple_t *nextout = &(codetable[ diff ]); /** * If we were trying to measure the input one would * do: * frequency[ diff ]++; * around here. */ /** * If we're going to overflow, write out some bytes. */ while( ( nextout->numbits + bitsused ) > 32 ) { bitsused -= 8; *dst++ = ( curout >> bitsused ); outsize++; } /** * Shift over enough space for our codeword, then or in the * value of the codeword. */ curout = curout << nextout->numbits; curout = curout | nextout->value; bitsused += nextout->numbits; lastcode = *src; src++; } /* Write what's left, and write some buffer room for safety. */ curout = curout << ( 32 - bitsused ); *dst++ = ( curout >> 24 ) & 0xff; *dst++ = ( curout >> 16 ) & 0xff; *dst++ = ( curout >> 8 ) & 0xff; *dst++ = ( curout ) & 0xff; *dst++ = 0; *dst++ = 0; *dst++ = 0; *dst++ = 0; outsize += 8; return outsize; } void diffcomp_decompress_plane( unsigned char *dst, unsigned char *src, int width, int height ) { unsigned char lastvalue = 0; uint32_t curstuff = 0; int bitsused = 0; int i; if( !table_generated ) generate_decompression_table(); for( i = 0; i < width * height; i++ ) { mytuple_t *decodedword; /** * Any time we have less than 16 bits, load up some more data * from the compressed image. */ if( bitsused < 16 ) { unsigned int coolstuff = *src << 8 | *(src + 1); curstuff = curstuff | ( coolstuff << ( 16 - bitsused ) ); bitsused += 16; src += 2; } /** * Since we have that ereet table, all we need do is look up * which value we just read! */ decodedword = decompress_next_byte( curstuff ); curstuff = curstuff << decodedword->numbits; bitsused -= decodedword->numbits; *dst = ( lastvalue + decodedword->value ) & 0xff; lastvalue = *dst; dst++; } } void compress_next_byte( unsigned char **dst, uint32_t *curout, int *bitsused, int *outsize, unsigned char diff ) { mytuple_t *nextout = &(codetable[ diff ]); /** * If we're going to overflow, write out some bytes. */ while( ( nextout->numbits + *bitsused ) > 32 ) { *bitsused -= 8; **dst = ( *curout >> *bitsused ); *dst += 1; *outsize = *outsize + 1; } /** * Shift over enough space for our codeword, then or in the * value of the codeword. */ *curout = *curout << nextout->numbits; *curout = *curout | nextout->value; *bitsused += nextout->numbits; } int diffcomp_compress_packed422( unsigned char *dst, unsigned char *src, int width, int height ) { unsigned char lastcode_y = 0; unsigned char lastcode_cb = 0; unsigned char lastcode_cr = 0; uint32_t curout = 0; int bitsused = 0; int outsize = 0; int i; if( !codesgenerated ) generate_codes(); for( i = 0; i < width * height / 2; i++ ) { compress_next_byte( &dst, &curout, &bitsused, &outsize, (*src - lastcode_y) & 0xff ); lastcode_y = *src; src++; compress_next_byte( &dst, &curout, &bitsused, &outsize, (*src - lastcode_cb) & 0xff ); lastcode_cb = *src; src++; compress_next_byte( &dst, &curout, &bitsused, &outsize, (*src - lastcode_y) & 0xff ); lastcode_y = *src; src++; compress_next_byte( &dst, &curout, &bitsused, &outsize, (*src - lastcode_cr) & 0xff ); lastcode_cr = *src; src++; } /* Write what's left, and write some buffer room for safety. */ curout = curout << ( 32 - bitsused ); *dst++ = ( curout >> 24 ) & 0xff; *dst++ = ( curout >> 16 ) & 0xff; *dst++ = ( curout >> 8 ) & 0xff; *dst++ = ( curout ) & 0xff; *dst++ = 0; *dst++ = 0; *dst++ = 0; *dst++ = 0; outsize += 8; return outsize; } void diffcomp_decompress_packed422( unsigned char *dst, unsigned char *src, int width, int height ) { unsigned char lastvalue_y = 0; unsigned char lastvalue_cb = 0; unsigned char lastvalue_cr = 0; uint32_t curstuff = 0; int bitsused = 0; int i; if( !table_generated ) generate_decompression_table(); for( i = 0; i < width * height / 2; i++ ) { mytuple_t *decodedword; if( bitsused < 16 ) { unsigned int coolstuff = (*src << 8) | *(src + 1); curstuff = curstuff | ( coolstuff << ( 16 - bitsused ) ); bitsused += 16; src += 2; } decodedword = decompress_next_byte( curstuff ); curstuff = curstuff << decodedword->numbits; bitsused -= decodedword->numbits; *dst = ( lastvalue_y + decodedword->value ) & 0xff; lastvalue_y = *dst; dst++; if( bitsused < 16 ) { unsigned int coolstuff = (*src << 8) | *(src + 1); curstuff = curstuff | ( coolstuff << ( 16 - bitsused ) ); bitsused += 16; src += 2; } decodedword = decompress_next_byte( curstuff ); curstuff = curstuff << decodedword->numbits; bitsused -= decodedword->numbits; *dst = ( lastvalue_cb + decodedword->value ) & 0xff; lastvalue_cb = *dst; dst++; if( bitsused < 16 ) { unsigned int coolstuff = (*src << 8) | *(src + 1); curstuff = curstuff | ( coolstuff << ( 16 - bitsused ) ); bitsused += 16; src += 2; } decodedword = decompress_next_byte( curstuff ); curstuff = curstuff << decodedword->numbits; bitsused -= decodedword->numbits; *dst = ( lastvalue_y + decodedword->value ) & 0xff; lastvalue_y = *dst; dst++; if( bitsused < 16 ) { unsigned int coolstuff = (*src << 8) | *(src + 1); curstuff = curstuff | ( coolstuff << ( 16 - bitsused ) ); bitsused += 16; src += 2; } decodedword = decompress_next_byte( curstuff ); curstuff = curstuff << decodedword->numbits; bitsused -= decodedword->numbits; *dst = ( lastvalue_cr + decodedword->value ) & 0xff; lastvalue_cr = *dst; dst++; } }