Arithmetische Kodierung in C
/******************************************************************************
* *
* File: hwzip.c *
* *
* Purpose: simple entropy encoder using arithmetic codes *
* *
* Authors: Steffen Hein *
* Juergen Werner *
* *
* Based on: Khalid Sayood, "Introduction to Data Compression" *
* Morgan Kaufmann Publishers, Inc., 1996 *
* *
* Copyright 2000 Steffen Hein and Juergen Werner. All rights reserved. *
* *
* This program is supplied free of charge for research purpose only, *
* it may not sold or incorporated into any commercial product. *
* It comes with ABSOLUTELY NO WARRANTY, use it at your own risk. *
* The copyright notice and this statement of conditions must remain *
* in every copy of this file. *
* *
* Last update: 2000-05-23 *
* *
*****************************************************************************/
#include
#include
#define COUNT_MAX 256
FILE *infile, *outfile;
char *s_infile, *s_outfile;
unsigned short buffer, buf_count;
unsigned long file_size,out_file_size;
unsigned long cum_count[COUNT_MAX+1], total_count;
unsigned long l, u, l_new, u_new;
unsigned long tag;
int S;
#define CONDITION_E12(l,u) ( ( ( (l) ^ (u) ) & 0x00008000 ) == 0 )
#define CONDITION_E3(l,u) ( ( ( (l) & 0x0000C000 ) == 0x00004000 ) \
&& ( ( (u) & 0x0000C000 ) == 0x00008000 ) )
#define MAPPING_E12(x) ( ( (x) << 1 ) & 0x0000FFFF )
#define MAPPING_E3(x) ( ( ( (x) << 1 ) ^ 0x00008000 ) & 0x0000FFFF )
#define SEND_BIT(b) \
do { \
buffer = (buffer << 1) | ((short) (b) & 0x01); \
if (++buf_count == 8) \
{ \
fputc(buffer, outfile); \
out_file_size++; \
buf_count = 0; \
buffer = 0; \
}; \
} while (0)
void compress_start()
{
unsigned long i, q;
/******************* now first do some init ********************/
printf("compressing %s to %s\n", s_infile, s_outfile);
buffer = 0;
buf_count = 0;
for (i = 0; i <= COUNT_MAX; i++) cum_count[ i ] = 0; // init cum_count
S = fgetc(infile);
file_size = 0;
/************* first pass: get symbol probabilities *************/
while (!feof(infile)) { cum_count[ S+1 ]++; S = fgetc(infile); file_size++; }
fclose(infile);
total_count = file_size;
while ( total_count >= 0x4000 ) // do recalculation of probabilities
{
q = (total_count >> 14) + 1;
printf(" source file is big (> 16 KB), divide with %i\n",q);
total_count = 0;
for (i=1; i<=COUNT_MAX; i++) {
cum_count[i] = ( cum_count[i] + q - 1 ) / q; //round up
total_count += cum_count[i];
}
} // while
infile = fopen(s_infile , "rb");
fprintf(outfile, "HWZ1");
fwrite( &file_size, sizeof(file_size), 1, outfile);
/*************** make the cumulative function ***************/
for (i = 1; i <= COUNT_MAX; i++)
{
cum_count[ i ] += cum_count[ i-1 ];
fwrite( &cum_count[i], 2, 1, outfile);
}
}
void compress_end()
{
printf("\nSource file: %u bytes, ", file_size);
printf("representation: %u bytes (+520 bytes)\n", out_file_size);
if (file_size > 0) {
printf("compressed to %4.1f %% of original size, ", \
(float) (out_file_size) * 100 / (float) (file_size));
printf("%4.2f bits/char\n\n",(float) out_file_size*8 / (float) file_size);
}
}
void compress()
{
unsigned long scale3;
compress_start();
/************* second pass: encode the input stream ***********/
S = fgetc(infile); // read first symbol
out_file_size = 0;
l = 0x00000000;
u = 0x0000FFFF;
scale3 = 0;
while ( !feof( infile ) )
{
l_new = l + ( u - l + 1 ) * cum_count[ S ] / total_count;
u_new = l - 1 + ( u - l + 1 ) * cum_count[S+1] / total_count;
l = l_new; /********** update borders l and u ************/
u = u_new; /********** and rescale ************/
while CONDITION_E12(l, u)
{ // E1 or E2-mapping
SEND_BIT(l >> 15);
while (scale3 > 0) { SEND_BIT((l >> 15) ^ 0x01); scale3--; }
l = MAPPING_E12(l);
u = MAPPING_E12(u) | 0x00000001;
}
while CONDITION_E3(l, u)
{ // E3-mapping
l = MAPPING_E3(l);
u = MAPPING_E3(u) | 0x00000001;
scale3++;
};
S = fgetc(infile); // read next symbol
} // while not eof
SEND_BIT(1);
while (buf_count > 0) SEND_BIT(0); // flush buffer
compress_end();
} // compress
#define READ_BIT(b) \
do { \
if ( buf_count <= 0 ) \
{ \
buffer = fgetc(infile); \
if ( feof(infile) ) buffer = 0; \
buf_count = 8; \
}; \
b = ((unsigned long) (buffer) >> 7 ) & 0x01; \
buffer = buffer << 1; \
buf_count--; \
} while (0)
int decompress_start()
{
char header[5];
unsigned long i;
/******************** first some init *************************/
printf("decompressing %s to %s\n", s_infile, s_outfile);
buffer = 0;
buf_count = 0;
fgets( header, sizeof(header), infile);
if (strcmp(header,"HWZ1")!=0) { printf("file is not a HWZ!\n"); return(1); }
fread( &file_size, sizeof(file_size), 1, infile); // read file_size
/*************** read cumulative function from file ***************/
cum_count[0] = 0;
for (i = 1; i <= COUNT_MAX; i++) fread( &cum_count[i], 2, 1, infile);
total_count = cum_count[COUNT_MAX];
}
int decompress()
{
int S_l,S_u;
unsigned long i,t,bit;
decompress_start();
/***************** start the decoding process *****************/
l = 0x00000000;
u = 0x0000FFFF;
tag = 0;
for (i=0; i<16; i++) { READ_BIT(bit); tag = (tag << 1) | bit; } // read tag
for (i = 0; i < file_size; i++)
{
t = ( ( tag - l + 1 ) * total_count - 1 ) / ( u - l + 1 );
/***************** now decode the symbol S ****************/
S_l = 0;
S_u = COUNT_MAX;
S = COUNT_MAX / 2;
do { // find symbol S with ( cum_count[S] <= t < cum_count[S+1] )
if ( cum_count[S] <= t ) S_l = S; else S_u = S;
S = ( S_l + S_u ) >> 1;
} while (!( (cum_count[S] <= t) && (t < cum_count[S+1]) ));
/*************** update borders l and u ***************/
l_new = l + (u - l + 1) * cum_count[ S ] / total_count;
u_new = l - 1 + (u - l + 1) * cum_count[S+1] / total_count;
l = l_new;
u = u_new;
fputc(S, outfile); /****** and rescale ********/
while CONDITION_E12(l, u)
{ // E1 or E2-mapping
READ_BIT(bit);
tag = MAPPING_E12(tag) | bit;
l = MAPPING_E12(l);
u = MAPPING_E12(u) | 0x00000001;
}
while CONDITION_E3(l, u)
{ // E3-mapping
READ_BIT(bit);
tag = MAPPING_E3(tag) | bit;
l = MAPPING_E3(l);
u = MAPPING_E3(u) | 0x00000001;
}
} // for
return(0);
}
int main(int argc, char *argv[])
{
printf("\nHWZip (c) 2000 S. Hein & J. Werner\n\n");
if (argc == 4) {
s_infile = argv[2];
s_outfile = argv[3];
infile = fopen(s_infile, "rb");
outfile = fopen(s_outfile, "wb");
if (strcmp(argv[1],"-z")==0) compress();
if (strcmp(argv[1],"-u")==0) decompress();
fclose(infile);
fclose(outfile);
}
else {
printf("usage: hwzip -[zu] infile outfile\n\n");
printf(" z: zip\n");
printf(" u: unzip\n\n");
}
return(0);
} // main