Arithmetische Kodierung in C




/******************************************************************************
 *                                                                            *
 *   File:      hwzip.c                                                       *
 *                                                                            *
 *   Purpose:   simple entropy encoder using arithmetic codes                 *
 *                                                                            *
 *   Authors:   Steffen Hein                             *
 *              Juergen Werner                                 *
 *                                                                            *
 *   Based on:  Khalid Sayood, "Introduction to Data Compression"             *
 *                Morgan Kaufmann Publishers, Inc., 1996                      *
 *                                                                            *
 *   Copyright 2000 Steffen Hein and Juergen Werner. All rights reserved.     *
 *                                                                            * 
 *   This program is supplied free of charge for research purpose only,       *
 *   it may not sold or incorporated into any commercial product.             *
 *   It comes with ABSOLUTELY NO WARRANTY, use it at your own risk.           *
 *   The copyright notice and this statement of conditions must remain        *
 *   in every copy of this file.                                              *
 *                                                                            *
 *   Last update: 2000-05-23                                                  *
 *                                                                            *
 *****************************************************************************/

#include 
#include 

#define COUNT_MAX 256

FILE *infile, *outfile;
char *s_infile, *s_outfile;
unsigned short buffer, buf_count;
unsigned long file_size,out_file_size;
unsigned long cum_count[COUNT_MAX+1], total_count;
unsigned long l, u, l_new, u_new;
unsigned long tag;
int S;

#define CONDITION_E12(l,u) ( ( ( (l) ^ (u) ) & 0x00008000 ) == 0 )
#define CONDITION_E3(l,u)  ( ( ( (l) & 0x0000C000 ) == 0x00004000 )   \
                          && ( ( (u) & 0x0000C000 ) == 0x00008000 ) )
#define MAPPING_E12(x)   ( ( (x) << 1 ) & 0x0000FFFF )
#define MAPPING_E3(x)  ( ( ( (x) << 1 ) ^ 0x00008000 ) & 0x0000FFFF )

#define SEND_BIT(b)                                                   \
do {                                                                  \
	buffer = (buffer << 1) | ((short) (b) & 0x01);                \
	if (++buf_count == 8)                                         \
	{                                                             \
		fputc(buffer, outfile);                               \
		out_file_size++;                                      \
		buf_count = 0;                                        \
		buffer = 0;                                           \
	};                                                            \
} while (0)

void compress_start()
{
  unsigned long i, q;
     /*******************   now first do some init   ********************/
  printf("compressing %s to %s\n", s_infile, s_outfile);
  buffer = 0;
  buf_count = 0;
  for (i = 0; i <= COUNT_MAX; i++) cum_count[ i ] = 0;  // init cum_count
  
  S = fgetc(infile);
  file_size = 0;
    /*************   first pass: get symbol probabilities   *************/
  while (!feof(infile)) { cum_count[ S+1 ]++; S = fgetc(infile); file_size++; }
  fclose(infile);
  total_count = file_size;
  
  while ( total_count >= 0x4000 )       // do recalculation of probabilities
  {
	q = (total_count >> 14) + 1;
	printf("  source file is big (> 16 KB), divide with  %i\n",q); 
	total_count = 0;
	for (i=1; i<=COUNT_MAX; i++) { 
	    cum_count[i] = ( cum_count[i] + q - 1 ) / q;  //round up
	    total_count += cum_count[i];
	}
  } // while

  infile  = fopen(s_infile , "rb");
  
  fprintf(outfile, "HWZ1");
  fwrite( &file_size, sizeof(file_size), 1, outfile);
     /***************   make the cumulative function   ***************/
  for (i = 1; i <= COUNT_MAX; i++) 
  {
      cum_count[ i ] += cum_count[ i-1 ];
      fwrite( &cum_count[i], 2, 1, outfile);
  }
}

void compress_end()
{
  printf("\nSource file: %u bytes, ", file_size);
  printf("representation: %u bytes (+520 bytes)\n", out_file_size);
  if (file_size > 0) {
    printf("compressed to %4.1f %% of original size, ",               \
           (float) (out_file_size) * 100 / (float) (file_size));
    printf("%4.2f bits/char\n\n",(float) out_file_size*8 / (float) file_size);
  }
}

void compress()
{
  unsigned long scale3;
  
  compress_start();
     /*************   second pass: encode the input stream   ***********/
  S = fgetc(infile);				// read first symbol
  out_file_size = 0;
  l = 0x00000000; 
  u = 0x0000FFFF;
  scale3 = 0;
  
  while ( !feof( infile ) )
  {
	l_new = l     + ( u - l + 1 ) * cum_count[ S ] / total_count;
	u_new = l - 1 + ( u - l + 1 ) * cum_count[S+1] / total_count;
	l = l_new;  /**********    update borders l and u   ************/
	u = u_new;  /**********         and rescale         ************/
	while CONDITION_E12(l, u)
	{					// E1 or E2-mapping
	    SEND_BIT(l >> 15);
	    while (scale3 > 0) { SEND_BIT((l >> 15) ^ 0x01); scale3--; }
	    l = MAPPING_E12(l);
	    u = MAPPING_E12(u) | 0x00000001;
	}
	while CONDITION_E3(l, u)
	{					// E3-mapping
	    l = MAPPING_E3(l);             
	    u = MAPPING_E3(u) | 0x00000001;
	    scale3++;
	};
	S = fgetc(infile);			// read next symbol
  }  // while not eof

  SEND_BIT(1);
  while (buf_count > 0) SEND_BIT(0);		// flush buffer
  
  compress_end();
} // compress

#define READ_BIT(b)                                                   \
do {                                                                  \
        if ( buf_count <= 0 )                                         \
        {                                                             \
                buffer = fgetc(infile);                               \
                if ( feof(infile) ) buffer = 0;                       \
                buf_count = 8;                                        \
        };                                                            \
        b = ((unsigned long) (buffer) >> 7 ) & 0x01;                  \
        buffer = buffer << 1;                                         \
        buf_count--;                                                  \
} while (0)

int decompress_start()
{  
  char header[5];
  unsigned long i;
     /********************   first some init   *************************/
  printf("decompressing %s to %s\n", s_infile, s_outfile);
  buffer = 0;
  buf_count = 0;
  
  fgets( header, sizeof(header), infile);
  if (strcmp(header,"HWZ1")!=0) { printf("file is not a HWZ!\n"); return(1); }
  fread( &file_size, sizeof(file_size), 1, infile);          // read file_size
     /***************   read cumulative function from file   ***************/
  cum_count[0] = 0;
  for (i = 1; i <= COUNT_MAX; i++) fread( &cum_count[i], 2, 1, infile);
  total_count = cum_count[COUNT_MAX];
}

int decompress()
{
  int S_l,S_u;
  unsigned long i,t,bit;
  
  decompress_start();
     /*****************   start the decoding process   *****************/
  l = 0x00000000;
  u = 0x0000FFFF;
  tag = 0;
  for (i=0; i<16; i++) { READ_BIT(bit); tag = (tag << 1) | bit; } // read tag
  
  for (i = 0; i < file_size; i++) 
  {
	t = ( ( tag - l + 1 ) * total_count - 1 ) / ( u - l + 1 );
           /***************** now decode the symbol S ****************/
	S_l = 0;
	S_u = COUNT_MAX;
	S = COUNT_MAX / 2;
	do {	// find symbol S with ( cum_count[S] <= t < cum_count[S+1] )
	  if ( cum_count[S] <= t ) S_l = S; else S_u = S;
	  S = ( S_l + S_u ) >> 1;
	} while (!( (cum_count[S] <= t) && (t < cum_count[S+1]) ));
           /***************   update borders l and u   ***************/
	l_new = l     + (u - l + 1) * cum_count[ S ] / total_count;
	u_new = l - 1 + (u - l + 1) * cum_count[S+1] / total_count;
	l = l_new;
	u = u_new;
	fputc(S, outfile);   /******   and rescale   ********/
	while CONDITION_E12(l, u)
	{			// E1 or E2-mapping
	    READ_BIT(bit);
            tag = MAPPING_E12(tag) | bit;
	    l = MAPPING_E12(l);
	    u = MAPPING_E12(u) | 0x00000001;
	}
	while CONDITION_E3(l, u)
	{			// E3-mapping
	    READ_BIT(bit);
            tag = MAPPING_E3(tag) | bit;
	    l = MAPPING_E3(l);             
	    u = MAPPING_E3(u) | 0x00000001;
	}
  } // for
  return(0);
}

int main(int argc, char *argv[])
{
  printf("\nHWZip (c) 2000 S. Hein & J. Werner\n\n");
  if (argc == 4) {
        s_infile = argv[2];
	s_outfile = argv[3];
	infile  = fopen(s_infile,  "rb");
	outfile = fopen(s_outfile, "wb");
	if (strcmp(argv[1],"-z")==0)   compress();
	if (strcmp(argv[1],"-u")==0) decompress();
	fclose(infile);
        fclose(outfile);
  }
  else {
	printf("usage: hwzip -[zu] infile outfile\n\n");
	printf("               z: zip\n");
	printf("               u: unzip\n\n");
  }
  return(0);
} // main