/* Program for Huffman coding. The program prints to the standard output a representation of the tree and a table of the code, followed by some compression statistics. If any codeword exceeds 32 bits in length, a message is printed to the standard error stream. USAGE: huffman source_file output_file */ #include #include typedef struct encode_treenode{ long frequency; int parent;} ETREENODE; /* the parent field of a treenode is 0 if it has no parent, -n if it is the left child of cell n, and +n if it is the right child of cell n of the ETREE. */ typedef struct code_table_node{int length;int code;} CODE_TABLE_NODE; /* relies on 32-bit int !! if you have 16-bit ints, use long for code */ void make_ehuffman_tree(ETREENODE*); void make_code_table(CODE_TABLE_NODE*,ETREENODE*); void write_compressed(FILE*,FILE*,CODE_TABLE_NODE*,ETREENODE*); main(int argc, char **argv) { ETREENODE ETREE[512]; /* Cell 0 of this array will not be used. The first 256 nodes of the tree will be initialized with the frequencies of the first 256 bytes, and the remaining 255 nodes will be created by the Huffman tree algorithm. */ CODE_TABLE_NODE CODE_TABLE[255]; FILE *infile,*outfile; int i,ch,numbits=0,numchars=0; unsigned char nextbyte; if(argc!=3) { printf("Wrong number of arguments.\n"); exit(0); } else { infile = fopen(argv[1],"r"); outfile = fopen(argv[2],"w"); } for(i=1;i<=511;i++) { ETREE[i].frequency=0; ETREE[i].parent=0; } while((ch=fgetc(infile))!=EOF) ETREE[ch+1].frequency++; fclose(infile); make_ehuffman_tree(ETREE); printf("\n\nTREE REPRESENTATION\n\n"); for(i=1;i<=511;i++) printf("%d\t%d\t%d\n",i,ETREE[i].frequency,ETREE[i].parent); make_code_table(CODE_TABLE,ETREE); printf("\n\n CODE TABLE \n\n"); for(i=0;i<256;i++) printf("%d\t%d\t%X\n",i,CODE_TABLE[i].length,CODE_TABLE[i].code); printf("\n\nNumber of bits per character\n\n"); for(i=0;i<256;i++) { numbits+=CODE_TABLE[i].length*ETREE[i+1].frequency; numchars+=ETREE[i+1].frequency; } printf("%d\t%d\t%g\n",numbits,numchars,1.0*numbits/numchars); /* now reopen infile */ infile=fopen(argv[1],"r"); write_compressed(infile,outfile,CODE_TABLE,ETREE); } void make_ehuffman_tree(ETREENODE *ETREE) { int smallest,second_smallest,i,maxnode=256; while(1) { /* Search for first parentless nonzero frequency */ smallest=1; while((ETREE[smallest].frequency==0)||(ETREE[smallest].parent!=0)) smallest++; /* Now that it's found, search for next such record */ second_smallest=smallest+1; while((second_smallest<=maxnode)&& ((ETREE[second_smallest].frequency==0)|| (ETREE[second_smallest].parent!=0))) second_smallest++; if(second_smallest>maxnode) /* There was only one parentless nonzero frequency, so we're done. */ break; for(i=second_smallest+1;i<=maxnode;i++) { if((ETREE[i].frequency32) { fprintf(stderr,"Code word too long!!"); exit(0); } codeword>>=1; if(tree[j].parent>0) { codeword|=0x80000000; j=tree[j].parent; } else j=-tree[j].parent; } table[i].length=length; table[i].code=codeword; } } } void write_compressed(FILE *infile,FILE *outfile,CODE_TABLE_NODE* table, ETREENODE *tree) { int i,outshiftcount=0,ch,length; unsigned char inbyte,outbyte; unsigned int codeword,mask; /* First we write a representation of the tree to the output file. This is just a listing of the parent links. This will enable the decoder to reconstruct the tree and use it in decoding. */ for(i=1;i<=511;i++) fprintf(outfile,"%d\n",tree[i].parent); fputc('*',outfile); /* this marks the end of the header and the beginning of the encoded source */ /* The actual writing of the code bytes closely mimics the simple compression routine. */ while((ch=fgetc(infile))!=EOF) { inbyte = (unsigned char) ch; codeword=table[inbyte].code; length=table[inbyte].length; mask=0x80000000; while(length!=0) { outbyte=(outbyte<<1)|(((codeword&mask)!=0)?1:0); outshiftcount++; if(outshiftcount==8) { fputc(outbyte,outfile); outshiftcount=0; } mask=mask>>1; length--; } } if(outshiftcount !=0) { outbyte=outbyte<<8-outshiftcount;/*if untransmitted bits remain, shift them left*/ fputc(outbyte,outfile); } }