git.llucax.com Git - z.facultad/75.06/jacu.git/blobdiff - src/statichuff/statichuff.c
Se implementa zerogrouping sin uso de buffer (caracter a caracter). Esto mejora
[z.facultad/75.06/jacu.git] / src / statichuff / statichuff.c
index b6da6f46b3383a01cde20a67012fe8c29d09880f..79fcdceae8d852c5f080b9e10460356abe35112c 100644 (file)
@@ -1,21 +1,41 @@
 
-#include <stdio.h>
+#include "statichuff.h"
+#include "../vfile/vfile.h"
+#include <stdlib.h>
 
-typedef unsigned short int t_freq;
-
-typedef struct t_freqnode {
-       unsigned short int symbol;
-       unsigned short int freq;
-       struct t_freqnode *lchild;
-       struct t_freqnode *rchild;
-} HUFFNODE;
+void putbit(char bit, char restart, char flush, VFILE *fp)
+{
+       static unsigned long int bits_buffer = 0;
+       static unsigned char bits_used = 0;     
 
-typedef struct t_code {
-       unsigned short int code;
-       unsigned char codelength;
-} CODE;
+       /* me obligan a emitir el output */
+       if ((flush == 1) && (bits_used > 0)) {
+               bits_buffer = bits_buffer << ((sizeof(unsigned long int)*8) - bits_used);
+               vfwrite(&bits_buffer,sizeof(unsigned long int),1,fp);
+               bits_buffer = 0;
+               bits_used = 0;
+               return;
+       }       
+       /* me indican que comienza un nuevo output */
+       if (restart) {
+               bits_buffer = 0;
+               bits_used = 0;
+       }                       
+       /* inserto el bit en el buffer */       
+       bits_buffer = bits_buffer << 1;
+       bits_buffer |= bit;
+       bits_used++;
+       
+       /* lleno el buffer, escribo */
+       if (bits_used == 32) {
+               vfwrite(&bits_buffer,sizeof(unsigned long int),1,fp);
+               bits_buffer = 0;
+               bits_used = 0;
+       }       
+       return;
+}
 
-void cpynode(HUFFNODE *node1, HUFFNODE *node2)
+void shuff_cpynode(SHUFFNODE *node1, SHUFFNODE *node2)
 {
        node1->symbol = node2->symbol;
        node1->freq = node2->freq;
@@ -23,63 +43,40 @@ void cpynode(HUFFNODE *node1, HUFFNODE *node2)
        node1->rchild = node2->rchild;  
 }
 
-int compnode(HUFFNODE *node1, HUFFNODE *node2)
+int shuff_compnode(const void *node1, const void *node2)
 {      
-       if (node1->freq < node2->freq) return 1;
-       if (node1->freq > node2->freq) return -11;
+       if (((SHUFFNODE*)node1)->freq < ((SHUFFNODE*)node2)->freq) return 1;
+       if (((SHUFFNODE*)node1)->freq > ((SHUFFNODE*)node2)->freq) return -1;
        return 0;
 }
 
-HUFFNODE *buildlist(t_freq *freqtable, int *nonzerofreqs)
-{
-       int i,j = 0,nonzero = 0;        
-       HUFFNODE *inputlist;
-       
-       /* Calculo cuantas frequencias > 0 hay y creo la tabla */
-       for (i = 0; i < 256; ++i) if (freqtable[i] > 0) nonzero++;
-       inputlist = (HUFFNODE*)malloc(sizeof(HUFFNODE)*nonzero);
-               
-       /* Cargo la inputlist del huffman solo con freqs > 0 */
-       for (i = 0; i < 256; ++i)
-               if (freqtable[i] > 0) {                 
-                       inputlist[j].symbol = i;
-                       inputlist[j].freq = freqtable[i];
-                       inputlist[j].lchild = NULL;
-                       inputlist[j].rchild = NULL;                     
-                       j++;
-               }
-               
-       *nonzerofreqs = nonzero;
-       return inputlist;
-}
-
-int rescalefreq(t_freq *freqtable)
+int shuff_rescalefreq(t_freq *freqtable)
 { 
        int i;
-       int totalfreq = 0;
+       t_freq totalfreq = 0;
        
        /* Divido por la mitad las frecuencias, asegurando de no perder */
        /* frequencias en 1, por ello le sumo 1 antes de partir */
        for (i = 0; i < 256; i++) {             
-               freqtable[i] = (freqtable[i]+1)/2;
+               freqtable[i] = (freqtable[i] << 2) | 1;
                totalfreq += freqtable[i];
        }
        
        return totalfreq;
 }
 
-int scanfreq(char *inputfile, t_freq *freqtable)
+int shuff_scanfreq(char *inputfile, t_freq *freqtable)
 {
        /* Locals */    
        FILE *fp;
-       int sumfreq = 0,auxsum = 0;
+       t_freq sumfreq = 0;
        int i,symbol;
        
-       /* Inicializamos la tabla de frecuencias */
+       /* Inicializamos la tabla de frecuencias */     
        for (i = 0; i < 256; ++i) freqtable[i] = 0;
                
        /* Abrimos el file */
-       if ((fp = fopen(inputfile,"rb")) == NULL) return 0;
+       if ((fp = fopen(inputfile,"r")) == NULL) return 0;
        while (!feof(fp)) {             
                /* Contamos las frecuencias */          
                symbol = fgetc(fp);
@@ -89,15 +86,64 @@ int scanfreq(char *inputfile, t_freq *freqtable)
                ++sumfreq;
                                
                /* Si llegue al tope de freq acumulada, halve em */
-               if (sumfreq == 4181)
-                       sumfreq = rescalefreq(freqtable);
+               if (sumfreq == 14930352)
+                       sumfreq = shuff_rescalefreq(freqtable);
        }
        
        fclose(fp);
-       return 1;       
+       return 1;
+}
+
+SHUFFNODE *shuff_buildlist(t_freq *freqtable, int *nonzerofreqs)
+{
+       int i,j = 0,nonzero = 0;        
+       SHUFFNODE *inputlist;
+       
+       /* Calculo cuantas frequencias > 0 hay y creo la tabla */
+       for (i = 0; i < 256; ++i) if (freqtable[i] > 0) nonzero++;
+       inputlist = (SHUFFNODE*)malloc(sizeof(SHUFFNODE)*nonzero);
+               
+       /* Cargo la inputlist del huffman solo con freqs > 0 */
+       for (i = 0; i < 256; ++i)
+               if (freqtable[i] > 0) {                 
+                       inputlist[j].symbol = i;
+                       inputlist[j].freq = freqtable[i];
+                       inputlist[j].lchild = NULL;
+                       inputlist[j].rchild = NULL;                     
+                       j++;
+               }
+               
+       *nonzerofreqs = nonzero;
+       return inputlist;
 }
 
-void printcodes(CODE *codetable,t_freq *freqtable)
+SHUFFNODE *shuff_buildtree(SHUFFNODE *list, int listcount)
+{
+       SHUFFNODE *lastsymbol = list+(listcount-1);
+       SHUFFNODE *node1,*node2;
+
+       while (lastsymbol > list) {             
+               /* Ordeno la lista por frecuencia descendente */
+               qsort(list,listcount,sizeof(SHUFFNODE),shuff_compnode);                         
+               /* Tomo los ultimos dos elementos, generando dos nodos del arbol */
+               node1 = (SHUFFNODE*)malloc(sizeof(SHUFFNODE));
+               node2 = (SHUFFNODE*)malloc(sizeof(SHUFFNODE));
+               shuff_cpynode(node1,lastsymbol-1);
+               shuff_cpynode(node2,lastsymbol);                
+               lastsymbol -= 1;
+               /* Nodo ficticio con la suma de las probs y los ptros a childs */
+               lastsymbol->symbol = 256;
+               lastsymbol->freq = node1->freq + node2->freq;
+               lastsymbol->lchild = node1;
+               lastsymbol->rchild = node2;
+               --listcount;
+       }
+               
+       /* Devuelvo el puntero a la raiz del arbol de huffman */
+       return lastsymbol;
+}
+
+void shuff_printcodes(SHUFFCODE *codetable,t_freq *freqtable)
 {
        int i,j;
        unsigned short int auxcode;
@@ -106,7 +152,7 @@ void printcodes(CODE *codetable,t_freq *freqtable)
        for (i = 0; i < 256; ++i) {
                if (codetable[i].codelength > 0) {
                        auxcode = codetable[i].code;                    
-                       printf("Symbol:%i  Freq: %i  Code:",i,freqtable[i]);
+                       printf("Symbol:%i  Freq: %li  Code:",i,freqtable[i]);
                        for (j = codetable[i].codelength-1; j >= 0; --j) {
                                auxcode = codetable[i].code;                    
                                auxcode = auxcode >> j;
@@ -118,82 +164,166 @@ void printcodes(CODE *codetable,t_freq *freqtable)
        }
 }
 
-void zerocodes(CODE *table)
+void shuff_zerocodes(SHUFFCODE *table)
 {
        int i;
        
-       /* Inicializo los codigos prefijos */
+       /* Inicializo los codigos prefijos */   
        for (i = 0; i < 256; ++i) {
                table[i].code = 0;
                table[i].codelength = 0;
        }
 }
 
-void buildcodes(CODE *table, HUFFNODE *node, int level, int code)
+void shuff_buildcodes(SHUFFCODE *table, SHUFFNODE *node, int level, int code)
 {
        if (node->symbol < 256) {
                /* Guardo el codigo en la tabla */
                table[node->symbol].code = code;
-               table[node->symbol].codelength = level;
-               /*printf("Found symbol %i with freq %i at depth %i\n",node->symbol,node->freq,level);*/
+               table[node->symbol].codelength = level;         
        }
        else {
                code = code << 1;
-               buildcodes(table,node->lchild,level+1,code);
+               shuff_buildcodes(table,node->lchild,level+1,code);
                code |= 1;
-               buildcodes(table,node->rchild,level+1,code);
+               shuff_buildcodes(table,node->rchild,level+1,code);
        }
 }
 
-HUFFNODE *buildtree(HUFFNODE *list, int listcount)
+int shuff_encode_symbols(t_freq *ftable, SHUFFCODE *ctable, char* inputfile, char *outputfile, long volsize)
 {
-       HUFFNODE *lastsymbol = list+(listcount-1);
-       HUFFNODE *node1,*node2,*fictnode;
-
-       /* Ordenamos inicialmente la inputlist para tomar las dos freqs min */
-       while (lastsymbol > list) {             
-               /* Ordeno la lista por frecuencia descendente */
-               qsort(list,listcount,sizeof(HUFFNODE),compnode);                                
-               /* Tomo los ultimos dos elementos, generando dos nodos del arbol */
-               node1 = (HUFFNODE*)malloc(sizeof(HUFFNODE));
-               node2 = (HUFFNODE*)malloc(sizeof(HUFFNODE));
-               cpynode(node1,lastsymbol-1);
-               cpynode(node2,lastsymbol);              
-               lastsymbol -= 1;
-               /* Nodo ficticio con la suma de las probs y los ptros a childs */
-               lastsymbol->symbol = 256;
-               lastsymbol->freq = node1->freq + node2->freq;
-               lastsymbol->lchild = node1;
-               lastsymbol->rchild = node2;
-               --listcount;
-       }
+       FILE *fpsource;
+       VFILE *fpdest;
+       int symbol,i;
+       unsigned long int sourcesize;
+       char bit;
+       SHUFFCODE symbolcode;
                
-       /* Devuelvo el puntero a la raiz del arbol de huffman */
-       return lastsymbol;
+       /* Abrimos el file */
+       if ((fpsource = fopen(inputfile,"r")) == NULL) return 0;
+       if ((fpdest = vfopen(outputfile,"w",volsize)) == NULL) return 0;
+               
+       /* Guardamos el size el archivo original e inputlist como header */
+       fseek(fpsource,0,SEEK_END);
+       sourcesize = ftell(fpsource);
+       vfwrite(&sourcesize,sizeof(unsigned long int),1,fpdest);
+       vfwrite(ftable,sizeof(t_freq),256,fpdest);
+       
+       /* Encodeo */
+       fseek(fpsource,0,SEEK_SET);
+       while (!feof(fpsource)) {
+               /* Levanto un symbolo (byte) */         
+               symbol = fgetc(fpsource);
+               if (symbol == EOF) continue;
+               
+               /* Cargamos el codigo y lo emitimos */
+               symbolcode = ctable[symbol];
+               for (i = symbolcode.codelength; i > 0; --i) {
+                       bit = (symbolcode.code >> (i-1)) & 1;
+                       putbit(bit,0,0,fpdest);
+               }               
+       }
+       
+       /* Hacemos un flush de lo que haya quedado en el buffer de salida */
+       putbit(0,0,1,fpdest);
+       fclose(fpsource);
+       vfclose(fpdest);
+       return 1;       
 }
 
-int main(int argc, char* argv[])
+int shuff_encode_file(char *inputfile, char *outputfile, long volsize)
 {
        /* Locals */
        t_freq *freqtable = (t_freq*)malloc(sizeof(t_freq)*256);
-       HUFFNODE *inputlist;
-       HUFFNODE *codetree;
-       CODE *codetable = (CODE*)malloc(sizeof(CODE)*256);
-       int freqcount = 0,i;
-       
-       if (argc == 1) return -1;
+       SHUFFNODE *inputlist;
+       SHUFFNODE *codetree;
+       SHUFFCODE *codetable = (SHUFFCODE*)malloc(sizeof(SHUFFCODE)*256);
+       int freqcount = 0;
        
        /* Armamos la tabla de frecuencias */
-       if (!scanfreq(argv[1],freqtable)) return -1;
+       if (!shuff_scanfreq(inputfile,freqtable)) return 0;
        
        /* Armo el input list y genero el arbol de huffman */
-       inputlist = buildlist(freqtable, &freqcount);   
-       codetree = buildtree(inputlist,freqcount);
-       zerocodes(codetable);
-       buildcodes(codetable,codetree,0,0);
-       printcodes(codetable,freqtable);
-       /*encode(codetable)*/
+       inputlist = shuff_buildlist(freqtable, &freqcount);
+       codetree = shuff_buildtree(inputlist,freqcount);
+
+       /* Armo la tabla de codigos prefijos para el encoder */
+       shuff_zerocodes(codetable);
+       shuff_buildcodes(codetable,codetree,0,0);
+       /*shuff_printcodes(codetable,freqtable);*/
+
+       /* Encodeo byte per byte */
+       shuff_encode_symbols(freqtable,codetable,inputfile,outputfile,volsize);
        
-       return 0;
+       /* Free up memory baby yeah */
+       free(freqtable);
+       free(inputlist);        
+       free(codetable);
+       
+       return 1;
+}
+
+SHUFFNODE *shuff_decode_symbols(SHUFFNODE *entrynode, unsigned long int buffer, 
+                                                        int *bitsleft, unsigned short int *symbol)
+{
+       char bit = 0;
+               
+       /* Levanto el symbolo y si es uno valido, devuelvo */
+       *symbol = entrynode->symbol;
+       if (*symbol != 256) return entrynode;           
+       if (*bitsleft == 0) return entrynode;
+               
+       /* Obtengo otro bit a procesar y me muevo en el arbol */
+       bit = (buffer >> ((*bitsleft)-1)) & 1;  
+       --(*bitsleft);
+       if (bit == 0) return shuff_decode_symbols(entrynode->lchild,buffer,bitsleft,symbol);
+       else return shuff_decode_symbols(entrynode->rchild,buffer,bitsleft,symbol);
+}
 
+int shuff_decode_file(char *inputfile, char *outputfile)
+{
+       SHUFFNODE *inputlist;
+       SHUFFNODE *codetree,*currnode;
+       t_freq *ftable = (t_freq*)malloc(sizeof(t_freq)*256);
+       unsigned long int bytesleft,codebuffer;
+       VFILE *fpsource;
+       FILE *fpdest;
+       unsigned short int decoded_symbol;
+       int bitsleft,freqcount = 0;     
+       
+       /* Levanto cuantos bytes decodeo y la freq table */
+       if ((fpsource = vfopen(inputfile,"r",0)) == NULL) return 0;
+       if ((fpdest = fopen(outputfile,"w")) == NULL) return 0;
+       vfread(&bytesleft,sizeof(unsigned long int),1,fpsource);
+       vfread(ftable,sizeof(unsigned long int),256,fpsource);  
+       inputlist = shuff_buildlist(ftable, &freqcount);
+       codetree = shuff_buildtree(inputlist,freqcount);
+       currnode = codetree;
+       
+       while (!vfeof(fpsource) && (bytesleft > 0)) {
+               
+               /* Leo un buffer de 32 bits */
+               if (vfread(&codebuffer,sizeof(unsigned long int),1,fpsource) != 1) continue;
+               bitsleft = sizeof(unsigned long int) * 8;
+               
+               /* Proceso el buffer sacando simbolos hasta que se me agote */
+               while ((bitsleft > 0) && (bytesleft > 0)) {     
+                       currnode = shuff_decode_symbols(currnode,codebuffer,&bitsleft,&decoded_symbol);
+                       /* Si obtuve un symbolo valido lo emito*/
+                       if (decoded_symbol != 256) {
+                               fputc(decoded_symbol,fpdest);
+                               currnode = codetree;
+                               --bytesleft;                            
+                       }                                               
+               }               
+       }
+               
+       vfclose(fpsource);
+       fclose(fpdest);
+       
+       /* Free up memory baby yeah */
+       free(ftable);
+       free(inputlist);
+       
+       return 1;
 }