git.llucax.com Git - z.facultad/75.06/jacu.git/commitdiff
Huffman con capacidad de comprimir chunks y presentando nueva API. El descompresor...
author Alan Kennedy <kennedya@3dgames.com.ar>
Wed, 23 Jun 2004 05:04:28 +0000 (05:04 +0000)
committer Alan Kennedy <kennedya@3dgames.com.ar>
Wed, 23 Jun 2004 05:04:28 +0000 (05:04 +0000)
src/statichuff/main.c
src/statichuff/main_bychunk.c [new file with mode: 0644]
src/statichuff/statichuff.c
src/statichuff/statichuff.h

index aa7cde86840a441dab46a74e36719e14bc6c120b..65a1c3ded4c2963a9b59e68217087f11ee0bdbf6 100644 (file)
@@ -9,6 +9,7 @@ int main(int argc, char* argv[])
        int tflag = 0;
        long int volumesize = 0;
        int ch;
        int tflag = 0;
        long int volumesize = 0;
        int ch;
+       HUFF_STATE *shuff;
                        
        while ((ch = getopt(argc, argv, "cdt:")) != -1) { 
                 
                        
        while ((ch = getopt(argc, argv, "cdt:")) != -1) { 
                 
@@ -36,12 +37,18 @@ int main(int argc, char* argv[])
                
        if (cflag == 1) {
                /* Comprimo */
                
        if (cflag == 1) {
                /* Comprimo */
-           return shuff_encode_file(argv[optind],argv[optind+1],volumesize*1024);
+               shuff = shuff_init_static_byfile(argv[optind],argv[optind+1],volumesize*1024);
+           shuff_encode_file(shuff);
+               shuff_deinit_static_byfile(shuff);
+               free(shuff);
        }
        
        if (dflag == 1) { 
                /* Descomprimo */
        }
        
        if (dflag == 1) { 
                /* Descomprimo */
-               return shuff_decode_file(argv[optind],argv[optind+1]);
+               shuff = shuff_init_static_byfile(argv[optind],argv[optind+1],0);
+               shuff_decode_file(shuff);
+               shuff_deinit_static_byfile(shuff);
+               free(shuff);
        }
                
        return 0;
        }
                
        return 0;
diff --git a/src/statichuff/main_bychunk.c b/src/statichuff/main_bychunk.c
new file mode 100644 (file)
index 0000000..54ce628
--- /dev/null
@@ -0,0 +1,78 @@
+
+#include "statichuff.h"
+#include <stdlib.h>
+
+/*
+ * Command-line driver for the static Huffman coder in by-chunk mode.
+ *
+ * Usage: prog -c|-d [-t volumesize] sourcefile targetfile
+ *   -c  compress sourcefile into targetfile, feeding the coder 4-byte chunks
+ *   -d  decompress sourcefile into targetfile
+ *   -t  volume size; handed to the coder multiplied by 1024
+ *
+ * Exit status: 0 on success, 1 when a file or the coder fails, 2 on bad usage.
+ */
+int main(int argc, char* argv[])
+{
+       enum { CHUNK_BYTES = 4 };
+       HUFF_STATE *shuff;
+       FILE *fp;
+       char chunk[CHUNK_BYTES]; /* on the stack: nothing to leak on early exits */
+       size_t nread;
+       int cflag = 0;
+       int dflag = 0;
+       int tflag = 0;
+       long int volumesize = 0;
+       int ch,ok;
+
+       while ((ch = getopt(argc, argv, "cdt:")) != -1) {
+
+               switch (ch) {
+                       case 'c': cflag = 1;
+                                         break;
+
+                       case 'd': dflag = 1;
+                                         break;
+
+                       case 't': tflag = 1;
+                                         volumesize = atoi(optarg);
+                                         break;
+
+                       default: fprintf(stderr, "Usage: %s [-cdt] sourcefile targetfile\n", argv[0]);
+                                        return(2);
+               }
+       }
+
+       /* Exactly one of -c / -d is required, plus the two file names */
+       if ( (argc == 1) || (cflag & dflag) || !(cflag | dflag) || ((argc - optind) < 2) ) {
+               fprintf(stderr, "Usage: %s [-cdt] sourcefile targetfile\n", argv[0]);
+               return (2);
+       }
+
+       /* A volume size given with -t has to be a positive number */
+       if ((tflag == 1) && (volumesize <= 0)) {
+               fprintf(stderr,"Error: The volume size must be a non-zero value\n");
+               return (2);
+       }
+
+       if (cflag == 1) {
+               /* Start a by-chunk static Huffman compressor */
+               if ((shuff = shuff_init_static_bychunk(argv[optind+1],volumesize*1024)) == NULL) return 1;
+
+               if ((fp = fopen(argv[optind],"rb")) == NULL) {
+                       /* Do not leak the coder state when the source cannot be opened */
+                       shuff_deinit_static_bychunk(shuff);
+                       free(shuff);
+                       return 1;
+               }
+
+               /* Hand the source to the coder in chunks of up to CHUNK_BYTES bytes. */
+               /* fread returns 0 on both end-of-file and error, so the loop always ends. */
+               while ((nread = fread(chunk,1,sizeof(chunk),fp)) > 0)
+                       shuff_encode_chunk(shuff,chunk,(int)nread,0);
+
+               ok = !ferror(fp);
+               fclose(fp);
+
+               /* Ask the coder to actually compress; it returns non-zero on success */
+               if (ok) ok = shuff_encode_file(shuff);
+
+               /* Deinit releases what the state owns; the state itself is ours to free */
+               shuff_deinit_static_bychunk(shuff);
+               free(shuff);
+
+               return ok ? 0 : 1;
+       }
+
+       if (dflag == 1) {
+               /* Decompress through the HUFF_STATE based by-file API */
+               if ((shuff = shuff_init_static_byfile(argv[optind],argv[optind+1],0)) == NULL) return 1;
+               ok = shuff_decode_file(shuff);
+               shuff_deinit_static_byfile(shuff);
+               free(shuff);
+
+               return ok ? 0 : 1;
+       }
+
+       return 0;
+}
index 79fcdceae8d852c5f080b9e10460356abe35112c..32b3e8d7b2670460b318aad41aca54ca2b80b2d7 100644 (file)
@@ -56,15 +56,34 @@ int shuff_rescalefreq(t_freq *freqtable)
        t_freq totalfreq = 0;
        
        /* Divido por la mitad las frecuencias, asegurando de no perder */
        t_freq totalfreq = 0;
        
        /* Divido por la mitad las frecuencias, asegurando de no perder */
-       /* frequencias en 1, por ello le sumo 1 antes de partir */
        for (i = 0; i < 256; i++) {             
        for (i = 0; i < 256; i++) {             
-               freqtable[i] = (freqtable[i] << 2) | 1;
+               freqtable[i] = (freqtable[i] >> 2) | 1;
                totalfreq += freqtable[i];
        }
        
        return totalfreq;
 }
 
                totalfreq += freqtable[i];
        }
        
        return totalfreq;
 }
 
+/*
+ * Adds the byte counts of one chunk to the running frequency table.
+ *
+ * chunkshuff: state whose freqtable and sumfreq are updated in place.
+ * chunk:      bytes to count; each one is used as an unsigned table index.
+ * chunksize:  number of bytes in chunk.
+ *
+ * Always returns 1.
+ */
+int shuff_scanfreq_chunk(HUFF_STATE *chunkshuff, char* chunk, int chunksize)
+{
+       /* Accumulated-frequency ceiling that triggers a rescale of the table. */
+       /* NOTE(review): presumably chosen to bound code lengths - confirm. */
+       enum { SUMFREQ_LIMIT = 14930352 };
+       int pos = 0;
+
+       while (pos < chunksize) {
+               unsigned char byte = chunk[pos];
+
+               chunkshuff->freqtable[byte] += 1;
+               chunkshuff->sumfreq += 1;
+
+               /* On hitting the ceiling, shrink the table and restart from its new sum */
+               if (chunkshuff->sumfreq == SUMFREQ_LIMIT)
+                       chunkshuff->sumfreq = shuff_rescalefreq(chunkshuff->freqtable);
+
+               pos++;
+       }
+
+       return 1;
+}
+
 int shuff_scanfreq(char *inputfile, t_freq *freqtable)
 {
        /* Locals */    
 int shuff_scanfreq(char *inputfile, t_freq *freqtable)
 {
        /* Locals */    
@@ -117,14 +136,20 @@ SHUFFNODE *shuff_buildlist(t_freq *freqtable, int *nonzerofreqs)
        return inputlist;
 }
 
        return inputlist;
 }
 
-SHUFFNODE *shuff_buildtree(SHUFFNODE *list, int listcount)
+SHUFFNODE *shuff_buildtree(t_freq *ftable)
 {
 {
-       SHUFFNODE *lastsymbol = list+(listcount-1);
-       SHUFFNODE *node1,*node2;
+       SHUFFNODE *lastsymbol;
+       SHUFFNODE *node1,*node2,*root;
+       SHUFFNODE *inputlist;
+       int freqcount = 0;
 
 
-       while (lastsymbol > list) {             
+       /* Genero la input list en base a la cual genera el arbol */
+       inputlist = shuff_buildlist(ftable, &freqcount);        
+       lastsymbol = inputlist+(freqcount-1);
+       
+       while (lastsymbol > inputlist) {                
                /* Ordeno la lista por frecuencia descendente */
                /* Ordeno la lista por frecuencia descendente */
-               qsort(list,listcount,sizeof(SHUFFNODE),shuff_compnode);                         
+               qsort(inputlist,freqcount,sizeof(SHUFFNODE),shuff_compnode);                            
                /* Tomo los ultimos dos elementos, generando dos nodos del arbol */
                node1 = (SHUFFNODE*)malloc(sizeof(SHUFFNODE));
                node2 = (SHUFFNODE*)malloc(sizeof(SHUFFNODE));
                /* Tomo los ultimos dos elementos, generando dos nodos del arbol */
                node1 = (SHUFFNODE*)malloc(sizeof(SHUFFNODE));
                node2 = (SHUFFNODE*)malloc(sizeof(SHUFFNODE));
@@ -136,11 +161,18 @@ SHUFFNODE *shuff_buildtree(SHUFFNODE *list, int listcount)
                lastsymbol->freq = node1->freq + node2->freq;
                lastsymbol->lchild = node1;
                lastsymbol->rchild = node2;
                lastsymbol->freq = node1->freq + node2->freq;
                lastsymbol->lchild = node1;
                lastsymbol->rchild = node2;
-               --listcount;
+               --freqcount;
        }
        }
-               
+       
+       /* Copio la raiz para poder liberar la lista sin perderla */
+       root = (SHUFFNODE*)malloc(sizeof(SHUFFNODE));
+       shuff_cpynode(root,lastsymbol);
+       
+       /* Free up mem */
+       free(inputlist);
+       
        /* Devuelvo el puntero a la raiz del arbol de huffman */
        /* Devuelvo el puntero a la raiz del arbol de huffman */
-       return lastsymbol;
+       return root;
 }
 
 void shuff_printcodes(SHUFFCODE *codetable,t_freq *freqtable)
 }
 
 void shuff_printcodes(SHUFFCODE *codetable,t_freq *freqtable)
@@ -190,7 +222,7 @@ void shuff_buildcodes(SHUFFCODE *table, SHUFFNODE *node, int level, int code)
        }
 }
 
        }
 }
 
-int shuff_encode_symbols(t_freq *ftable, SHUFFCODE *ctable, char* inputfile, char *outputfile, long volsize)
+int shuff_encode_symbols(HUFF_STATE *shuff, SHUFFCODE *ctable)
 {
        FILE *fpsource;
        VFILE *fpdest;
 {
        FILE *fpsource;
        VFILE *fpdest;
@@ -200,14 +232,14 @@ int shuff_encode_symbols(t_freq *ftable, SHUFFCODE *ctable, char* inputfile, cha
        SHUFFCODE symbolcode;
                
        /* Abrimos el file */
        SHUFFCODE symbolcode;
                
        /* Abrimos el file */
-       if ((fpsource = fopen(inputfile,"r")) == NULL) return 0;
-       if ((fpdest = vfopen(outputfile,"w",volsize)) == NULL) return 0;
+       if ((fpsource = fopen(shuff->sourcefile,"r")) == NULL) return 0;
+       if ((fpdest = vfopen(shuff->targetfile,"w",shuff->volsize)) == NULL) return 0;
                
        /* Guardamos el size el archivo original e inputlist como header */
        fseek(fpsource,0,SEEK_END);
        sourcesize = ftell(fpsource);
        vfwrite(&sourcesize,sizeof(unsigned long int),1,fpdest);
                
        /* Guardamos el size el archivo original e inputlist como header */
        fseek(fpsource,0,SEEK_END);
        sourcesize = ftell(fpsource);
        vfwrite(&sourcesize,sizeof(unsigned long int),1,fpdest);
-       vfwrite(ftable,sizeof(t_freq),256,fpdest);
+       vfwrite(shuff->freqtable,sizeof(t_freq),256,fpdest);
        
        /* Encodeo */
        fseek(fpsource,0,SEEK_SET);
        
        /* Encodeo */
        fseek(fpsource,0,SEEK_SET);
@@ -231,35 +263,111 @@ int shuff_encode_symbols(t_freq *ftable, SHUFFCODE *ctable, char* inputfile, cha
        return 1;       
 }
 
        return 1;       
 }
 
-int shuff_encode_file(char *inputfile, char *outputfile, long volsize)
+/*
+ * Feeds one chunk to a by-chunk static Huffman state.
+ *
+ * process == 0: the chunk's byte frequencies are added to chunkshuff and 1 is
+ *               returned.
+ * process != 0: nothing is done and 0 is returned.
+ *
+ * NOTE(review): only the frequency table is updated here; the chunk bytes are
+ * not written to chunkshuff->tmpfp in the code visible in this change.
+ */
+int shuff_encode_chunk(HUFF_STATE *chunkshuff, char *chunk, int chunksize, int process)
+{
+       /* Table and temporary file are taken as ready: left to the by-file path */
+       if (process) return 0;
+
+       /* Scan the frequencies of this chunk */
+       shuff_scanfreq_chunk(chunkshuff,chunk,chunksize);
+       return 1;
+}
+
+HUFF_STATE *shuff_init_static_byfile(char *inputfile, char *outputfile, long volsize)
 {
        /* Locals */
 {
        /* Locals */
-       t_freq *freqtable = (t_freq*)malloc(sizeof(t_freq)*256);
-       SHUFFNODE *inputlist;
+       HUFF_STATE *fshuff = (HUFF_STATE*)malloc(sizeof(HUFF_STATE));                   
+       int i;
+       
+       /* Inicializo la estructura para trabajar con Huff Static by File */
+       fshuff->tmpfp = NULL;
+       fshuff->sourcefile = (char*)malloc(sizeof(char)*(strlen(inputfile)+1));
+       fshuff->targetfile = (char*)malloc(sizeof(char)*(strlen(outputfile)+1));
+       strcpy(fshuff->sourcefile,inputfile);   
+       strcpy(fshuff->targetfile,outputfile);
+       fshuff->preloadfreq = 0;
+       fshuff->volsize = volsize;
+       fshuff->sumfreq = 0;
+       fshuff->freqtable = (t_freq*)malloc(sizeof(t_freq)*256);
+       for (i = 0; i < 256; ++i) fshuff->freqtable[i] = 0;             
+       
+       return fshuff;
+}
+
+/*
+ * Creates the state for by-chunk static Huffman compression.
+ *
+ * outputfile: name of the compressed file to produce. The state's source file
+ *             is a temporary named outputfile + "~", opened here for writing.
+ * volsize:    volume size stored in the state.
+ *
+ * Returns the new state with a zeroed frequency table and preloadfreq set
+ * to 1, or NULL when outputfile is NULL, an allocation fails or the temporary
+ * file cannot be opened; nothing is leaked on failure. On success the caller
+ * releases the state with shuff_deinit_static_bychunk() followed by free().
+ */
+HUFF_STATE *shuff_init_static_bychunk(char *outputfile, long volsize)
+{
+       HUFF_STATE *cshuff;
+       size_t namelen;
+       int i;
+
+       if (outputfile == NULL) return NULL;
+       if ((cshuff = (HUFF_STATE*)malloc(sizeof(HUFF_STATE))) == NULL) return NULL;
+
+       /* Allocate everything first so one check and one cleanup path suffice */
+       namelen = strlen(outputfile);
+       cshuff->tmpfp = NULL;
+       cshuff->sourcefile = (char*)malloc(sizeof(char)*(namelen+2)); /* + "~" and NUL */
+       cshuff->targetfile = (char*)malloc(sizeof(char)*(namelen+1));
+       cshuff->freqtable = (t_freq*)malloc(sizeof(t_freq)*256);
+
+       if ((cshuff->sourcefile != NULL) && (cshuff->targetfile != NULL) && (cshuff->freqtable != NULL)) {
+               strcpy(cshuff->targetfile,outputfile);
+               strcpy(cshuff->sourcefile,outputfile);
+               strcat(cshuff->sourcefile,"~");
+               cshuff->volsize = volsize;
+               cshuff->preloadfreq = 1;
+               cshuff->sumfreq = 0;
+               for (i = 0; i < 256; ++i) cshuff->freqtable[i] = 0;
+
+               /* Open the temporary file the chunks are meant to be dumped into */
+               if ((cshuff->tmpfp = fopen(cshuff->sourcefile,"w")) != NULL) return cshuff;
+       }
+
+       /* Failure: release whatever was obtained (free(NULL) is a no-op) */
+       free(cshuff->freqtable);
+       free(cshuff->sourcefile);
+       free(cshuff->targetfile);
+       free(cshuff);
+       return NULL;
+}
+
+/*
+ * Releases what a by-file state owns: the frequency table and both file names.
+ *
+ * The HUFF_STATE itself is not freed; the caller does that. A NULL state is
+ * ignored, and the released members are reset to NULL so that calling this
+ * twice on the same state is harmless.
+ */
+void shuff_deinit_static_byfile(HUFF_STATE *fshuff)
+{
+       if (fshuff == NULL) return;
+
+       free(fshuff->freqtable);
+       fshuff->freqtable = NULL;
+       free(fshuff->sourcefile);
+       fshuff->sourcefile = NULL;
+       free(fshuff->targetfile);
+       fshuff->targetfile = NULL;
+}
+
+/*
+ * Releases what a by-chunk state owns: the frequency table, both file names
+ * and the open temporary-file stream.
+ *
+ * The HUFF_STATE itself is not freed; the caller does that. The temporary
+ * file is closed but not deleted. A NULL state is ignored, and the released
+ * members are reset to NULL so that calling this twice is harmless.
+ */
+void shuff_deinit_static_bychunk(HUFF_STATE *cshuff)
+{
+       if (cshuff == NULL) return;
+
+       free(cshuff->freqtable);
+       cshuff->freqtable = NULL;
+       free(cshuff->sourcefile);
+       cshuff->sourcefile = NULL;
+       free(cshuff->targetfile);
+       cshuff->targetfile = NULL;
+
+       /* fclose(NULL) is undefined, so only close a stream that was opened */
+       if (cshuff->tmpfp != NULL) {
+               fclose(cshuff->tmpfp);
+               cshuff->tmpfp = NULL;
+       }
+}
+
+int shuff_encode_file(HUFF_STATE *shuff)
+{
+       /* Locals */    
        SHUFFNODE *codetree;
        SHUFFCODE *codetable = (SHUFFCODE*)malloc(sizeof(SHUFFCODE)*256);
        SHUFFNODE *codetree;
        SHUFFCODE *codetable = (SHUFFCODE*)malloc(sizeof(SHUFFCODE)*256);
-       int freqcount = 0;
+       int i;
        
        
-       /* Armamos la tabla de frecuencias */
-       if (!shuff_scanfreq(inputfile,freqtable)) return 0;
+       /* Veo si debo armar una freqtable o si esta preloaded */
+       if (!shuff->preloadfreq) if (!shuff_scanfreq(shuff->sourcefile,shuff->freqtable)) return 0;
        
        
-       /* Armo el input list y genero el arbol de huffman */
-       inputlist = shuff_buildlist(freqtable, &freqcount);
-       codetree = shuff_buildtree(inputlist,freqcount);
+       /* Genero el arbol de huffman */
+       codetree = shuff_buildtree(shuff->freqtable);
 
        /* Armo la tabla de codigos prefijos para el encoder */
        shuff_zerocodes(codetable);
        shuff_buildcodes(codetable,codetree,0,0);
 
        /* Armo la tabla de codigos prefijos para el encoder */
        shuff_zerocodes(codetable);
        shuff_buildcodes(codetable,codetree,0,0);
-       /*shuff_printcodes(codetable,freqtable);*/
+       /*shuff_printcodes(codetable,shuff->freqtable);*/
 
        /* Encodeo byte per byte */
 
        /* Encodeo byte per byte */
-       shuff_encode_symbols(freqtable,codetable,inputfile,outputfile,volsize);
+       shuff_encode_symbols(shuff,codetable);
        
        
-       /* Free up memory baby yeah */
-       free(freqtable);
-       free(inputlist);        
+       /* Free up memory baby yeah */  
        free(codetable);
        
        free(codetable);
        
+       /* Destruyo el arbol recursivamente TODO */
+       
        return 1;
 }
 
        return 1;
 }
 
@@ -280,11 +388,9 @@ SHUFFNODE *shuff_decode_symbols(SHUFFNODE *entrynode, unsigned long int buffer,
        else return shuff_decode_symbols(entrynode->rchild,buffer,bitsleft,symbol);
 }
 
        else return shuff_decode_symbols(entrynode->rchild,buffer,bitsleft,symbol);
 }
 
-int shuff_decode_file(char *inputfile, char *outputfile)
-{
-       SHUFFNODE *inputlist;
-       SHUFFNODE *codetree,*currnode;
-       t_freq *ftable = (t_freq*)malloc(sizeof(t_freq)*256);
+int shuff_decode_file(HUFF_STATE *shuff)
+{      
+       SHUFFNODE *codetree,*currnode;  
        unsigned long int bytesleft,codebuffer;
        VFILE *fpsource;
        FILE *fpdest;
        unsigned long int bytesleft,codebuffer;
        VFILE *fpsource;
        FILE *fpdest;
@@ -292,12 +398,11 @@ int shuff_decode_file(char *inputfile, char *outputfile)
        int bitsleft,freqcount = 0;     
        
        /* Levanto cuantos bytes decodeo y la freq table */
        int bitsleft,freqcount = 0;     
        
        /* Levanto cuantos bytes decodeo y la freq table */
-       if ((fpsource = vfopen(inputfile,"r",0)) == NULL) return 0;
-       if ((fpdest = fopen(outputfile,"w")) == NULL) return 0;
+       if ((fpsource = vfopen(shuff->sourcefile,"r",0)) == NULL) return 0;
+       if ((fpdest = fopen(shuff->targetfile,"w")) == NULL) return 0;
        vfread(&bytesleft,sizeof(unsigned long int),1,fpsource);
        vfread(&bytesleft,sizeof(unsigned long int),1,fpsource);
-       vfread(ftable,sizeof(unsigned long int),256,fpsource);  
-       inputlist = shuff_buildlist(ftable, &freqcount);
-       codetree = shuff_buildtree(inputlist,freqcount);
+       vfread(shuff->freqtable,sizeof(unsigned long int),256,fpsource);                
+       codetree = shuff_buildtree(shuff->freqtable);
        currnode = codetree;
        
        while (!vfeof(fpsource) && (bytesleft > 0)) {
        currnode = codetree;
        
        while (!vfeof(fpsource) && (bytesleft > 0)) {
@@ -321,9 +426,7 @@ int shuff_decode_file(char *inputfile, char *outputfile)
        vfclose(fpsource);
        fclose(fpdest);
        
        vfclose(fpsource);
        fclose(fpdest);
        
-       /* Free up memory baby yeah */
-       free(ftable);
-       free(inputlist);
+       /* Libero el arbol recursivamente TODO */       
        
        return 1;
 }
        
        return 1;
 }
index 5699ebb6d2411bbe2adfb9ca5a711ba1ea7933b6..08da37ed8566381655f4170208b5878b0752b437 100644 (file)
@@ -19,7 +19,22 @@ typedef struct t_code {
        unsigned char codelength;
 } SHUFFCODE;
 
        unsigned char codelength;
 } SHUFFCODE;
 
-int shuff_decode_file(char *inputfile, char *outputfile);
-int shuff_encode_file(char *inputfile, char *outputfile, long volsize);
+typedef struct t_huff { /* State handed to the static Huffman init/encode/decode/deinit calls */
+       FILE *tmpfp; /* File pointer to the temporary file of a by-chunk coder */
+       char *sourcefile; /* Name of the input file (by-chunk: the target name plus "~") */
+       char *targetfile; /* Name of the output file */
+       t_freq *freqtable; /* Frequency table, 256 entries */
+       t_freq sumfreq; /* Total accumulated frequency */
+       long volsize; /* Volume size for multi-volume output */
+       char preloadfreq; /* 1: freqtable already loaded (by-chunk | canonical) - 0: by-file */
+       
+} HUFF_STATE;
+
+/* State creation: by-file reads sourcefile directly, by-chunk is fed chunks. */
+/* The caller releases a state with the matching deinit call and then free(). */
+HUFF_STATE *shuff_init_static_byfile(char *inputfile, char *outputfile, long volsize);
+HUFF_STATE *shuff_init_static_bychunk(char *outputfile, long volsize);
+void shuff_deinit_static_byfile(HUFF_STATE *fshuff);
+void shuff_deinit_static_bychunk(HUFF_STATE *cshuff);
+/* Feeds one chunk to a by-chunk state; with process == 0 it only counts frequencies. */
+int shuff_encode_chunk(HUFF_STATE *chunkshuff, char *chunk, int chunksize, int process);
+/* Both return 1 on success and 0 on failure. */
+int shuff_decode_file(HUFF_STATE *shuff);
+int shuff_encode_file(HUFF_STATE *shuff);
 
 #endif /* _STATICHUFF_H_ */
 
 #endif /* _STATICHUFF_H_ */