From dff2f7bc143397a35e8c67bbf2848283b6764dda Mon Sep 17 00:00:00 2001 From: Alan Kennedy Date: Fri, 18 Jun 2004 06:29:23 +0000 Subject: [PATCH] Compresor Huffman, todo salvo el encodeado final, pero ya se obtiene la tabla de codigos prefijos, toy a un paso. Nevertheless, tengo que ver varias cositas como por ejemplo limitar los codigos a 32 bits max en vez de 16 como lo hize porque meparece que se pierden poder de compresion. Con texto pinta lindo, con un mp3, medio choto pero bueno after all un mp3 ya esta comprimido a su manera. --- src/statichuff/input.dat | 2 + src/statichuff/statichuff.c | 199 ++++++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 src/statichuff/input.dat create mode 100644 src/statichuff/statichuff.c diff --git a/src/statichuff/input.dat b/src/statichuff/input.dat new file mode 100644 index 0000000..9178921 --- /dev/null +++ b/src/statichuff/input.dat @@ -0,0 +1,2 @@ +Esto es una prueba de compresion de texto para ver como comprime el huffman +estatico. diff --git a/src/statichuff/statichuff.c b/src/statichuff/statichuff.c new file mode 100644 index 0000000..b6da6f4 --- /dev/null +++ b/src/statichuff/statichuff.c @@ -0,0 +1,199 @@ + +#include + +typedef unsigned short int t_freq; + +typedef struct t_freqnode { + unsigned short int symbol; + unsigned short int freq; + struct t_freqnode *lchild; + struct t_freqnode *rchild; +} HUFFNODE; + +typedef struct t_code { + unsigned short int code; + unsigned char codelength; +} CODE; + +void cpynode(HUFFNODE *node1, HUFFNODE *node2) +{ + node1->symbol = node2->symbol; + node1->freq = node2->freq; + node1->lchild = node2->lchild; + node1->rchild = node2->rchild; +} + +int compnode(HUFFNODE *node1, HUFFNODE *node2) +{ + if (node1->freq < node2->freq) return 1; + if (node1->freq > node2->freq) return -11; + return 0; +} + +HUFFNODE *buildlist(t_freq *freqtable, int *nonzerofreqs) +{ + int i,j = 0,nonzero = 0; + HUFFNODE *inputlist; + + /* Calculo cuantas frequencias > 0 hay y creo la tabla */ + for (i = 0; i < 256; ++i) if (freqtable[i] > 0) nonzero++; + inputlist = (HUFFNODE*)malloc(sizeof(HUFFNODE)*nonzero); + + /* Cargo la inputlist del huffman solo con freqs > 0 */ + for (i = 0; i < 256; ++i) + if (freqtable[i] > 0) { + inputlist[j].symbol = i; + inputlist[j].freq = freqtable[i]; + inputlist[j].lchild = NULL; + inputlist[j].rchild = NULL; + j++; + } + + *nonzerofreqs = nonzero; + return inputlist; +} + +int rescalefreq(t_freq *freqtable) +{ + int i; + int totalfreq = 0; + + /* Divido por la mitad las frecuencias, asegurando de no perder */ + /* frequencias en 1, por ello le sumo 1 antes de partir */ + for (i = 0; i < 256; i++) { + freqtable[i] = (freqtable[i]+1)/2; + totalfreq += freqtable[i]; + } + + return totalfreq; +} + +int scanfreq(char *inputfile, t_freq *freqtable) +{ + /* Locals */ + FILE *fp; + int sumfreq = 0,auxsum = 0; + int i,symbol; + + /* Inicializamos la tabla de frecuencias */ + for (i = 0; i < 256; ++i) freqtable[i] = 0; + + /* Abrimos el file */ + if ((fp = fopen(inputfile,"rb")) == NULL) return 0; + while (!feof(fp)) { + /* Contamos las frecuencias */ + symbol = fgetc(fp); + if (symbol == EOF) continue; + + freqtable[symbol]++; + ++sumfreq; + + /* Si llegue al tope de freq acumulada, halve em */ + if (sumfreq == 4181) + sumfreq = rescalefreq(freqtable); + } + + fclose(fp); + return 1; +} + +void printcodes(CODE *codetable,t_freq *freqtable) +{ + int i,j; + unsigned short int auxcode; + unsigned char bit; + + for (i = 0; i < 256; ++i) { + if (codetable[i].codelength > 0) { + auxcode = codetable[i].code; + printf("Symbol:%i Freq: %i Code:",i,freqtable[i]); + for (j = codetable[i].codelength-1; j >= 0; --j) { + auxcode = codetable[i].code; + auxcode = auxcode >> j; + bit = auxcode & 1; + printf("%i",bit); + } + printf(" Length:%i\n",codetable[i].codelength); + } + } +} + +void zerocodes(CODE *table) +{ + int i; + + /* Inicializo los codigos prefijos */ + for (i = 0; i < 256; ++i) { + table[i].code = 0; + table[i].codelength = 0; + } +} + +void buildcodes(CODE *table, HUFFNODE *node, int level, int code) +{ + if (node->symbol < 256) { + /* Guardo el codigo en la tabla */ + table[node->symbol].code = code; + table[node->symbol].codelength = level; + /*printf("Found symbol %i with freq %i at depth %i\n",node->symbol,node->freq,level);*/ + } + else { + code = code << 1; + buildcodes(table,node->lchild,level+1,code); + code |= 1; + buildcodes(table,node->rchild,level+1,code); + } +} + +HUFFNODE *buildtree(HUFFNODE *list, int listcount) +{ + HUFFNODE *lastsymbol = list+(listcount-1); + HUFFNODE *node1,*node2,*fictnode; + + /* Ordenamos inicialmente la inputlist para tomar las dos freqs min */ + while (lastsymbol > list) { + /* Ordeno la lista por frecuencia descendente */ + qsort(list,listcount,sizeof(HUFFNODE),compnode); + /* Tomo los ultimos dos elementos, generando dos nodos del arbol */ + node1 = (HUFFNODE*)malloc(sizeof(HUFFNODE)); + node2 = (HUFFNODE*)malloc(sizeof(HUFFNODE)); + cpynode(node1,lastsymbol-1); + cpynode(node2,lastsymbol); + lastsymbol -= 1; + /* Nodo ficticio con la suma de las probs y los ptros a childs */ + lastsymbol->symbol = 256; + lastsymbol->freq = node1->freq + node2->freq; + lastsymbol->lchild = node1; + lastsymbol->rchild = node2; + --listcount; + } + + /* Devuelvo el puntero a la raiz del arbol de huffman */ + return lastsymbol; +} + +int main(int argc, char* argv[]) +{ + /* Locals */ + t_freq *freqtable = (t_freq*)malloc(sizeof(t_freq)*256); + HUFFNODE *inputlist; + HUFFNODE *codetree; + CODE *codetable = (CODE*)malloc(sizeof(CODE)*256); + int freqcount = 0,i; + + if (argc == 1) return -1; + + /* Armamos la tabla de frecuencias */ + if (!scanfreq(argv[1],freqtable)) return -1; + + /* Armo el input list y genero el arbol de huffman */ + inputlist = buildlist(freqtable, &freqcount); + codetree = buildtree(inputlist,freqcount); + zerocodes(codetable); + buildcodes(codetable,codetree,0,0); + printcodes(codetable,freqtable); + /*encode(codetable)*/ + + return 0; + +} -- 2.43.0