]> git.llucax.com Git - z.facultad/75.06/jacu.git/blobdiff - src/jacu.c
Test de Calgary sobre JACU vs GZIP vs BZIP2 done, me falta el pic en mi maquina que...
[z.facultad/75.06/jacu.git] / src / jacu.c
index bc56a41350462d52983219a79a19e7d5a88e763d..8f9beb0fb2f9230153d0bca58abbba76aadb646b 100644 (file)
@@ -1,4 +1,5 @@
 
 
+/* Jacu Team - GPL */
 #include "blocksorting/bs.h"
 #include "mtf/mtf.h"
 #include "zerogrouping/zerogrouping.h"
 #include "blocksorting/bs.h"
 #include "mtf/mtf.h"
 #include "zerogrouping/zerogrouping.h"
 #include <stdio.h>
 #include <unistd.h>
 
 #include <stdio.h>
 #include <unistd.h>
 
-long get_file_size(const char* filename);
+long fsize(const char* filename);
+
+typedef struct _flags_ { /* Command-line option flags: main() zeroes the struct with memset(), then sets a field to 1 when getopt() returns its letter */
+       int cflag; /* -c: compress; main() calls comprimir(). Rejected together with -d, and one of the two is required */
+       int dflag; /* -d: decompress; main() calls descomprimir() */
+       int zflag; /* -z: comprimir() zero-groups (ZG) the MTF output and writes a 1 in the ZG header byte */
+       int tflag; /* -t <arg>: a volume size was given; main() parses it with atol() and requires it to be > 0 */
+       int qflag; /* -q <arg>: a quality level was given; main() maps it to a pagesize */
+       int sflag; /* -s: comprimir() calls shuff_savemodel() after encoding. Rejected together with -m */
+       int mflag; /* -m <file>: comprimir() calls shuff_loadmodel() with that file name before encoding */
+       int rflag; /* Richard Dictionary :-)  -r: passed to bs_readblock() when compressing; makes descomprimir() call bs_finalblock() */
+} t_Flags;
+
+int comprimir(char *src, char *dst, Uint32 pagesize, Uint32 volumesize, t_Flags *flags, char *staticmodel);
+int descomprimir(char *src, char *dst, t_Flags *flags);
 
 int main(int argc, char* argv[])
 {      
 
 int main(int argc, char* argv[])
 {      
-       int cflag = 0;
-       int dflag = 0;
-       int zflag = 0;
-       int tflag = 0;
-       int qflag = 0;
        long int volumesize = 0;
        long int volumesize = 0;
-       size_t pagesize = 32768; /* 32KB */
+       Uint32 pagesize = 32768; /* 32KB */
        int ch;
        int ch;
-       t_BlockSort *bs;
+       t_Flags flags;
+       char *staticmodel = NULL;
                        
                        
-       while ((ch = getopt(argc, argv, "cdzt:q:")) != -1) { 
+       memset(&flags, 0, sizeof(t_Flags));
+
+       while ((ch = getopt(argc, argv, "rscdzm:t:q:")) != -1) { 
                 
                switch (ch) { 
                 
                switch (ch) { 
-                       case 'c': cflag = 1; 
+                       case 'c': flags.cflag = 1; 
                                          break;
 
                                          break;
 
-                       case 'd': dflag = 1; 
+                       case 'd': flags.dflag = 1; 
                                          break; 
 
                                          break; 
 
-                       case 'z': zflag = 1; 
+                       case 'z': flags.zflag = 1; 
+                                         break; 
+                       
+                       case 'm': flags.mflag = 1;
+                                         staticmodel = optarg;
                                          break; 
                                          break; 
+                       
+                       case 's': flags.sflag = 1;                                        
+                                         break;
 
 
-                       case 't': tflag = 1; 
+                       case 't': flags.tflag = 1; 
                                volumesize = atol(optarg);
                                break; 
 
                                volumesize = atol(optarg);
                                break; 
 
-                       case 'q': qflag = 1; 
+                       case 'r': flags.rflag = 1;
+                               break;
+                       case 'q': flags.qflag = 1; 
                                switch (atoi(optarg))
                                {
                                        case 0: pagesize = 1024; /* 1K */
                                switch (atoi(optarg))
                                {
                                        case 0: pagesize = 1024; /* 1K */
@@ -66,200 +88,36 @@ int main(int argc, char* argv[])
                                }
                                break; 
 
                                }
                                break; 
 
-                       default: fprintf(stderr, "Usage: %s [-cdpt] sourcefile targetfile\n", argv[0]); 
+                       default: fprintf(stderr, "Usage: %s [-cdzsr][-q blksize][-t volsize][-m modeldumpfile] source target\n", argv[0]); 
                                         return(2);
                }
        }
                
                                         return(2);
                }
        }
                
-       if ( (argc == 1) || (cflag & dflag) || !(cflag | dflag) || ((argc - optind) < 2) ) {
-               fprintf(stderr, "Usage: %s [-cdt] sourcefile targetfile\n", argv[0]); 
+       if ( (argc == 1) || (flags.cflag & flags.dflag) || !(flags.cflag | flags.dflag) || ((argc - optind) < 2) || (flags.mflag & flags.sflag)) {
+               fprintf(stderr, "Usage: %s [-cdzsr][-q compressionquality][-t volsize][-m modeldumpfile] source target\n", argv[0]); 
                return (3);
        }
                return (3);
        }
-       if ((tflag) && (volumesize <= 0l)) {
+       if ((flags.tflag) && (volumesize <= 0l)) {
                fprintf(stderr,"Error: The volume size must be a non-zero value\n");
                return (4);
        }
                fprintf(stderr,"Error: The volume size must be a non-zero value\n");
                return (4);
        }
-       if ((qflag) && (pagesize <= 1u)) {
+       if ((flags.qflag) && (pagesize <= 1u)) {
                fprintf(stderr,"Error: El nivel de compresión debe ser entre 0 (menor) y 9 (mayor).\n");
                return (5);
        }
                
                fprintf(stderr,"Error: El nivel de compresión debe ser entre 0 (menor) y 9 (mayor).\n");
                return (5);
        }
                
-       if (cflag == 1) {
-               /* Comprimo */
-               /* No me gusta el tmpfile ... es para probar como anda todo junto */
-               FILE *fp, *fp_out;
-               Uint32 i, j, total, k;
-               char *mtf;
-               char *salida, *data, c;
-               char *z;
-               int z_len;
-
-               data = malloc(sizeof(char)*pagesize);
-               /* Reservo lugar tambien para guardar el k y el tamaño  */
-               salida = malloc(sizeof(char)*pagesize+sizeof(Uint32)*2);
-               bs = bs_create(pagesize);
-
-               fp = fopen(argv[optind], "rb");
-               fp_out = fopen("tmp.comp", "wb");
-
-               c = fgetc(fp);
-               total = 0;
-               while (!feof(fp)) {
-                       i = 0;
-                       while ((!feof(fp)) && (i < pagesize)) {
-                               data[i++] = c;
-                               c = fgetc(fp);
-                               total++;
-                       }
-
-                       /* Hago el BS */
-                       bs_solve(data, salida, bs, &k, i);
-
-                       printf("BS k=%ld\n", *(Uint32 *)(salida+sizeof(Uint32)));
-                       printf("PageSize = %ld\n", *(Uint32 *)salida);
-
-                       printf("Antes de MTF = %ld [", i);
-                       {
-                               int ii;
-                               for(ii=0; ii<(i+sizeof(Uint32)); ii++)
-                                       printf("(%c)", salida[ii+sizeof(Uint32)]);
-                               printf("]\n");
-                       }
-                       /* Le aplico el MTF, salteo el tamaño del bloque para que no se pierda. */
-                       mtf = jacu_mtf(salida+sizeof(Uint32), i+sizeof(Uint32), &z, &z_len);
-
-                       printf("MTF Z (len=%d) = [", z_len);
-                       {
-                               int ii;
-                               for(ii=0; ii<z_len; ii++)
-                                       printf("%c", z[ii]);
-                               printf("]\n");
-
-                       }
-                       /* Si me lo piden, aplico ZG. */
-                       if (zflag) {
-                               size_t len;
-                               char buff[2];
-                               ZG zg;
-                               zg_init(&zg);
-                               /* TODO HACER LO MISMO QUE EN EL ELSE XXX */
-                               for (j = 0; j < i; ++j)
-                                       if ((len = zg_group(&zg, buff, mtf[j]))) fwrite(buff, 1, len, fp_out);
-                       } else {
-                               /* Guardo el PageSize */
-                               fwrite(salida, sizeof(Uint32), 1, fp_out);
-
-                               /* Guardo el Z len y el Z */
-                               fwrite(&z_len, sizeof(int), 1, fp_out);
-                               fwrite(z, z_len, sizeof(char), fp_out);
-
-                               /* Guardo la salida del MTF */
-                               printf("Despues de MTF : [");
-                               for(j=0; j<(i+sizeof(Uint32)); j++) {
-                                       fputc(mtf[j+sizeof(Uint32)], fp_out);
-                                       putchar('(');
-                                       fputc(mtf[j+sizeof(Uint32)], stdout);
-                                       putchar(')');
-                               }
-                               printf("]\n");
-                       }
-                       free(mtf);
-                       free(z);
-               }
-
-               /* Limpiando */
-               fclose(fp);
-               fclose(fp_out);
-               bs_destroy(bs);
-
-               /* Comprimo con huffman */
-               i = shuff_encode_file("tmp.comp", argv[optind+1], volumesize);
-
-               /* borro el temporal */
-               remove("tmp.comp");
-
-               /* Muestro bpb */
-               printf("Comprimido a %.04f bpb.\n", get_file_size(argv[optind+1])*8.0/get_file_size(argv[optind]));
-               return i;
+       if (flags.cflag == 1) {
+               return comprimir(argv[optind], argv[optind+1], pagesize, volumesize, &flags, staticmodel);
        }
        
        }
        
-       if (dflag == 1) { 
-               /* Descomprimo */
-               FILE *fp_out;
-               FILE *fp_in;
-               Uint32 block_size, k;
-               char *block, *mtf, *orig;
-               char *z;
-               int z_len;
-
-               shuff_decode_file(argv[optind], "tmp.comp"); /*argv[optind+1]);*/
-               fp_in = fopen("tmp.comp", "rb");
-               fp_out = fopen(argv[optind+1], "wb");
-
-               while (!feof(fp_in)) {
-                       block_size = 0;
-                       PERR("Leo bloque");
-                       fread(&block_size, sizeof(Uint32), 1, fp_in);
-                       printf("PageSize = %ld\n", block_size);
-                       fread(&z_len, sizeof(int), 1, fp_in);
-                       z = malloc(sizeof(char)*z_len);
-                       fread(z, z_len, sizeof(char), fp_in);
-
-                       printf("MTF Z (len=%d) = [", z_len);
-                       {
-                               int ii;
-                               for(ii=0; ii<z_len; ii++)
-                                       printf("%c", z[ii]);
-                               printf("]\n");
-
-                       }
-                       if (block_size > 0) {
-                               block = malloc(block_size*sizeof(char)+sizeof(Uint32));
-                               orig = malloc(block_size*sizeof(char));
-                               fread(block, block_size+sizeof(Uint32), sizeof(char), fp_in);
-
-                               printf("Antes MTF_inv = [");
-                               {
-                                       int ii;
-                                       for(ii=0; ii<block_size+sizeof(Uint32); ii++)
-                                               printf("(%c)", block[ii]);
-                                       printf("]\n");
-                               }
-                               /* Hago el MTF inverso */
-                               PERR("Haciendo MTF Inv");
-                               mtf = jacu_mtf_inv(z, block, block_size*sizeof(char)+sizeof(Uint32));
-
-                               printf("Luego de MTF = [");
-                               {
-                                       int ii;
-                                       for(ii=0; ii<block_size+sizeof(Uint32); ii++)
-                                               printf("%c", mtf[ii]);
-                                       printf("]\n");
-                               }
-
-                               /* Luego de hacer el MTF inverso ya puedo recuperar el k */
-                               PERR("Recuperando K");
-                               memcpy(&k, mtf, sizeof(Uint32));
-
-                               printf("Restored : k=%ld\n", k);
-                               PERR("BS_Restore");
-                               bs_restore(orig, block+sizeof(Uint32), k, block_size);
-
-                               PERR("Saving Data");
-                               fwrite(orig, block_size, sizeof(char), fp_out);
-                               free(block);
-                               free(orig);
-                               free(mtf);
-                       }
-               }
-               fclose(fp_in);
-               fclose(fp_out);
+       if (flags.dflag == 1) { 
+               return descomprimir(argv[optind], argv[optind+1], &flags);
        }
 
        return 0;
 }
 
        }
 
        return 0;
 }
 
-long get_file_size(const char* filename)
+long fsize(const char* filename)
 {
        FILE* file;
        long  file_size;
 {
        FILE* file;
        long  file_size;
@@ -270,3 +128,194 @@ long get_file_size(const char* filename)
        return file_size;
 }
 
        return file_size;
 }
 
+int comprimir(char *src, char *dst, Uint32 pagesize, Uint32 volumesize, t_Flags *flags, char *staticmodel)
+{
+       /* Compress the file `src` into `dst`.
+        *
+        * The input is read with bs_readblock() in blocks of up to `pagesize`
+        * bytes. Each block goes through bs_solve(), then jacu_mtf(), then
+        * (only when flags->zflag is set) zero grouping, and every resulting
+        * chunk is handed to the Huffman encoder with shuff_scanfreq_chunk().
+        * The actual encoding is triggered once, by shuff_encode_file(), after
+        * the whole input has been processed.
+        *
+        * Chunks are fed to the encoder in this order:
+        *   pagesize (sizeof(Uint32) bytes), one ZG flag byte (1 or 0), then
+        *   per block: z_len (sizeof(int) bytes), Z (z_len bytes) and the MTF
+        *   output of i + sizeof(Uint32) bytes (zero-grouped or raw).
+        *
+        * src         - path of the file to compress.
+        * dst         - output name given to shuff_init_encoder_bychunk().
+        * pagesize    - block size in bytes.
+        * volumesize  - multiplied by 1024 before being passed to the encoder;
+        *               presumably a volume size in KB (TODO confirm).
+        * flags       - zflag, mflag, sflag and rflag are read here.
+        * staticmodel - passed to shuff_loadmodel() when flags->mflag == 1.
+        *
+        * Returns 0 on completion, 1 if `src` cannot be opened or the encoder
+        * cannot be created. Also prints a bits/byte figure to stdout.
+        */          
+       t_BlockSort *bs;
+       HUFF_STATE *shuff;
+       FILE *fp;
+       Uint32 i, j, total, k; /* NOTE(review): total is accumulated below but never read in this function */
+       unsigned char *mtf;
+       unsigned char *salida, *data;
+       unsigned char *z;
+       int z_len;
+       
+       /* Open the file to compress; its blocks are encoded below */
+       if ((fp = fopen(src, "rb")) == NULL) return 1;
+       
+       /* Set up the Huffman compressor */
+       if ((shuff = shuff_init_encoder_bychunk(dst, volumesize*1024)) == NULL) return 1; /* NOTE(review): fp is left open on this return path */
+       if (flags->mflag == 1) shuff_loadmodel(shuff, staticmodel);
+       
+       /* Set up BS, allocating memory for the output: V (vector) + K (column number) */
+       data = malloc(sizeof(unsigned char)*pagesize); /* NOTE(review): neither malloc result is checked for NULL */
+       salida = malloc(sizeof(unsigned char)*pagesize+sizeof(Uint32));
+       bs = bs_create(pagesize);
+
+       /* Store the pagesize as a header (Huffman-encoded) */
+       shuff_scanfreq_chunk(shuff,(char*)&pagesize,sizeof(Uint32));
+
+       /* Store a header byte indicating whether ZG is used (Huffman-encoded) */
+       if (flags->zflag)
+               shuff_scanfreq_chunk(shuff, "\001", 1);
+       else
+               shuff_scanfreq_chunk(shuff, "\000", 1);
+
+       total = 0;
+       while (!feof(fp)) { /* NOTE(review): feof() only turns true after a read hits EOF, so whether a trailing zero-length block is processed depends on bs_readblock() - confirm */
+               i = 0; /* redundant: overwritten on the next line */
+               i = bs_readblock(fp, data, pagesize, flags->rflag);
+               total += i;
+
+
+               /* Apply BS, storing its result plus K in salida */
+               bs_solve(data, salida, bs, &k, i);
+
+               /* Apply MTF to salida */
+               mtf = jacu_mtf(salida, i+sizeof(Uint32), &z, &z_len);
+                               
+               /* Store z_len and Z */
+               shuff_scanfreq_chunk(shuff,(char*)&z_len,sizeof(int));
+               shuff_scanfreq_chunk(shuff,z,z_len);                    
+               
+               /* If requested, apply ZG. */
+               if (flags->zflag) {
+                       Uint32 len;
+                       unsigned char buff[2];
+                       Uint32 total_size = i + sizeof(Uint32);
+                       ZG zg;
+                       /* Store the MTF output with zeros grouped (ZG) */
+                       zg_init(&zg);
+                       for (j = 0; j < total_size; ++j)
+                               if ((len = zg_group(&zg, buff, mtf[j])))
+                                       shuff_scanfreq_chunk(shuff, buff, len);
+
+                               /* Flush the last zero-grouping. Despite the indentation this runs
+                                * once, after the for loop: the loop body is only the if above. */
+                               if ((len = zg_group_finish(&zg,buff)))
+                                       shuff_scanfreq_chunk(shuff, buff, len);
+               } else {
+                       /* Compress the MTF output */
+                       shuff_scanfreq_chunk(shuff,mtf,i+sizeof(Uint32));
+               }
+               free(mtf);
+               free(z);
+       }
+
+       /* Clean up */
+       if (fclose(fp)) fprintf(stderr, "Error al cerrar archivo de entrada!\n");
+       bs_destroy(bs);
+       free(data);
+       free(salida);
+
+       /* Compress with Huffman */              
+       shuff_encode_file(shuff);
+       if (flags->sflag == 1) shuff_savemodel(shuff);
+       /* Shutdown Huffman */
+       shuff_deinit_encoder(shuff);
+       free(shuff);
+
+       /* Print bits per byte. NOTE(review): divides by fsize(src); an empty input makes the divisor 0 - confirm this is acceptable */
+       printf("%s: %.04f bits/byte.\n", dst, vfsize(dst)*8.0f/fsize(src));
+       return 0;
+}
+
+int descomprimir(char *src, char *dst, t_Flags *flags)
+{
+       /* Decompress the file `src` into `dst`.
+        *
+        * Decodes, through shuff_decode_chunk(), a header made of the block
+        * size (sizeof(Uint32) bytes) and one byte telling whether zero
+        * grouping (ZG) was used. Then, per block: decodes z_len and Z, decodes
+        * the MTF output (ungrouping it first when ZG was used), applies
+        * jacu_mtf_inv(), copies K from the first sizeof(Uint32) bytes of the
+        * result, applies bs_restore() and writes the block to `dst`.
+        * The loop repeats while the last shuff_decode_chunk() call returned
+        * non-zero (stored in `moredata`).
+        *
+        * src   - input name given to shuff_init_decoder().
+        * dst   - path of the output file, opened with "wb".
+        * flags - only rflag is read here.
+        *
+        * Returns 0 on completion; 1 if the decoder cannot be created, if
+        * either header chunk decode returns 0, or if the decoded block size
+        * is 0.
+        */
+       FILE *fp_out;
+       Uint32 block_size = 0, k;
+       unsigned char *block, *mtf, *orig;
+       unsigned char *z;
+       Uint32 z_len=0,moredata = 0,decoded = 0;
+       unsigned char use_zg = 0,retbytes = 0;
+       HUFF_STATE *shuff;
+
+       /* Initialize the decompressor */
+       if ((shuff = shuff_init_decoder(src, NULL)) == NULL) return 1;
+                       
+       /* Open the output file. NOTE(review): the result is not checked for NULL before fwrite()/fclose() use it */
+       fp_out = fopen(dst, "wb");
+       
+       /* First of all, decode the pagesize used when compressing.
+        * NOTE(review): this return and the next one leave shuff and fp_out unreleased. */
+       if (!(moredata = shuff_decode_chunk(shuff,(char*)&block_size,sizeof(Uint32),&decoded))) return 1;
+
+       /* Decode the byte indicating whether ZG is used */
+       if (!(moredata = shuff_decode_chunk(shuff, &use_zg, 1, &decoded))) return 1;
+
+       /* Create buffers. NOTE(review): sizes come from the decoded header and the malloc results are not checked */
+       block = malloc(block_size*sizeof(unsigned char)+sizeof(Uint32));
+       orig = malloc(block_size*sizeof(unsigned char));
+
+       /* Decode chunk by chunk as needed */
+       do {                    
+               if (block_size > 0) {
+                       /* Decode the MTF's z_len and Z.
+                        * NOTE(review): z_len is a Uint32 but sizeof(int) bytes are decoded into it - assumes both have the same size. */
+                       moredata = shuff_decode_chunk(shuff,(char*)&z_len,sizeof(int),&decoded);                                        
+                       z = malloc(sizeof(unsigned char)*z_len); /* NOTE(review): unchecked allocation, size taken from the input stream */
+                       moredata = shuff_decode_chunk(shuff,z,z_len,&decoded);                          
+                       
+                       /* Check whether Zero Grouping was used when compressing */
+                       if (use_zg) {
+                               ZG zg;
+                               unsigned char zgbuffer[255]; /* retbytes is an unsigned char, so at most 255 bytes are copied out of this buffer per call */
+                               unsigned char zgbyte = 0;
+                               int zgmoved = 0;
+                               Uint32 zgungrouped = 0;
+                               /* Ungroup bytes until the page is complete or the source file ends */
+                               zg_init(&zg);
+                               do {
+                                       /* Read one zero-grouped byte and pass it through zg_ungroup */
+                                       zgmoved = 0;
+                                       moredata = shuff_decode_chunk(shuff,&zgbyte,1,&decoded);
+                                       retbytes = zg_ungroup(&zg,zgbuffer,zgbyte);
+                                       /* Move from zgbuffer into my block as many bytes as apply (never past block_size + sizeof(Uint32)) */
+                                       while ((zgmoved < retbytes) && (zgungrouped < block_size+sizeof(Uint32))) {
+                                               block[zgungrouped++] = zgbuffer[zgmoved++];
+                                       }
+                               } while ((moredata) && (zgungrouped < block_size+sizeof(Uint32)));
+
+                               /* Check whether the last processed byte, the one that completed the page, was a 0 */
+                               if (zgbyte == 0) {
+                                       /* Read one more byte (surely a 0) and zg_ungroup will change its state */
+                                       moredata = shuff_decode_chunk(shuff,&zgbyte,1,&decoded);
+                                       zg_ungroup(&zg,zgbuffer,zgbyte);
+                               }
+
+                               /* Normalize variables to continue in the common code */
+                               decoded = zgungrouped;
+                       }
+                       else {
+                               /* Read one MTF output */
+                               moredata = shuff_decode_chunk(shuff,block,block_size+sizeof(Uint32),&decoded);
+                       }
+                       
+                       /* Apply inverse MTF to the MTF output read above */
+                       mtf = jacu_mtf_inv(z, block, decoded);
+
+                       /* Now I have the BS output, so pull out its K (stored in the first sizeof(Uint32) bytes) */
+                       memcpy(&k, mtf, sizeof(Uint32));
+
+                       /* Recover the original chunk by applying inverse BS.
+                        * NOTE(review): decoded - sizeof(Uint32) is unsigned and wraps around if fewer than sizeof(Uint32) bytes were decoded. */
+                       bs_restore(orig, mtf+sizeof(Uint32), k, decoded - sizeof(Uint32));
+
+                       /* XXX NOW I PUT THE FLAG IN THE FILE; DON'T WORRY XXX
+                        * NOTE(review): orig is replaced by whatever bs_finalblock() returns and is reused by later iterations - confirm the returned buffer still holds block_size bytes. */
+                       if (flags->rflag == 1)
+                               orig = bs_finalblock(orig, decoded-sizeof(Uint32), &decoded);
+
+                       /* NOTE(review): when rflag is not set, decoded still includes the sizeof(Uint32) K prefix here, while bs_restore() was given decoded - sizeof(Uint32) bytes and orig holds block_size bytes - confirm the length written. */
+                       fwrite(orig, decoded, sizeof(unsigned char), fp_out);
+                       free(mtf);
+                       free(z);
+               }
+               else return 1; /* NOTE(review): block, orig, fp_out and shuff are not released on this path */
+       } while (moredata);
+       
+       /* Close up files and free mem */
+       fclose(fp_out);
+       free(block);
+       free(orig);
+
+       /* Shutdown Huffman */
+       shuff_deinit_decoder(shuff);
+       free(shuff);
+       return 0;
+}