/*
 * Provenance: added to the jacu repository as src/lzhuff/lzhuf.c by
 * Nicolás Dimov on 2004-06-16 (commit subject, translated from Spanish:
 * "an lzhuff I found lying around").
 */
/**************************************************************
    lzhuf.c
    written by Haruyasu Yoshizaki 1988/11/20
    some minor changes 1989/04/06
    comments translated by Haruhiko Okumura 1989/04/07
    getbit and getbyte modified 1990/03/23 by Paul Edwards
      so that they would work on machines where integers are
      not necessarily 16 bits (although ANSI guarantees a
      minimum of 16).  This program has compiled and run with
      no errors under Turbo C 2.0, Power C, and SAS/C 4.5
      (running on an IBM mainframe under MVS/XA 2.2).  Could
      people please use YYYY/MM/DD date format so that everyone
      in the world can know what format the date is in?
    external storage of filesize changed 1990/04/18 by Paul Edwards to
      Intel's "little endian" rather than a machine-dependent style so
      that files produced on one machine with lzhuf can be decoded on
      any other.  "little endian" style was chosen since lzhuf
      originated on PC's, and therefore they should dictate the
      standard.
    initialization of something predicting spaces changed 1990/04/22 by
      Paul Edwards so that when the compressed file is taken somewhere
      else, it will decode properly, without changing ascii spaces to
      ebcdic spaces.  This was done by changing the ' ' (space literal)
      to 0x20 (which is the far most likely character to occur, if you
      don't know what environment it will be running on).
**************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

FILE *infile, *outfile;
static unsigned long int textsize = 0, codesize = 0, printcount = 0;

char wterr[] = "Can't write.";

/* Print `message` on its own line and terminate with a failure status. */
static void Error(const char *message)
{
    printf("\n%s\n", message);
    exit(EXIT_FAILURE);
}

/********** LZSS compression **********/

#define N           4096    /* buffer size (must be a power of two: */
                            /* positions are wrapped with & (N - 1)) */
#define F           60      /* lookahead buffer size */
#define THRESHOLD   2       /* matches of this length or less are */
                            /* emitted as literals */
#define NIL         N       /* leaf of tree */

/* Ring buffer; the extra F - 1 bytes mirror the start of the buffer so */
/* that string comparisons never have to wrap around. */
unsigned char
        text_buf[N + F - 1];
/* Binary search trees over the strings in text_buf.  rson[N + 1 + c] is */
/* the root of the tree for strings whose first byte is c. */
static int  match_position, match_length,
            lson[N + 1], rson[N + 257], dad[N + 1];

/* Mark all 256 roots as empty and all N nodes as "not in any tree". */
static void InitTree(void)  /* initialize trees */
{
    int  i;

    for (i = N + 1; i <= N + 256; i++)
        rson[i] = NIL;          /* root */
    for (i = 0; i < N; i++)
        dad[i] = NIL;           /* node */
}

/*
 * Insert the string of length F starting at text_buf[r] into the tree
 * selected by its first byte.  On return match_length holds the longest
 * match found against strings already in the tree and match_position its
 * distance code (((r - p) & (N - 1)) - 1 for matching node p).  Among
 * matches of equal length the smaller position code wins.  If a full
 * F-byte match is found, the old node is replaced by r.
 */
static void InsertNode(int r)  /* insert to tree */
{
    int  i, p, cmp;
    unsigned char  *key;
    unsigned c;

    cmp = 1;                    /* roots only have a right child */
    key = &text_buf[r];
    p = N + 1 + key[0];
    rson[r] = lson[r] = NIL;
    match_length = 0;
    for ( ; ; ) {
        if (cmp >= 0) {
            if (rson[p] != NIL)
                p = rson[p];
            else {
                rson[p] = r;
                dad[r] = p;
                return;
            }
        } else {
            if (lson[p] != NIL)
                p = lson[p];
            else {
                lson[p] = r;
                dad[r] = p;
                return;
            }
        }
        for (i = 1; i < F; i++)
            if ((cmp = key[i] - text_buf[p + i]) != 0)
                break;
        if (i > THRESHOLD) {
            if (i > match_length) {
                match_position = ((r - p) & (N - 1)) - 1;
                if ((match_length = i) >= F)
                    break;
            }
            if (i == match_length) {
                if ((c = ((r - p) & (N - 1)) - 1) < (unsigned)match_position) {
                    match_position = c;
                }
            }
        }
    }
    /* Full-length match: r takes over p's place in the tree. */
    dad[r] = dad[p];
    lson[r] = lson[p];
    rson[r] = rson[p];
    dad[lson[p]] = r;
    dad[rson[p]] = r;
    if (rson[dad[p]] == p)
        rson[dad[p]] = r;
    else
        lson[dad[p]] = r;
    dad[p] = NIL;  /* remove p */
}

/* Unlink node p from its tree; does nothing if p is not in a tree. */
static void DeleteNode(int p)  /* remove from tree */
{
    int  q;

    if (dad[p] == NIL)
        return;         /* not registered */
    if (rson[p] == NIL)
        q = lson[p];
    else if (lson[p] == NIL)
        q = rson[p];
    else {
        /* Two children: replace p by the rightmost node of its left */
        /* subtree. */
        q = lson[p];
        if (rson[q] != NIL) {
            do {
                q = rson[q];
            } while (rson[q] != NIL);
            rson[dad[q]] = lson[q];
            dad[lson[q]] = dad[q];
            lson[q] = lson[p];
            dad[lson[p]] = q;
        }
        rson[q] = rson[p];
        dad[rson[p]] = q;
    }
    dad[q] = dad[p];
    if (rson[dad[p]] == p)
        rson[dad[p]] = q;
    else
        lson[dad[p]] = q;
    dad[p] = NIL;
}

/* Huffman coding */

#define N_CHAR      (256 - THRESHOLD + F)
                /* kinds of characters (character code = 0..N_CHAR-1) */
#define T       (N_CHAR * 2 - 1)    /* size of table */
#define R       (T - 1)             /* position of root */
#define MAX_FREQ    0x8000      /* the tree is rebuilt (reconst) when */
                                /* the root frequency reaches this value */
typedef unsigned char uchar;


/* table for encoding and decoding the upper 6 bits of position */

/* for encoding */
uchar p_len[64] = {
    0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
};

uchar p_code[64] = {
    0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
    0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
    0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
    0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
    0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
    0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/* for decoding */
uchar d_code[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
    0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
    0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
    0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
    0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
    0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
    0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
    0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
    0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
    0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
    0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
    0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
    0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
    0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
    0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
};

uchar d_len[256] = {
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
};

unsigned freq[T + 1];   /* frequency table; freq[T] is a sentinel */

int prnt[T + N_CHAR];   /* pointers to parent nodes, except for the */
            /* elements [T..T + N_CHAR - 1] which are used to get */
            /* the positions of leaves corresponding to the codes. */

int son[T];     /* pointers to child nodes (son[], son[] + 1) */

/* Input bit buffer: the next unread bit is bit 15 of getbuf; getlen is */
/* the number of valid bits currently buffered. */
unsigned getbuf = 0;
uchar getlen = 0;

/* Read one bit from infile.  Past end of file, zero bits are supplied. */
static int GetBit(void) /* get one bit */
{
    unsigned i;
    int ch;

    while (getlen <= 8) {
        if ((ch = getc(infile)) == EOF)
            ch = 0;
        getbuf |= (unsigned)ch << (8 - getlen);
        getlen += 8;
    }
    i = getbuf;
    getbuf <<= 1;
    getlen--;
    return (int)((i & 0x8000) >> 15);
}

/* Read eight bits from infile.  Past end of file, zero bits are supplied. */
static int GetByte(void)    /* get one byte */
{
    unsigned i;
    int ch;

    while (getlen <= 8) {
        if ((ch = getc(infile)) == EOF)
            ch = 0;
        getbuf |= (unsigned)ch << (8 - getlen);
        getlen += 8;
    }
    i = getbuf;
    getbuf <<= 8;
    getlen -= 8;
    return (int)((i & 0xff00) >> 8);
}

/* Output bit buffer: pending bits are left-justified in the low 16 bits */
/* of putbuf; putlen is the number of pending bits. */
unsigned putbuf = 0;
uchar putlen = 0;

/*
 * Append the top `l` bits of the 16-bit value `c` (left-justified, i.e.
 * starting at bit 15) to the output stream, writing out whole bytes as
 * they become available.  Exits via Error() on a write failure.
 */
static void Putcode(int l, unsigned c)      /* output l bits of code */
{
    putbuf |= c >> putlen;
    if ((putlen += l) >= 8) {
        /* Only bits 8..15 are the byte to emit; mask explicitly since */
        /* unsigned may be wider than 16 bits. */
        if (putc((int)((putbuf >> 8) & 0xff), outfile) == EOF) {
            Error(wterr);
        }
        if ((putlen -= 8) >= 8) {
            if (putc((int)(putbuf & 0xff), outfile) == EOF) {
                Error(wterr);
            }
            codesize += 2;
            putlen -= 8;
            putbuf = c << (l - putlen);
        } else {
            putbuf <<= 8;
            codesize++;
        }
    }
}


/* initialization of tree */

/* Build the initial Huffman tree in which every code has frequency 1. */
static void StartHuff(void)
{
    int i, j;

    for (i = 0; i < N_CHAR; i++) {
        freq[i] = 1;
        son[i] = i + T;
        prnt[i + T] = i;
    }
    i = 0; j = N_CHAR;
    while (j <= R) {
        freq[j] = freq[i] + freq[i + 1];
        son[j] = i;
        prnt[i] = prnt[i + 1] = j;
        i += 2; j++;
    }
    freq[T] = 0xffff;   /* sentinel that stops the scan in update() */
    prnt[R] = 0;
}


/* reconstruction of tree */

/* Halve all leaf frequencies and rebuild the internal nodes from them. */
static void reconst(void)
{
    int i, j, k;
    unsigned f;
    size_t count;

    /* collect leaf nodes in the first half of the table */
    /* and replace the freq by (freq + 1) / 2. */
    j = 0;
    for (i = 0; i < T; i++) {
        if (son[i] >= T) {
            freq[j] = (freq[i] + 1) / 2;
            son[j] = son[i];
            j++;
        }
    }
    /* begin constructing tree by connecting sons */
    for (i = 0, j = N_CHAR; j < T; i += 2, j++) {
        k = i + 1;
        f = freq[j] = freq[i] + freq[k];
        for (k = j - 1; f < freq[k]; k--);
        k++;
        /* Shift entries k..j-1 up by one to make room at k.  The byte */
        /* counts must use the real element sizes: the historical */
        /* "(j - k) * 2" is only right where int is 2 bytes. */
        count = (size_t)(j - k);
        memmove(&freq[k + 1], &freq[k], count * sizeof freq[0]);
        freq[k] = f;
        memmove(&son[k + 1], &son[k], count * sizeof son[0]);
        son[k] = i;
    }
    /* connect prnt */
    for (i = 0; i < T; i++) {
        if ((k = son[i]) >= T) {
            prnt[k] = i;
        } else {
            prnt[k] = prnt[k + 1] = i;
        }
    }
}


/* increment frequency of given code by one, and update tree */

static void update(int c)
{
    int i, j, k, l;

    if (freq[R] == MAX_FREQ) {
        reconst();
    }
    c = prnt[c + T];
    do {
        k = ++freq[c];

        /* if the order is disturbed, exchange nodes */
        if ((unsigned)k > freq[l = c + 1]) {
            while ((unsigned)k > freq[++l]);
            l--;
            freq[c] = freq[l];
            freq[l] = k;

            i = son[c];
            prnt[i] = l;
            if (i < T) prnt[i + 1] = l;

            j = son[l];
            son[l] = i;

            prnt[j] = c;
            if (j < T) prnt[j + 1] = c;
            son[c] = j;

            c = l;
        }
    } while ((c = prnt[c]) != 0);   /* repeat up to root */
}

/* Bit pattern and length of the code most recently emitted by EncodeChar. */
unsigned code, len;

/* Emit the current Huffman code for symbol `c`, then bump its frequency. */
static void EncodeChar(unsigned c)
{
    unsigned i;
    int j, k;

    i = 0;
    j = 0;
    k = prnt[c + T];

    /* travel from leaf to root */
    do {
        i >>= 1;

        /* if node's address is odd-numbered, choose bigger brother node */
        if (k & 1) i += 0x8000;

        j++;
    } while ((k = prnt[k]) != R);
    Putcode(j, i);
    code = i;
    len = j;
    update(c);
}

/* Emit a match position: upper 6 bits via p_code/p_len, lower 6 raw. */
static void EncodePosition(unsigned c)
{
    unsigned i;

    /* output upper 6 bits by table lookup */
    i = c >> 6;
    Putcode(p_len[i], (unsigned)p_code[i] << 8);

    /* output lower 6 bits verbatim */
    Putcode(6, (c & 0x3f) << 10);
}

/* Flush any bits still pending in putbuf as one final byte. */
static void EncodeEnd(void)
{
    if (putlen) {
        if (putc((int)((putbuf >> 8) & 0xff), outfile) == EOF) {
            Error(wterr);
        }
        codesize++;
    }
}

/* Decode one symbol (0..N_CHAR-1) from infile and bump its frequency. */
static int DecodeChar(void)
{
    unsigned c;

    c = son[R];

    /* travel from root to leaf, */
    /* choosing the smaller child node (son[]) if the read bit is 0, */
    /* the bigger (son[]+1} if 1 */
    while (c < T) {
        c += GetBit();
        c = son[c];
    }
    c -= T;
    update(c);
    return (int)c;
}

/* Decode a 12-bit match position written by EncodePosition. */
static int DecodePosition(void)
{
    unsigned i, j, c;

    /* recover upper 6 bits from table */
    i = GetByte();
    c = (unsigned)d_code[i] << 6;
    j = d_len[i];

    /* read lower 6 bits verbatim */
    j -= 2;
    while (j--) {
        i = (i << 1) + GetBit();
    }
    return (int)(c | (i & 0x3f));
}

/* compression */

/*
 * Compress infile to outfile.  The output starts with the input size as
 * a 4-byte little-endian integer, followed by the LZSS/Huffman bit
 * stream.  Progress and a size summary are printed to stdout.  Exits via
 * Error() if the input size cannot be determined or represented, or on a
 * write failure.
 */
static void Encode(void)  /* compression */
{
    int  i, c, ahead, r, s, last_match_length;
    long insize;

    /* Determine the input size up front; it goes into the header. */
    if (fseek(infile, 0L, SEEK_END) != 0 || (insize = ftell(infile)) < 0)
        Error("Can't read");
    if ((unsigned long)insize > 0xffffffffUL)
        Error("Input too large.");  /* header holds only 32 bits */
    textsize = (unsigned long)insize;

    /* output size of text */
    fputc((int)(textsize & 0xff), outfile);
    fputc((int)((textsize >> 8) & 0xff), outfile);
    fputc((int)((textsize >> 16) & 0xff), outfile);
    fputc((int)((textsize >> 24) & 0xff), outfile);
    if (ferror(outfile))
        Error(wterr);
    if (textsize == 0)
        return;
    rewind(infile);
    textsize = 0;           /* rewind and re-read */
    StartHuff();
    InitTree();
    s = 0;
    r = N - F;
    /* Pre-fill the dictionary with 0x20; the decoder does the same. */
    for (i = s; i < r; i++)
        text_buf[i] = 0x20;
    for (ahead = 0; ahead < F && (c = getc(infile)) != EOF; ahead++)
        text_buf[r + ahead] = (unsigned char)c;
    textsize = ahead;
    for (i = 1; i <= F; i++)
        InsertNode(r - i);
    InsertNode(r);
    do {
        if (match_length > ahead)
            match_length = ahead;
        if (match_length <= THRESHOLD) {
            match_length = 1;
            EncodeChar(text_buf[r]);
        } else {
            EncodeChar(255 - THRESHOLD + match_length);
            EncodePosition(match_position);
        }
        last_match_length = match_length;
        for (i = 0; i < last_match_length &&
                (c = getc(infile)) != EOF; i++) {
            DeleteNode(s);
            text_buf[s] = (unsigned char)c;
            if (s < F - 1)
                text_buf[s + N] = (unsigned char)c;     /* mirror copy */
            s = (s + 1) & (N - 1);
            r = (r + 1) & (N - 1);
            InsertNode(r);
        }
        if ((textsize += i) > printcount) {
            printf("%12lu\r", textsize);
            printcount += 1024;
        }
        /* Input exhausted: keep sliding without reading new bytes. */
        while (i++ < last_match_length) {
            DeleteNode(s);
            s = (s + 1) & (N - 1);
            r = (r + 1) & (N - 1);
            if (--ahead) InsertNode(r);
        }
    } while (ahead > 0);
    EncodeEnd();
    printf("In : %lu bytes\n", textsize);
    printf("Out: %lu bytes\n", codesize);
    printf("Out/In: %.3f\n", 1.0 * codesize / textsize);
}

/*
 * Decompress infile (as produced by Encode) to outfile.  Exits via
 * Error() if the 4-byte size header cannot be read or on a write failure.
 * Never writes more than the number of bytes announced in the header.
 */
static void Decode(void)  /* recover */
{
    int  i, j, k, r, c;
    unsigned long int  count;

    /* read size of text: 4 bytes, little endian.  Each byte is checked */
    /* for EOF and widened before shifting, so a short file is reported */
    /* instead of being turned into a bogus size. */
    textsize = 0;
    for (i = 0; i < 4; i++) {
        if ((c = fgetc(infile)) == EOF)
            Error("Can't read");
        textsize |= (unsigned long)c << (8 * i);
    }
    if (textsize == 0)
        return;
    StartHuff();
    for (i = 0; i < N - F; i++)
        text_buf[i] = 0x20;
    r = N - F;
    for (count = 0; count < textsize; ) {
        c = DecodeChar();
        if (c < 256) {
            if (putc(c, outfile) == EOF) {
                Error(wterr);
            }
            text_buf[r++] = (unsigned char)c;
            r &= (N - 1);
            count++;
        } else {
            i = (r - DecodePosition() - 1) & (N - 1);
            j = c - 255 + THRESHOLD;
            /* Stop at textsize so damaged input cannot overrun the */
            /* announced output length. */
            for (k = 0; k < j && count < textsize; k++) {
                c = text_buf[(i + k) & (N - 1)];
                if (putc(c, outfile) == EOF) {
                    Error(wterr);
                }
                text_buf[r++] = (unsigned char)c;
                r &= (N - 1);
                count++;
            }
        }
        if (count > printcount) {
            printf("%12lu\r", count);
            printcount += 1024;
        }
    }
    printf("%12lu\n", count);
}

/*
 * Usage: lzhuf e|d infile outfile
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on bad arguments or if a
 * file cannot be opened; I/O failures exit via Error().
 */
int main(int argc, char *argv[])
{
    char  *s;

    if (argc != 4) {
        printf("'lzhuf e file1 file2' encodes file1 into file2.\n"
               "'lzhuf d file2 file1' decodes file2 into file1.\n");
        return EXIT_FAILURE;
    }
    /* The mode must be exactly one of the letters D, E, d, e. */
    s = argv[1];
    if (s[0] == '\0' || s[1] != '\0' || strpbrk(s, "DEde") == NULL) {
        printf("??? %s\n", s);
        return EXIT_FAILURE;
    }
    s = argv[2];
    if ((infile = fopen(s, "rb")) == NULL) {
        printf("??? %s\n", s);
        return EXIT_FAILURE;
    }
    s = argv[3];
    if ((outfile = fopen(s, "wb")) == NULL) {
        printf("??? %s\n", s);
        fclose(infile);
        return EXIT_FAILURE;
    }
    if (toupper((unsigned char)*argv[1]) == 'E')
        Encode();
    else
        Decode();
    fclose(infile);
    /* fclose flushes buffered output; a failure here means lost data. */
    if (fclose(outfile) == EOF)
        Error(wterr);
    return EXIT_SUCCESS;
}