1 /**************************************************************
3 written by Haruyasu Yoshizaki 1988/11/20
4 some minor changes 1989/04/06
5 comments translated by Haruhiko Okumura 1989/04/07
6 getbit and getbyte modified 1990/03/23 by Paul Edwards
7 so that they would work on machines where integers are
8 not necessarily 16 bits (although ANSI guarantees a
9 minimum of 16). This program has compiled and run with
10 no errors under Turbo C 2.0, Power C, and SAS/C 4.5
11 (running on an IBM mainframe under MVS/XA 2.2). Could
12 people please use YYYY/MM/DD date format so that everyone
13 in the world can know what format the date is in?
14 external storage of filesize changed 1990/04/18 by Paul Edwards to
15 Intel's "little endian" rather than a machine-dependant style so
16 that files produced on one machine with lzhuf can be decoded on
17 any other. "little endian" style was chosen since lzhuf
18 originated on PC's, and therefore they should dictate the
20 initialization of something predicting spaces changed 1990/04/22 by
21 Paul Edwards so that when the compressed file is taken somewhere
22 else, it will decode properly, without changing ascii spaces to
23 ebcdic spaces. This was done by changing the ' ' (space literal)
24 to 0x20 (which is the far most likely character to occur, if you
25 don't know what environment it will be running on.
26 **************************************************************/
32 FILE *infile, *outfile;
33 static unsigned long int textsize = 0, codesize = 0, printcount = 0;
35 char wterr[] = "Can't write.";
37 static void Error(char *message)
39 printf("\n%s\n", message);
43 /********** LZSS compression **********/
45 #define N 4096 /* buffer size */
46 #define F 60 /* lookahead buffer size */
48 #define NIL N /* leaf of tree */
52 static int match_position, match_length,
53 lson[N + 1], rson[N + 257], dad[N + 1];
55 static void InitTree(void) /* initialize trees */
59 for (i = N + 1; i <= N + 256; i++)
60 rson[i] = NIL; /* root */
61 for (i = 0; i < N; i++)
62 dad[i] = NIL; /* node */
65 static void InsertNode(int r) /* insert to tree */
74 rson[r] = lson[r] = NIL;
94 for (i = 1; i < F; i++)
95 if ((cmp = key[i] - text_buf[p + i]) != 0)
98 if (i > match_length) {
99 match_position = ((r - p) & (N - 1)) - 1;
100 if ((match_length = i) >= F)
103 if (i == match_length) {
104 if ((c = ((r - p) & (N-1)) - 1) < (unsigned)match_position) {
115 if (rson[dad[p]] == p)
119 dad[p] = NIL; /* remove p */
122 static void DeleteNode(int p) /* remove from tree */
127 return; /* not registered */
135 if (rson[q] != NIL) {
138 } while (rson[q] != NIL);
139 rson[dad[q]] = lson[q];
140 dad[lson[q]] = dad[q];
148 if (rson[dad[p]] == p)
157 #define N_CHAR (256 - THRESHOLD + F)
158 /* kinds of characters (character code = 0..N_CHAR-1) */
159 #define T (N_CHAR * 2 - 1) /* size of table */
160 #define R (T - 1) /* position of root */
161 #define MAX_FREQ 0x8000 /* updates tree when the */
162 typedef unsigned char uchar;
165 /* table for encoding and decoding the upper 6 bits of position */
169 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
170 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
171 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
172 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
173 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
174 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
175 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
176 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
180 0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
181 0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
182 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
183 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
184 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
185 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
186 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
187 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
191 uchar d_code[256] = {
192 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
193 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
194 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
195 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
196 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
197 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
198 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
199 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
200 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
201 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
202 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
203 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
204 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
205 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
206 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
207 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
208 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
209 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
210 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
211 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
212 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
213 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
214 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
215 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
216 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
217 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
218 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
219 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
220 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
221 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
222 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
223 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
227 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
228 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
229 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
230 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
231 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
232 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
233 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
234 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
235 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
236 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
237 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
238 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
239 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
240 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
241 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
242 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
243 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
244 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
245 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
246 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
247 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
248 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
249 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
250 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
251 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
252 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
253 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
254 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
255 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
256 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
257 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
258 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
261 unsigned freq[T + 1]; /* frequency table */
263 int prnt[T + N_CHAR]; /* pointers to parent nodes, except for the */
264 /* elements [T..T + N_CHAR - 1] which are used to get */
265 /* the positions of leaves corresponding to the codes. */
267 int son[T]; /* pointers to child nodes (son[], son[] + 1) */
272 static int GetBit(void) /* get one bit */
276 while (getlen <= 8) {
277 if ((int)(i = getc(infile)) < 0) i = 0;
278 getbuf |= i << (8 - getlen);
284 return (int)((i & 0x8000) >> 15);
287 static int GetByte(void) /* get one byte */
291 while (getlen <= 8) {
292 if ((int)(i = getc(infile)) < 0) i = 0;
293 getbuf |= i << (8 - getlen);
299 return (int)((i & 0xff00) >> 8);
305 static void Putcode(int l, unsigned c) /* output c bits of code */
307 putbuf |= c >> putlen;
308 if ((putlen += l) >= 8) {
309 if (putc(putbuf >> 8, outfile) == EOF) {
312 if ((putlen -= 8) >= 8) {
313 if (putc(putbuf, outfile) == EOF) {
318 putbuf = c << (l - putlen);
327 /* initialization of tree */
329 static void StartHuff(void)
333 for (i = 0; i < N_CHAR; i++) {
340 freq[j] = freq[i] + freq[i + 1];
342 prnt[i] = prnt[i + 1] = j;
350 /* reconstruction of tree */
352 static void reconst(void)
357 /* collect leaf nodes in the first half of the table */
358 /* and replace the freq by (freq + 1) / 2. */
360 for (i = 0; i < T; i++) {
362 freq[j] = (freq[i] + 1) / 2;
367 /* begin constructing tree by connecting sons */
368 for (i = 0, j = N_CHAR; j < T; i += 2, j++) {
370 f = freq[j] = freq[i] + freq[k];
371 for (k = j - 1; f < freq[k]; k--);
374 memmove(&freq[k + 1], &freq[k], l);
376 memmove(&son[k + 1], &son[k], l);
380 for (i = 0; i < T; i++) {
381 if ((k = son[i]) >= T) {
384 prnt[k] = prnt[k + 1] = i;
390 /* increment frequency of given code by one, and update tree */
392 static void update(int c)
396 if (freq[R] == MAX_FREQ) {
403 /* if the order is disturbed, exchange nodes */
404 if ((unsigned)k > freq[l = c + 1]) {
405 while ((unsigned)k > freq[++l]);
412 if (i < T) prnt[i + 1] = l;
418 if (j < T) prnt[j + 1] = c;
423 } while ((c = prnt[c]) != 0); /* repeat up to root */
428 static void EncodeChar(unsigned c)
437 /* travel from leaf to root */
441 /* if node's address is odd-numbered, choose bigger brother node */
442 if (k & 1) i += 0x8000;
445 } while ((k = prnt[k]) != R);
452 static void EncodePosition(unsigned c)
456 /* output upper 6 bits by table lookup */
458 Putcode(p_len[i], (unsigned)p_code[i] << 8);
460 /* output lower 6 bits verbatim */
461 Putcode(6, (c & 0x3f) << 10);
464 static void EncodeEnd(void)
467 if (putc(putbuf >> 8, outfile) == EOF) {
474 static int DecodeChar(void)
480 /* travel from root to leaf, */
481 /* choosing the smaller child node (son[]) if the read bit is 0, */
482 /* the bigger (son[]+1} if 1 */
492 static int DecodePosition(void)
496 /* recover upper 6 bits from table */
498 c = (unsigned)d_code[i] << 6;
501 /* read lower 6 bits verbatim */
504 i = (i << 1) + GetBit();
506 return (int)(c | (i & 0x3f));
511 static void Encode(void) /* compression */
513 int i, c, len, r, s, last_match_length;
515 fseek(infile, 0L, 2);
516 textsize = ftell(infile);
517 fputc((int)((textsize & 0xff)),outfile);
518 fputc((int)((textsize & 0xff00) >> 8),outfile);
519 fputc((int)((textsize & 0xff0000L) >> 16),outfile);
520 fputc((int)((textsize & 0xff000000L) >> 24),outfile);
522 Error(wterr); /* output size of text */
526 textsize = 0; /* rewind and re-read */
531 for (i = s; i < r; i++)
533 for (len = 0; len < F && (c = getc(infile)) != EOF; len++)
534 text_buf[r + len] = (unsigned char)c;
536 for (i = 1; i <= F; i++)
540 if (match_length > len)
542 if (match_length <= THRESHOLD) {
544 EncodeChar(text_buf[r]);
546 EncodeChar(255 - THRESHOLD + match_length);
547 EncodePosition(match_position);
549 last_match_length = match_length;
550 for (i = 0; i < last_match_length &&
551 (c = getc(infile)) != EOF; i++) {
553 text_buf[s] = (unsigned char)c;
555 text_buf[s + N] = (unsigned char)c;
556 s = (s + 1) & (N - 1);
557 r = (r + 1) & (N - 1);
560 if ((textsize += i) > printcount) {
561 printf("%12ld\r", textsize);
564 while (i++ < last_match_length) {
566 s = (s + 1) & (N - 1);
567 r = (r + 1) & (N - 1);
568 if (--len) InsertNode(r);
572 printf("In : %ld bytes\n", textsize);
573 printf("Out: %ld bytes\n", codesize);
574 printf("Out/In: %.3f\n", 1.0 * codesize / textsize);
577 static void Decode(void) /* recover */
580 unsigned long int count;
582 textsize = (fgetc(infile));
583 textsize |= (fgetc(infile) << 8);
584 textsize |= (fgetc(infile) << 16);
585 textsize |= (fgetc(infile) << 24);
587 Error("Can't read"); /* read size of text */
591 for (i = 0; i < N - F; i++)
594 for (count = 0; count < textsize; ) {
597 if (putc(c, outfile) == EOF) {
600 text_buf[r++] = (unsigned char)c;
604 i = (r - DecodePosition() - 1) & (N - 1);
605 j = c - 255 + THRESHOLD;
606 for (k = 0; k < j; k++) {
607 c = text_buf[(i + k) & (N - 1)];
608 if (putc(c, outfile) == EOF) {
611 text_buf[r++] = (unsigned char)c;
616 if (count > printcount) {
617 printf("%12ld\r", count);
621 printf("%12ld\n", count);
624 int main(int argc, char *argv[])
629 printf("'lzhuf e file1 file2' encodes file1 into file2.\n"
630 "'lzhuf d file2 file1' decodes file2 into file1.\n");
633 if ((s = argv[1], s[1] || strpbrk(s, "DEde") == NULL)
634 || (s = argv[2], (infile = fopen(s, "rb")) == NULL)
635 || (s = argv[3], (outfile = fopen(s, "wb")) == NULL)) {
636 printf("??? %s\n", s);
639 if (toupper(*argv[1]) == 'E')