From: Leandro Lucarella Date: Tue, 22 Jun 2004 03:56:20 +0000 (+0000) Subject: Se cambia por un nombre mas representativo. X-Git-Tag: svn_import~92 X-Git-Url: https://git.llucax.com/z.facultad/75.06/jacu.git/commitdiff_plain/04a71a3b18d07933341ef363e3cbe408bb5a2156 Se cambia por un nombre mas representativo. --- diff --git a/examples/lzhuff/lzhuf.c b/examples/lzhuff/lzhuf.c new file mode 100644 index 0000000..96e6818 --- /dev/null +++ b/examples/lzhuff/lzhuf.c @@ -0,0 +1,646 @@ +/************************************************************** + lzhuf.c + written by Haruyasu Yoshizaki 1988/11/20 + some minor changes 1989/04/06 + comments translated by Haruhiko Okumura 1989/04/07 + getbit and getbyte modified 1990/03/23 by Paul Edwards + so that they would work on machines where integers are + not necessarily 16 bits (although ANSI guarantees a + minimum of 16). This program has compiled and run with + no errors under Turbo C 2.0, Power C, and SAS/C 4.5 + (running on an IBM mainframe under MVS/XA 2.2). Could + people please use YYYY/MM/DD date format so that everyone + in the world can know what format the date is in? + external storage of filesize changed 1990/04/18 by Paul Edwards to + Intel's "little endian" rather than a machine-dependant style so + that files produced on one machine with lzhuf can be decoded on + any other. "little endian" style was chosen since lzhuf + originated on PC's, and therefore they should dictate the + standard. + initialization of something predicting spaces changed 1990/04/22 by + Paul Edwards so that when the compressed file is taken somewhere + else, it will decode properly, without changing ascii spaces to + ebcdic spaces. This was done by changing the ' ' (space literal) + to 0x20 (which is the far most likely character to occur, if you + don't know what environment it will be running on. 
+**************************************************************/ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +FILE *infile, *outfile; +static unsigned long int textsize = 0, codesize = 0, printcount = 0; + +char wterr[] = "Can't write."; + +static void Error(char *message) +{ + printf("\n%s\n", message); + exit(EXIT_FAILURE); +} + +/********** LZSS compression **********/ + +#define N 4096 /* buffer size */ +#define F 60 /* lookahead buffer size */ +#define THRESHOLD 2 +#define NIL N /* leaf of tree */ + +unsigned char + text_buf[N + F - 1]; +static int match_position, match_length, + lson[N + 1], rson[N + 257], dad[N + 1]; + +static void InitTree(void) /* initialize trees */ +{ + int i; + + for (i = N + 1; i <= N + 256; i++) + rson[i] = NIL; /* root */ + for (i = 0; i < N; i++) + dad[i] = NIL; /* node */ +} + +static void InsertNode(int r) /* insert to tree */ +{ + int i, p, cmp; + unsigned char *key; + unsigned c; + + cmp = 1; + key = &text_buf[r]; + p = N + 1 + key[0]; + rson[r] = lson[r] = NIL; + match_length = 0; + for ( ; ; ) { + if (cmp >= 0) { + if (rson[p] != NIL) + p = rson[p]; + else { + rson[p] = r; + dad[r] = p; + return; + } + } else { + if (lson[p] != NIL) + p = lson[p]; + else { + lson[p] = r; + dad[r] = p; + return; + } + } + for (i = 1; i < F; i++) + if ((cmp = key[i] - text_buf[p + i]) != 0) + break; + if (i > THRESHOLD) { + if (i > match_length) { + match_position = ((r - p) & (N - 1)) - 1; + if ((match_length = i) >= F) + break; + } + if (i == match_length) { + if ((c = ((r - p) & (N-1)) - 1) < (unsigned)match_position) { + match_position = c; + } + } + } + } + dad[r] = dad[p]; + lson[r] = lson[p]; + rson[r] = rson[p]; + dad[lson[p]] = r; + dad[rson[p]] = r; + if (rson[dad[p]] == p) + rson[dad[p]] = r; + else + lson[dad[p]] = r; + dad[p] = NIL; /* remove p */ +} + +static void DeleteNode(int p) /* remove from tree */ +{ + int q; + + if (dad[p] == NIL) + return; /* not registered */ + if (rson[p] == NIL) + q = lson[p]; + else + if (lson[p] == NIL) 
+ q = rson[p]; + else { + q = lson[p]; + if (rson[q] != NIL) { + do { + q = rson[q]; + } while (rson[q] != NIL); + rson[dad[q]] = lson[q]; + dad[lson[q]] = dad[q]; + lson[q] = lson[p]; + dad[lson[p]] = q; + } + rson[q] = rson[p]; + dad[rson[p]] = q; + } + dad[q] = dad[p]; + if (rson[dad[p]] == p) + rson[dad[p]] = q; + else + lson[dad[p]] = q; + dad[p] = NIL; +} + +/* Huffman coding */ + +#define N_CHAR (256 - THRESHOLD + F) + /* kinds of characters (character code = 0..N_CHAR-1) */ +#define T (N_CHAR * 2 - 1) /* size of table */ +#define R (T - 1) /* position of root */ +#define MAX_FREQ 0x8000 /* updates tree when the */ +typedef unsigned char uchar; + + +/* table for encoding and decoding the upper 6 bits of position */ + +/* for encoding */ +uchar p_len[64] = { + 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 +}; + +uchar p_code[64] = { + 0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68, + 0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C, + 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC, + 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE, + 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE, + 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; + +/* for decoding */ +uchar d_code[256] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, + 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D, + 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F, + 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11, + 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13, + 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15, + 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17, + 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B, + 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F, + 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23, + 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27, + 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B, + 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, +}; + +uchar d_len[256] = { + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 
0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, +}; + +unsigned freq[T + 1]; /* frequency table */ + +int prnt[T + N_CHAR]; /* pointers to parent nodes, except for the */ + /* elements [T..T + N_CHAR - 1] which are used to get */ + /* the positions of leaves corresponding to the codes. 
*/ + +int son[T]; /* pointers to child nodes (son[], son[] + 1) */ + +unsigned getbuf = 0; +uchar getlen = 0; + +static int GetBit(void) /* get one bit */ +{ + unsigned i; + + while (getlen <= 8) { + if ((int)(i = getc(infile)) < 0) i = 0; + getbuf |= i << (8 - getlen); + getlen += 8; + } + i = getbuf; + getbuf <<= 1; + getlen--; + return (int)((i & 0x8000) >> 15); +} + +static int GetByte(void) /* get one byte */ +{ + unsigned i; + + while (getlen <= 8) { + if ((int)(i = getc(infile)) < 0) i = 0; + getbuf |= i << (8 - getlen); + getlen += 8; + } + i = getbuf; + getbuf <<= 8; + getlen -= 8; + return (int)((i & 0xff00) >> 8); +} + +unsigned putbuf = 0; +uchar putlen = 0; + +static void Putcode(int l, unsigned c) /* output c bits of code */ +{ + putbuf |= c >> putlen; + if ((putlen += l) >= 8) { + if (putc(putbuf >> 8, outfile) == EOF) { + Error(wterr); + } + if ((putlen -= 8) >= 8) { + if (putc(putbuf, outfile) == EOF) { + Error(wterr); + } + codesize += 2; + putlen -= 8; + putbuf = c << (l - putlen); + } else { + putbuf <<= 8; + codesize++; + } + } +} + + +/* initialization of tree */ + +static void StartHuff(void) +{ + int i, j; + + for (i = 0; i < N_CHAR; i++) { + freq[i] = 1; + son[i] = i + T; + prnt[i + T] = i; + } + i = 0; j = N_CHAR; + while (j <= R) { + freq[j] = freq[i] + freq[i + 1]; + son[j] = i; + prnt[i] = prnt[i + 1] = j; + i += 2; j++; + } + freq[T] = 0xffff; + prnt[R] = 0; +} + + +/* reconstruction of tree */ + +static void reconst(void) +{ + int i, j, k; + unsigned f, l; + + /* collect leaf nodes in the first half of the table */ + /* and replace the freq by (freq + 1) / 2. 
*/ + j = 0; + for (i = 0; i < T; i++) { + if (son[i] >= T) { + freq[j] = (freq[i] + 1) / 2; + son[j] = son[i]; + j++; + } + } + /* begin constructing tree by connecting sons */ + for (i = 0, j = N_CHAR; j < T; i += 2, j++) { + k = i + 1; + f = freq[j] = freq[i] + freq[k]; + for (k = j - 1; f < freq[k]; k--); + k++; + l = (j - k) * 2; + memmove(&freq[k + 1], &freq[k], l); + freq[k] = f; + memmove(&son[k + 1], &son[k], l); + son[k] = i; + } + /* connect prnt */ + for (i = 0; i < T; i++) { + if ((k = son[i]) >= T) { + prnt[k] = i; + } else { + prnt[k] = prnt[k + 1] = i; + } + } +} + + +/* increment frequency of given code by one, and update tree */ + +static void update(int c) +{ + int i, j, k, l; + + if (freq[R] == MAX_FREQ) { + reconst(); + } + c = prnt[c + T]; + do { + k = ++freq[c]; + + /* if the order is disturbed, exchange nodes */ + if ((unsigned)k > freq[l = c + 1]) { + while ((unsigned)k > freq[++l]); + l--; + freq[c] = freq[l]; + freq[l] = k; + + i = son[c]; + prnt[i] = l; + if (i < T) prnt[i + 1] = l; + + j = son[l]; + son[l] = i; + + prnt[j] = c; + if (j < T) prnt[j + 1] = c; + son[c] = j; + + c = l; + } + } while ((c = prnt[c]) != 0); /* repeat up to root */ +} + +unsigned code, len; + +static void EncodeChar(unsigned c) +{ + unsigned i; + int j, k; + + i = 0; + j = 0; + k = prnt[c + T]; + + /* travel from leaf to root */ + do { + i >>= 1; + + /* if node's address is odd-numbered, choose bigger brother node */ + if (k & 1) i += 0x8000; + + j++; + } while ((k = prnt[k]) != R); + Putcode(j, i); + code = i; + len = j; + update(c); +} + +static void EncodePosition(unsigned c) +{ + unsigned i; + + /* output upper 6 bits by table lookup */ + i = c >> 6; + Putcode(p_len[i], (unsigned)p_code[i] << 8); + + /* output lower 6 bits verbatim */ + Putcode(6, (c & 0x3f) << 10); +} + +static void EncodeEnd(void) +{ + if (putlen) { + if (putc(putbuf >> 8, outfile) == EOF) { + Error(wterr); + } + codesize++; + } +} + +static int DecodeChar(void) +{ + unsigned c; + + c = 
son[R]; + + /* travel from root to leaf, */ + /* choosing the smaller child node (son[]) if the read bit is 0, */ + /* the bigger (son[]+1} if 1 */ + while (c < T) { + c += GetBit(); + c = son[c]; + } + c -= T; + update(c); + return (int)c; +} + +static int DecodePosition(void) +{ + unsigned i, j, c; + + /* recover upper 6 bits from table */ + i = GetByte(); + c = (unsigned)d_code[i] << 6; + j = d_len[i]; + + /* read lower 6 bits verbatim */ + j -= 2; + while (j--) { + i = (i << 1) + GetBit(); + } + return (int)(c | (i & 0x3f)); +} + +/* compression */ + +static void Encode(void) /* compression */ +{ + int i, c, len, r, s, last_match_length; + + fseek(infile, 0L, 2); + textsize = ftell(infile); + fputc((int)((textsize & 0xff)),outfile); + fputc((int)((textsize & 0xff00) >> 8),outfile); + fputc((int)((textsize & 0xff0000L) >> 16),outfile); + fputc((int)((textsize & 0xff000000L) >> 24),outfile); + if (ferror(outfile)) + Error(wterr); /* output size of text */ + if (textsize == 0) + return; + rewind(infile); + textsize = 0; /* rewind and re-read */ + StartHuff(); + InitTree(); + s = 0; + r = N - F; + for (i = s; i < r; i++) + text_buf[i] = 0x20; + for (len = 0; len < F && (c = getc(infile)) != EOF; len++) + text_buf[r + len] = (unsigned char)c; + textsize = len; + for (i = 1; i <= F; i++) + InsertNode(r - i); + InsertNode(r); + do { + if (match_length > len) + match_length = len; + if (match_length <= THRESHOLD) { + match_length = 1; + EncodeChar(text_buf[r]); + } else { + EncodeChar(255 - THRESHOLD + match_length); + EncodePosition(match_position); + } + last_match_length = match_length; + for (i = 0; i < last_match_length && + (c = getc(infile)) != EOF; i++) { + DeleteNode(s); + text_buf[s] = (unsigned char)c; + if (s < F - 1) + text_buf[s + N] = (unsigned char)c; + s = (s + 1) & (N - 1); + r = (r + 1) & (N - 1); + InsertNode(r); + } + if ((textsize += i) > printcount) { + printf("%12ld\r", textsize); + printcount += 1024; + } + while (i++ < last_match_length) { + 
DeleteNode(s); + s = (s + 1) & (N - 1); + r = (r + 1) & (N - 1); + if (--len) InsertNode(r); + } + } while (len > 0); + EncodeEnd(); + printf("In : %ld bytes\n", textsize); + printf("Out: %ld bytes\n", codesize); + printf("Out/In: %.3f\n", 1.0 * codesize / textsize); +} + +static void Decode(void) /* recover */ +{ + int i, j, k, r, c; + unsigned long int count; + + textsize = (fgetc(infile)); + textsize |= (fgetc(infile) << 8); + textsize |= (fgetc(infile) << 16); + textsize |= (fgetc(infile) << 24); + if (ferror(infile)) + Error("Can't read"); /* read size of text */ + if (textsize == 0) + return; + StartHuff(); + for (i = 0; i < N - F; i++) + text_buf[i] = 0x20; + r = N - F; + for (count = 0; count < textsize; ) { + c = DecodeChar(); + if (c < 256) { + if (putc(c, outfile) == EOF) { + Error(wterr); + } + text_buf[r++] = (unsigned char)c; + r &= (N - 1); + count++; + } else { + i = (r - DecodePosition() - 1) & (N - 1); + j = c - 255 + THRESHOLD; + for (k = 0; k < j; k++) { + c = text_buf[(i + k) & (N - 1)]; + if (putc(c, outfile) == EOF) { + Error(wterr); + } + text_buf[r++] = (unsigned char)c; + r &= (N - 1); + count++; + } + } + if (count > printcount) { + printf("%12ld\r", count); + printcount += 1024; + } + } + printf("%12ld\n", count); +} + +int main(int argc, char *argv[]) +{ + char *s; + + if (argc != 4) { + printf("'lzhuf e file1 file2' encodes file1 into file2.\n" + "'lzhuf d file2 file1' decodes file2 into file1.\n"); + return EXIT_FAILURE; + } + if ((s = argv[1], s[1] || strpbrk(s, "DEde") == NULL) + || (s = argv[2], (infile = fopen(s, "rb")) == NULL) + || (s = argv[3], (outfile = fopen(s, "wb")) == NULL)) { + printf("??? 
%s\n", s); + return EXIT_FAILURE; + } + if (toupper(*argv[1]) == 'E') + Encode(); + else + Decode(); + fclose(infile); + fclose(outfile); + return EXIT_SUCCESS; +} diff --git a/examples/ppmc/Makefile b/examples/ppmc/Makefile new file mode 100644 index 0000000..887aa40 --- /dev/null +++ b/examples/ppmc/Makefile @@ -0,0 +1,18 @@ + +TARGETS=ppmc unppmc +COMMON= ppmc.o ppmcdata.o range.o + +CFLAGS=-O3 -Wall -DNDEBUG + +all: $(TARGETS) + +ppmc: $(COMMON) ppmcmain.c + +unppmc: $(COMMON) unppmc.c + +clean: + + @$(RM) -f *.o $(TARGETS) + +.PHONY: all clean + diff --git a/examples/ppmc/README b/examples/ppmc/README new file mode 100644 index 0000000..5e9ae11 --- /dev/null +++ b/examples/ppmc/README @@ -0,0 +1,118 @@ + + PPMC + + +TABLE OF CONTENTS +-Description +-Compiling +-Files included in this release +-Timing +-Author +-Disclaimer + + +DESCRIPTION +This is the source code of an implementation of ppmc. +The data structures used are hash tables instead of a context trie. + +A file is compressed and decompressed like this: +ppmc inputfile compressedfile +unppmc compressedfile outputfile + +I don't recommend using this for compressing vital information, because it +hasn't been fully tested, and moreover, the machine where the decompressor is +being run must have at least as much memory as the encoder had. I recommend +using this compressor only for research purposes. + +For further information read ac_ppmc.html, also included in the package; for +the latest version visit http://www.ross.net/arturocampos + + +COMPILING +The source code is in the C programming language, and was successfully +compiled with Djgpp version 2.02. A project was made which included the +following files for the encoder (call this project ppmc): +ppmc.c +ppmcdata.c +ppmcmain.c +range.c + +And for the decoder (call this project unppmc) +ppmc.c +ppmcdata.c +range.c +unppmc.c + +Then you just have to hit F9 and wait. I tried to write makefiles, however there +was something wrong and they didn't work. 
If someone has any idea about it, or +how to compile this source for other compilers, please let me know, so in +the next release I can include makefiles. + + +FILES INCLUDED IN THIS RELEASE +In the zip file ac_ppmc_html.zip you should find: + +readme.txt -> The file you're reading now. +ac_ppmc.html -> article which explains ppmc and the data structures used +ppmc.c -> This is the main file which includes all the routines used + by both the encoder and decoder's model. +range.c -> The range encoder and decoder routines. +ppmcmain.c -> The main routine for the compressor +unppmc.c -> The main routine for the decompressor +ppmcdata.c -> Global data structures. (used mainly by ppmc.c) +ppmc.h -> Declarations of routines +ppmcdata.h -> Declarations of global data and structures +range.h -> Declarations of the routines for the range encoder and decoder +ppmc.exe -> A compiled version of the compressor +unppmc.exe -> A compiled version of the decompressor + +All these files are the implementation of ppmc order-4 using lazy exclusions. +In the /exclusion directory you can find the same files (except the readme) but +for ppmc order-4 using full exclusion. +I thought there was no need to include the files for ppmc-o3h, because they +are nothing other than ppmc with lazy exclusions, using only order-2 (and +lower) but instead of using the hash key for order-2 it uses the one for +order-3. (You also have to take care of o3_byte, of course.) + +The executables were compiled with Djgpp 2.95 using only the switch -O6. + + +TIMING +The standard function to get the +time, "time()", has a maximum precision of seconds. This is not enough for +testing the speed of a compressor. Because of this, timing was not included in +this release. 
+If you are interested in compiling it with Djgpp, my original version used the +following code: + + struct time ppmc_time, ppmc_time2; + double _time2, _time; + + // Get current time + gettime(&ppmc_time); + + //Compress file + + + // Print bpb and kbyps + gettime(&ppmc_time2); + printf("%s at %f bpb in ",argv[1],((float)filesize(file_output)/(float)size_file_input)*(float)8); + _time=((ppmc_time.ti_min)*60)+(ppmc_time.ti_sec)+(((double)ppmc_time.ti_hund)/100); + _time2=((ppmc_time2.ti_min)*60)+(ppmc_time2.ti_sec)+(((double)ppmc_time2.ti_hund)/100); + if((_time2-_time)!=0) + printf("%f kbytes/seconds.", ((float)size_file_input/(float)1024)/(_time2-_time)); + + +AUTHOR +This code was written by Arturo San Emeterio Campos; you can find his home page +at: http://www.ross.net/arturocampos +And his email is: arturo-campos@mixmail.com + + +DISCLAIMER +Copyright (c) Arturo San Emeterio Campos 1999. All rights reserved. Permission +is granted to make verbatim copies of this files for private use only. There +is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + + Arturo San Emeterio Campos, Barcelona 04-Jan-2000 diff --git a/examples/ppmc/exclusion/COMMENTS b/examples/ppmc/exclusion/COMMENTS new file mode 100644 index 0000000..bdfc5ab --- /dev/null +++ b/examples/ppmc/exclusion/COMMENTS @@ -0,0 +1,15 @@ +Order-3 and order-4 use almost the same code. With lazy exclusions it was +exactly the same. But for full exclusions it is not. + +The problem is that when updating we can't be sure that the stored pointer +points where it should (as it did with lazy exclusions). So we have to +read the whole linked list. This is done for order-2 and order-3. However, +order-4 is the highest order used, so it doesn't use exclusion; therefore +it uses the same code as with lazy exclusions. + +So if you want to use higher orders remember that the code to use should +be the one for order-3 because it updates correctly when exclusions are +being used, 
and use the order-4 code for the highest order. + + Arturo Campos (arturo-campos@mixmail.com) + http://www.arturocampos.com \ No newline at end of file diff --git a/examples/ppmc/exclusion/Makefile b/examples/ppmc/exclusion/Makefile new file mode 100644 index 0000000..887aa40 --- /dev/null +++ b/examples/ppmc/exclusion/Makefile @@ -0,0 +1,18 @@ + +TARGETS=ppmc unppmc +COMMON= ppmc.o ppmcdata.o range.o + +CFLAGS=-O3 -Wall -DNDEBUG + +all: $(TARGETS) + +ppmc: $(COMMON) ppmcmain.c + +unppmc: $(COMMON) unppmc.c + +clean: + + @$(RM) -f *.o $(TARGETS) + +.PHONY: all clean + diff --git a/examples/ppmc/exclusion/ppmc.c b/examples/ppmc/exclusion/ppmc.c new file mode 100644 index 0000000..0730e1a --- /dev/null +++ b/examples/ppmc/exclusion/ppmc.c @@ -0,0 +1,3418 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmc.c" (exclusion) + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains the whole ppmc encoder. It uses hash tables for + managing most of the orders, and a maximum order of 4. It codes bytes. + Order-1-0-(-1) are all handled in tables. Order-2 has a table with + direct hashing with pointers to the linked lists. Order-4 and order-3 + both have hash tables with pointers to contexts in a linked list which + finally have a pointer to the start of the linked list with the + probability distribution. Update exclusion is used, but exclusion is not. + + Please note that if the machine where the decoder is run doesn't have as + much memory as the computer where the encoder was run, the decoder will + not be able to properly decode the file, because it will not be able to + keep track of new statistics; in this case it will just exit. 
+ + For applications where the loss of data is not admissible, I suggest that you + limit both the encoder's and the decoder's memory requirements to a given minimum. + + Using exclusion. It's up to the main encoding routine to clear this table + for every new byte. +*/ + + +#include <stdio.h> +#include <stdlib.h> +#include "range.h" +#include "ppmcdata.h" + + + +// Routines used by ppmc. Not including the range coder. +// +// They are for initializing both the encoder and the decoder, and unless there +// are two versions, both encoder and decoder use the same routines. Like +// "ppmc_initialize_contexts". + + +// This one allocates the memory needed by ppmc, and adjusts some pointers used +// for allocating elements in the linked lists. The mempool arrays are +// initialized here. +// +// On failure it prints a message, sets ppmc_out_of_memory to 1 and returns; +// on success ppmc_out_of_memory is set to 0. +void ppmc_alloc_memory(void) +{ + unsigned long counter; + + + // Init mempool buffers + + for(counter=0;counter!=_mempool_max_index;++counter) + { + _bytes_pool_array[counter]=0; + _context_pool_array[counter]=0; + } + + _bytes_pool_index=1; //first entry will be used now + _context_pool_index=1; + + + // Allocate memory for ppmc structures and adjust some variables + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + // Record the bytes pool right away, so that ppmc_free_memory() can still + // release it if the context pool allocation below fails. + _bytes_pool_array[0]=_bytes_pool; + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + + //save the context pool pointer in the array for freeing + _context_pool_array[0]=_context_pool; + + + //adjust variables + _bytes_pool_max=_bytes_pool+_bytes_pool_elements; + _context_pool_max=_context_pool+_context_pool_elements; + + ppmc_out_of_memory=0; //we still have memory +} + + +// This routine initializes all the contexts, and all the 
tables including +// those that hold the number of bytes defined in a context. +void ppmc_initialize_contexts(void) +{ + unsigned long counter, counter2; + + + // Order-0 + for(counter=0;counter!=256;++counter) //clear table + order0_array[counter]=0; + + order0_defined_bytes=0; //adjust variables + order0_max_cump=0; + + + // Order-1 + for(counter=0;counter!=256;++counter) //erase every table of every context + for(counter2=0;counter2!=256;++counter2) + order1_array[counter][counter2]=0; + + for(counter=0;counter!=256;++counter) //adjust variables + { + order1_defined_bytes_array[counter]=0; + order1_max_cump_array[counter]=0; + } + + + // Order-2 + for(counter=0;counter!=65536;++counter) + { + //order2_array[counter].prob=0; //clear pointer to bytes and frequencies + //order2_array[counter].max_cump=0; + order2_array[counter].defined_bytes=0; + } + + + // Order-4-3 + for(counter=0;counter!=65536;++counter) //order-4-3 + { + order4_hasht[counter]=0; + order3_hasht[counter]=0; + } +} + + +// This routine initializes the encoder model by outputting as many bytes as +// needed to prepare the models. This should be called before the main loop +// and after the memory has been allocated and tables initialized. +// +// It does not use the range coder: the first 4 bytes of file_input are +// copied to file_output uncoded and kept as the order-4..order-1 context bytes. +// NOTE(review): the fgetc() results are not checked for EOF here. +void ppmc_encoder_initialize(void) +{ + + // Initialize order-0 and prepare different bytes for orders + fputc((byte=fgetc(file_input)),file_output); + o4_byte=byte; //order-4 + + fputc((byte=fgetc(file_input)),file_output); + o3_byte=byte; //order-3 + + fputc((byte=fgetc(file_input)),file_output); + o2_byte=byte; //order-2 + ppmc_update_order0(); + + fputc((byte=fgetc(file_input)),file_output); + o1_byte=byte; //order-1 + +} + + +// This routine initializes the decoder model; it should be called to make the same +// changes as "ppmc_encoder_initialize()" did. 
+void ppmc_decoder_initialize(void) +{ + + // Initialize order-0 and context bytes. + // Four bytes are read straight from file_input (no range decoding), each one + // is copied to file_output and kept as the order-4..order-1 context byte. + // NOTE(review): the fgetc() results are not checked for EOF here. + byte=fgetc(file_input); + o4_byte=byte; //order-4 + fputc(byte,file_output); + + byte=fgetc(file_input); + o3_byte=byte; //order-3 + fputc(byte,file_output); + + byte=fgetc(file_input); + o2_byte=byte; //order-2 + + fputc(byte,file_output); //output this (third) byte + ppmc_update_order0(); //called while "byte" still holds the third byte + + byte=fgetc(file_input); + o1_byte=byte; //order-1 + fputc(byte,file_output); +} + + +// Once coding or decoding is finished you have to call this routine. +// It frees every non-null buffer recorded in _bytes_pool_array and +// _context_pool_array; the array entries themselves are left unchanged. +void ppmc_free_memory(void) +{ + unsigned long counter; + + // Free the memory buffers + + for(counter=0;counter!=_mempool_max_index;++counter) + { + if(_bytes_pool_array[counter]!=0) + free(_bytes_pool_array[counter]); + + if(_context_pool_array[counter]!=0) + free(_context_pool_array[counter]); + } + +} + + +// This routine flushes the memory and restarts all the tables of +// probabilities; current order bytes are not modified. This function +// is called when we run out of memory. We have to output the code +// number 256 which means memory flushing; for doing this we have to go +// to order-(-1), so we have to output an escape code in all the orders +// till we reach order-(-1) where we can code it. Then we free all the +// memory, alloc it again, and reinitialize all the orders. +// +// However we may find the case when the current order is not initialized; +// in this case we don't need to output an escape code. 
+void ppmc_flush_mem_enc(void) +{ + unsigned long counter; + + + // Overview: code an escape in every order whose current context has + // statistics, code symbol 256 in order-(-1), then rebuild the model. + + // Clear exclusion table + for(counter=0;counter!=256;++counter) + excluded[counter]=0; + + + // Code an escape code in order-4 + if(ppmc_get_totf_order4()!=0) //if 0 no need of escape code + { + + ppmc_get_escape_prob_order4(); //get prob and cump + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + } + + + // Code an escape code in order-3 + if(ppmc_get_totf_order3()!=0) //if 0 no need of escape code + { + + ppmc_get_escape_prob_order3(); //get prob and cump + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + } + + + // Code an escape code in order-2 + + o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte); + + // First check if current order-2 context is empty + if(order2_array[o2_cntxt].defined_bytes!=0) //it's not empty + { + ppmc_get_totf_order2(); + ppmc_get_escape_prob_order2(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + } + + + // Code an escape code in order-1 + + // First check if current order-1 table is empty + if(order1_defined_bytes_array[o1_byte]!=0) //it's not empty + { + ppmc_get_totf_order1(); + ppmc_get_escape_prob_order1(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + } + + + // Code an escape code in order-0. 
Order-0 always has at least one symbol + + ppmc_get_totf_order0(); + ppmc_get_escape_prob_order0(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + + + // Now we can code the flush symbol 256 with the fixed order-(-1) model + // (probability 1, cump equal to the symbol value, total cump 257) + + symb_prob=1; + symb_cump=256; + total_cump=257; + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + + // Now that the decoder knows about the flush, free memory and reinit. + // NOTE(review): ppmc_out_of_memory is not examined here after + // ppmc_alloc_memory() returns. + + ppmc_free_memory(); + ppmc_alloc_memory(); + ppmc_initialize_contexts(); + + + // Be sure that order-0 has at least one probability (seeded with o1_byte) + + order0_array[o1_byte]++; + order0_max_cump++; + order0_defined_bytes++; + +} + + +// When the decoder gets the flush symbol, most of the job is done +// because we already got all the escape codes, so we only have to reinit. +// The steps mirror the tail of ppmc_flush_mem_enc(). +void ppmc_flush_mem_dec(void) +{ + + // Free memory and reinit + + ppmc_free_memory(); + ppmc_alloc_memory(); + ppmc_initialize_contexts(); + + + // Be sure that order-0 has at least one probability (seeded with o1_byte) + + order0_array[o1_byte]++; + order0_max_cump++; + order0_defined_bytes++; + + +} + + + +// ORDER-(-1) functions, also called ordern1 (Negative1) in functions +// +// Because order-(-1) does not need to update its probability tables, it +// has no tables, and relies on the fact that the cump of a byte is its own +// value, the probability is fixed at 1, and the total cump is 257. +// +// The alphabet has the following distribution: 0-255 are the bytes. 256 is +// a special symbol which means that we have flushed the encoder tables, +// and thus the decoder must flush its tables too. +// +// The rest of the tables only have 256 symbols, because we have no need +// to assign a symbol to the flush code (which is already in the order-(-1) +// table) nor to the escape code. +// +// For order-(-1) we don't use exclusion. 
+ + +// Gets the probability for a given symbol in the order-(-1) (ordern1) +void ppmc_get_prob_ordern1(void) +{ + symb_cump=byte; //its value + symb_prob=1; //flat probability + total_cump=257; //total cump +} + + +// Returns in the variable "total_cump" the current total cump of +// order-(-1) +void ppmc_get_totf_ordern1(void) +{ + total_cump=257; //this is fixed +} + + +// Returns the symbol for a given cump under order-(-1) +unsigned long ppmc_get_symbol_ordern1 (void) +{ + return symb_cump; +} + + + +// ORDER-0 functions +// +// Due to the fact that order-0 has no context, I use an array for all the +// probabilities under order-0, just as you could do in a basic model for +// arithmetic coding. +// +// The main array is: order0_array. Where order0_array[byte] contains the +// probability for a given byte. The same applies to order-1. +// +// To ensure that the updating and coding is done correctly, "byte" can't +// be changed till all the coding and updating is done. +// +// Order-0 uses exclusions. Exclusion values are always prepared in "get_totf" +// so there's no need to get them again. However order-0 doesn't have to +// update exclude table, because order-(-1) will not use it + + +// Returns in the variable "total_cump" the current total cump of +// order-0. We have to read the whole array because we can't +// guarante that all the bytes are used. +void ppmc_get_totf_order0(void) +{ + unsigned long temp_cump, //temp value for the cump + counter; + + exc_defined_bytes=0; + exc_max_cump=0; + + // Read the number of defined bytes by reading the count of every byte + // and if it's present in the exclusion table. 
+ for(counter=0;counter!=256;++counter) + { + if(excluded[counter]==0) //only if it's not excluded + if(order0_array[counter]!=0) //if it has a nonzero count, then it's present + { + ++exc_defined_bytes; + exc_max_cump+=order0_array[counter]; + } + } + + // Total cump is current total cump plus the probability for the escape code + exc_total_cump=exc_max_cump+exc_defined_bytes; +} + + +// Codes a byte under order-0 and returns 1, otherwise it returns a 0 and +// has coded an escape code. In this case further coding is needed. +// +// Returns: 1 in case a byte was coded. 0 in case of escape code. +char ppmc_code_byte_order0(void) +{ + unsigned long counter; + + ppmc_get_totf_order0(); //get total cump + + // It's possible that due to excluding, there's no byte left, in that case + // return. + if(exc_defined_bytes==0) + return 0; + + // See if the byte is present + if(order0_array[byte]==0) //a probability of 0 + { + + // Because it was not present, output an escape code, prepare variables + + symb_cump=exc_max_cump; //obviously its cump is current max_cump + //without escape code's space + + symb_prob=exc_defined_bytes; //the number of defined bytes + + total_cump=exc_total_cump; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; //byte not coded + } + else + { + + coded_in_order=0; + + // The symbol is present, code it under order-0 + + symb_prob=order0_array[byte]; //get probability directly + + // Make cump for current symbol + + symb_cump=0; //for first symbol is 0 + for(counter=0; counter!=byte ; ++counter) + { + if(excluded[counter]==0) + symb_cump+=order0_array[counter]; //sum probabilities before our symbol + } + + total_cump=exc_total_cump; + + // Code the symbol + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //symbol coded under order-0 + } +} + + +// This functions update order-0 probabilities with current byte, also takes +// care about updating variables, and 
renormalizing. +void ppmc_update_order0(void) +{ + if(order0_array[byte]==0) + { + // It had a zero probability + order0_array[byte]++; //increment symbol probability + ++order0_defined_bytes; //new byte defined + ++order0_max_cump; //total cump + return; + } + else + { + // It had a non-zero probability + + // Increment its probability + order0_array[byte]++; //increment symbol probability + ++order0_max_cump; //total cump + + // Check to see if its the maximum in this case renormalize + if(order0_array[byte]==255) + ppmc_renormalize_order0(); + + return; + } +} + + +// This functions renormalizes the probabilities at order-0 updating variables +void ppmc_renormalize_order0(void) +{ + unsigned long counter; + + // Initialize variables + order0_defined_bytes=0; //clear them + order0_max_cump=0; + + // Loop over all probabilities, divide them by a factor of 2 and update variables + for(counter=0 ; counter!=256 ; ++counter) + { + if(order0_array[counter]!=0) + { + order0_array[counter]>>=1; //divide by a factor of 2 + if(order0_array[counter]==0) + order0_array[counter]=1; + } + + if(order0_array[counter]!=0) //see if it has a non zero probability + order0_defined_bytes++; + + order0_max_cump+=order0_array[counter]; //sum to the total cump + } +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of a escape code it returns -1 +void ppmc_decode_order0(void) +{ + unsigned long current_cump, counter; + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order0(); //total cump needed for decoding + + // It's possible that due to excluding, there's no byte left, in that case + // return. 
+ if(exc_defined_bytes==0) + { + byte=-1; + return; + } + + symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=exc_max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order0(); + range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + for(counter=0 ; counter!= 256 ; ++counter) + { + if(symb_cump>=1; //divide by a factor of 2 + if(order1_array[o1_byte][counter]==0) + order1_array[o1_byte][counter]=1; //don't let it have a 0 count + } + + if(order1_array[o1_byte][counter]!=0) //see if it has a non zero probability + order1_defined_bytes_array[o1_byte]++; + + order1_max_cump_array[o1_byte]+=order1_array[o1_byte][counter]; //sum to the total cump + } +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +void ppmc_decode_order1(void) +{ + unsigned long current_cump, counter; + + + // First check if current order-1 table is empty + if(order1_defined_bytes_array[o1_byte]==0) //it's empty + { + byte=-1; //byte not coded, nothing done + return; + } + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order1(); //total cump needed for decoding + + // It's possible that due to excluding, there's no byte left, in that case + // return. 
+ if(exc_defined_bytes==0) + { + byte=-1; + return; + } + + symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=exc_max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order1(); + range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob); + + + // Now update "exclude" table + for(counter=0;counter!=256;++counter) + if(order1_array[o1_byte][counter]!=0) + excluded[counter]=1; //occurred but was not code, now exclude + + // Mark as escape code (in fact nothing coded) + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + for(counter=0 ; counter!= 256 ; ++counter) + { + if(symb_cumpbyte]==0) + { + exc_defined_bytes++; + exc_max_cump+=node->freq; //add the probability of this byte to the cump + } + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // Total cump is current total cump plus the probability for the escape code + exc_total_cump=exc_max_cump+exc_defined_bytes; + +} + + +// Codes a byte under order-2 and returns 1. +// Otherwise it returns a 0. It may be that it has coded an escape code, or +// that current table was empty. +// +// Returns: 1 in case a byte was coded. 0 in case of escape code or empty table. +// In case the byte is coded under this context, coded_in_order=2. 
+char ppmc_code_byte_order2(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Initialize o2_cntxt + + o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte); + + + // First check if current order-2 context is empty + if(order2_array[o2_cntxt].defined_bytes==0) //it's empty + { + return 0; //byte not coded, nothing done + } + + + // Now try to code this byte under order-2 + + ppmc_get_totf_order2(); //get total cump + + + // It's possible that due to exclusion, there's no byte left, in that case + // return. + if(exc_defined_bytes==0) + return 0; + + + // See if the byte is present and compute its cump at the same time + + node=order2_array[o2_cntxt].prob; //pointer to first element in the linked list + + symb_cump=0; //the first symbol always has a 0 cump + + + // Now search the byte in the linked list + + do{ + if(node->byte==byte) + goto ppmc_o2_byte_found; //bad thing, I know, anyone has a better idea? + if(excluded[node->byte]==0) + symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. 
+ + o2_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=exc_max_cump; + symb_prob=exc_defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob); + + // Then, update "excluded" table + + node=order2_array[o2_cntxt].prob; + + do{ + excluded[node->byte]=1; + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + return 0; //now exit + + + // That code is executed when the byte is found in the linked list + + ppmc_o2_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=2; //successfully coded under order-2 + + o2_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-2 +} + + +// This functions update order-2 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// Of course "o2_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. +// +// This updating is only for encoding. +void ppmc_update_order2(void) +{ + struct _byte_and_freq *node; + + + // First of all check if that's the first byte in this context, in that case + // we have to initialize some variables in the context structure. 
+ + if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + order2_array[o2_cntxt].defined_bytes=1; + order2_array[o2_cntxt].max_cump=1; + order2_array[o2_cntxt].prob=_bytes_pool; + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order two, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==2) //coded under order-2 + { + + // Update its count and variables of this context and check for renormalization + + o2_ll_node->freq++; //increment its frequency (rather probability) + + order2_array[o2_cntxt].max_cump++; //total cump + + if(o2_ll_node->freq==255) //do we have to renormalize? 
+ ppmc_renormalize_order2(); //renormalize + + } + else + { + + // Once every paranoid check has been done we are sure that this byte + // did not existed and so we have to create a new node in the linked + // list. Also we have to take care of memory issues. + // + // However due to the use of exclusion, we have to ensure that "o2_ll_node" + // points to the last element in the linked lists of this context + + node=order2_array[o2_cntxt].prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + o2_ll_node=node; + + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o2_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + order2_array[o2_cntxt].max_cump++; //total cump + order2_array[o2_cntxt].defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + +} + + +// This functions renormalizes the probabilities at order-2 updating context +// variables. 
+void ppmc_renormalize_order2(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + // Initialize variables. Defined bytes remain the same. + order2_array[o2_cntxt].max_cump=0; //clear them + + node=order2_array[o2_cntxt].prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + order2_array[o2_cntxt].max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + + //printf("\nRenormalization, context:%c%c",o2_byte,o1_byte); + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o2_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o2_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order2(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + // Initialize o2_cntxt + + o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte); + + + // First check if current order-2 context is empty + if(order2_array[o2_cntxt].defined_bytes==0) //it's empty + { + byte=-1; //byte not coded, nothing done + return; + } + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order2(); //total cump needed for decoding + + + // It's possible that due to exclusion, there's no byte left, in that case + // return. 
+ if(exc_defined_bytes==0) + { + byte=-1; + return; //byte not coded, nothing done + } + + + symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=exc_max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order2(); + range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + // Then, update "excluded" table + + node=order2_array[o2_cntxt].prob; + + do{ + excluded[node->byte]=1; + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=order2_array[o2_cntxt].prob; //get pointer to linked lists + + while(1) + { + if(excluded[node->byte]==0) //only if it's not excluded + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. + } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o2_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=2; + + return; + } + +} + + +// This is the routine for updating while decoding. We have to search the byte +// in the linked list, if it's present, update its count, otherwise we have +// hitted the end of the linked list, and there we have to create a new node. +// +// Of course if the byte was matched in order-2 we'll have a pointer to it +// in "o2_ll_node" so we don't need to read the linked list. 
(we already did +// in decoding) +// +// Another case which we also have to specially deal with, this is the case +// when the context has not been initalized yet. +void ppmc_update_dec_order2(void) +{ + struct _byte_and_freq *node; + + + // Handle the case when the context is not initialized + // This code is the same as the one for the encoding. + + if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + order2_array[o2_cntxt].defined_bytes=1; + order2_array[o2_cntxt].max_cump=1; + order2_array[o2_cntxt].prob=_bytes_pool; + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + + return; //nothing else to do + } + + + // Current context is initalized, proceed + + if(coded_in_order==2) //check if it was decoded under order-2 + { + + // We can be sure that the pointer "o2_ll_node" points to its entry, and + // it has a non 0 probability (otherwise it couldn't be coded) so just + // update its probability and max_cump + + o2_ll_node->freq++; //the probability of the byte + 
order2_array[o2_cntxt].max_cump++; //the max_cump + + if(o2_ll_node->freq==255) //check for renormalization + ppmc_renormalize_order2(); + + } + else + { + + // An escape code was decoded under order-2, we have to read till the + // end of the linked list so we can add a new node for this new byte. + + node=order2_array[o2_cntxt].prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + + // We reached the end of the linked list, add a new node if possible, + // we are using the same code of "ppmc_update_order2()" with the + // difference that the pointer to the linked list is "node" + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + order2_array[o2_cntxt].max_cump++; //total cump + order2_array[o2_cntxt].defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //we are finished updating + + } + +} + + +// This function returns the probability for the escape codes 
in the variables +void ppmc_get_escape_prob_order2(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. + + symb_prob=exc_defined_bytes; + symb_cump=exc_max_cump; +} + + + +// ORDER-3 functions +// +// The difference between order-3 and order-3 are just a few, instead of +// keeping a table with the context structures, we keep a hash table with +// pointers to linked lists with the context, so it's only a matter of +// searching current context in the linked list corresponding to its hash +// entry. This is done in "ppmc_get_totf_order3" because that's the first +// routine that both encoding and decoding routines call. + + +// Returns in the variable "total_cump" the current total cump of +// order-3. Must be called while encoding or decoding before anything else +// because it initializes the pointers to the context structure in +// "o3_context" and o3_cntxt. +// +// If the hash entry is not initialized it returns "o3_context"=0 +// If the context is not present in the linked list of context, "o3_context" +// will point to the last element in the linked list. +// If the context is present "o3_context" will point to the context to use. +// One can distinguish the last two by checking the context value of the +// structure, if it's not the same, is the last element. +// +// The routine returns 0 if the hash entry is not initialized or if the +// the context was not present. Otherwise it returns 1, meaning that we +// have to code under this context. 
+char ppmc_get_totf_order3(void) +{ + struct context *cntxt_node; + struct _byte_and_freq *node; + + + // First make the hash key for order-3 + + o3_cntxt=ppmc_order3_hash_key(o1_byte,o2_byte,o3_byte); + full_o3_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16); //order-3 + + + // Now check the hash entry in the table + + if(order3_hasht[o3_cntxt]==0) //if 0, not initialized + { + + o3_context=0; //no hash entry + + return 0; //hash entry not initialized + } + + + // Now read trough the linked list of context searching current one + + cntxt_node=order3_hasht[o3_cntxt]; + + while(1) + { + + if(cntxt_node->order4321==full_o3_cntxt) //compare context + goto ppmc_gtf_cntxt_found; + + if(cntxt_node->next==0) //end of context's linked list + break; + + cntxt_node=cntxt_node->next; //next element + + } + + + // Once there the context was not found + o3_context=cntxt_node; //pointer to last element in the linked list + + return 0; //it was not present + + + // The context is found, so return pointer and cump + + ppmc_gtf_cntxt_found: + + o3_context=cntxt_node; + + // Read the whole linked list for making the values + node=o3_context->prob; + exc_max_cump=0; + exc_defined_bytes=0; + + do{ + if(excluded[node->byte]==0) + { + exc_defined_bytes++; + exc_max_cump+=node->freq; //add the probability of this byte to the cump + } + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // Total cump is current total cump plus the probability for the escape code + exc_total_cump=exc_max_cump+exc_defined_bytes; + + + return 1; //context found + +} + + +// Codes a byte under order-3 and returns 1. +// Otherwise it returns a 0. +// +// In case the byte is coded under this context, coded_in_order=3. +char ppmc_code_byte_order3(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. 
+ + if(ppmc_get_totf_order3()==0) + return 0; + + + // It's possible that due to exclusion, there's no byte left, in that case + // return. + if(exc_defined_bytes==0) + return 0; + + + // See if the byte is present and compute its cump at the same time + + node=o3_context->prob; //pointer to first element in the linked list + + symb_cump=0; //the first symbol always has a 0 cump + + + // Now search the byte in the linked list + + do{ + if(node->byte==byte) + goto ppmc_o3_byte_found; //bad thing, I know, anyone has a better idea? + if(excluded[node->byte]==0) + symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o3_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=exc_max_cump; + symb_prob=exc_defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob); + + // Then, update "excluded" table + + node=o3_context->prob; + + do{ + excluded[node->byte]=1; + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + return 0; + + + // That code is executed when the byte is found in the linked list + + ppmc_o3_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. 
+ + coded_in_order=3; //successfully coded under order-3 + + o3_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-3 +} + + +// This functions update order-3 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// +// "o3_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. Also "o3_context" must be initialized. +// +// This updating is only for encoding. +void ppmc_update_order3(void) +{ + struct _byte_and_freq *node; + + + // First thing first, check if the hash entry is initialized + + if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order3_hasht[o3_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + 
_context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==3) //coded under order-3 + { + + // Update its count and variables of this context and check for renormalization + + o3_ll_node->freq++; //increment its frequency (rather probability) + + o3_context->max_cump++; //total cump + + if(o3_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order3(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. 
Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o3_context" points to the last element, so we can put the new element. + + if(o3_context->order4321==full_o3_cntxt) //chech if that's the last + { //element or a context found + + // However due to the use of exclusion, we have to ensure that "o3_ll_node" + // points to the last element in the linked lists of this context + + node=o3_context->prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + o3_ll_node=node; + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o3_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o3_context->max_cump++; //total cump + o3_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // Ensure that we are at the end of the linked list of contexts + o3_context=order3_hasht[o3_cntxt]; + + do{ + 
if(o3_context->next==0) + break; + o3_context=o3_context->next; + }while(1); + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o3_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct 
_byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + +// This functions renormalizes the probabilities at order-3 updating context +// variables. +void ppmc_renormalize_order3(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Initialize variables. Defined bytes remain the same. + o3_context->max_cump=0; //clear them + + node=o3_context->prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + o3_context->max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o3_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o3_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order3(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order3()==0) + { + byte=-1; + return; + } + + // It's possible that due to exclusion, there's no byte left, in that case + // return. 
+ if(exc_defined_bytes==0) + { + byte=-1; + return; + } + + + + // Decode current cump + + symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=exc_max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order3(); + range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + // Then, update "excluded" table + + node=o3_context->prob; + + do{ + excluded[node->byte]=1; + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=o3_context->prob; //get pointer to linked lists + + while(1) + { + if(excluded[node->byte]==0) + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. + } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o3_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=3; + + return; + } + +} + + +// This is the routine for updating while decoding. The only difference with +// the routine for coding is that when an escape code was coded, "o3_ll_node" +// is not initialized so we have to read till the end of the linked list. +// Fortunately "o3_context" will be initialized so we don't need to read its +// linked list. 
+void ppmc_update_dec_order3(void) +{ + struct _byte_and_freq *node; + + // First thing first, check if the hash entry is initialized + + if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order3_hasht[o3_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // 
Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==3) //coded under order-3 + { + + // Update its count and variables of this context and check for renormalization + + o3_ll_node->freq++; //increment its frequency (rather probability) + + o3_context->max_cump++; //total cump + + if(o3_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order3(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o3_context" points to the last element, so we can put the new element. 
+ + if(o3_context->order4321==full_o3_cntxt) //chech if that's the last + { //element or the a context found + + // Read till the end of the linked list + + node=o3_context->prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + // Now add element + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o3_context->max_cump++; //total cump + o3_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o3_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked 
list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool."); + exit(1); + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order3(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that 
it has a probability determined + // by the scheme C. + + symb_prob=exc_defined_bytes; + symb_cump=exc_max_cump; +} + + + +// ORDER-4 functions +// +// The routines for order-4 are *equal* to those for order-3, there are a few +// changes like different global variables, and different hash keys. +// +// If you want to go to higher orders, you'd use the same code and data +// structures, with the difference of the context bytes (order4321) +// stored in every context's linked list. + + +// Returns in the variable "total_cump" the current total cump of +// order-4. Must be called while encoding or decoding before anything else +// because it initializes the pointers to the context structure in +// "o4_context" and o4_cntxt. +// +// If the hash entry is not initialized it returns "o4_context"=0 +// If the context is not present in the linked list of context, "o4_context" +// will point to the last element in the linked list. +// If the context is present "o4_context" will point to the context to use. +// One can distinguish the last two by checking the context value of the +// structure, if it's not the same, is the last element. +// +// The routine returns 0 if the hash entry is not initialized or if the +// the context was not present. Otherwise it returns 1, meaning that we +// have to code under this context. 
+char ppmc_get_totf_order4(void) +{ + struct context *cntxt_node; + struct _byte_and_freq *node; + + + // First make the hash key for order-4 + + o4_cntxt=ppmc_order4_hash_key(o1_byte,o2_byte,o3_byte,o4_byte); + full_o4_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16)+(o4_byte<<24); //order-4 + + + // Now check the hash entry in the table + + if(order4_hasht[o4_cntxt]==0) //if 0, not initialized + { + + o4_context=0; //no hash entry + + return 0; //hash entry not initialized + } + + + // Now read trough the linked list of context searching current one + + cntxt_node=order4_hasht[o4_cntxt]; + + while(1) + { + + if(cntxt_node->order4321==full_o4_cntxt) //compare context + goto ppmc_gtfo4_cntxt_found; + + if(cntxt_node->next==0) //end of context's linked list + break; + + cntxt_node=cntxt_node->next; //next element + + } + + + // Once there the context was not found + o4_context=cntxt_node; //pointer to last element in the linked list + + return 0; //it was not present + + + // The context is found, so return pointer and cump + + ppmc_gtfo4_cntxt_found: + + o4_context=cntxt_node; + + // Read the whole linked list for making the values + node=o4_context->prob; + exc_max_cump=0; + exc_defined_bytes=0; + + do{ + if(excluded[node->byte]==0) + { + exc_defined_bytes++; + exc_max_cump+=node->freq; //add the probability of this byte to the cump + } + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // Total cump is current total cump plus the probability for the escape code + exc_total_cump=exc_max_cump+exc_defined_bytes; + + + return 1; //context found + +} + + +// Codes a byte under order-4 and returns 1. +// Otherwise it returns a 0. +// +// In case the byte is coded under this context, coded_in_order=4. +char ppmc_code_byte_order4(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. 
+ + if(ppmc_get_totf_order4()==0) + return 0; + + + // It's possible that due to exclusion, there's no byte left, in that case + // return. + if(exc_defined_bytes==0) + return 0; + + + // See if the byte is present and compute its cump at the same time + + node=o4_context->prob; //pointer to first element in the linked list + + symb_cump=0; //the first symbol always has a 0 cump + + + // Now search the byte in the linked list + + do{ + if(node->byte==byte) + goto ppmc_o4_byte_found; //bad thing, I know, anyone has a better idea? + if(excluded[node->byte]==0) + symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o4_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=exc_max_cump; + symb_prob=exc_defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob); + + // Then, update "excluded" table + + node=o4_context->prob; + + do{ + excluded[node->byte]=1; + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + return 0; + + + // That code is executed when the byte is found in the linked list + + ppmc_o4_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. 
+ + coded_in_order=4; //successfully coded under order-4 + + o4_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-4 +} + + +// This functions update order-4 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// +// "o4_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. Also "o4_context" must be initialized. +// +// This updating is only for encoding. +void ppmc_update_order4(void) +{ + + // First thing first, check if the hash entry is initialized + + if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order4_hasht[o4_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool."); + exit(1); + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + 
_context_pool_index++; + + } + + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==4) //coded under order-4 + { + + // Update its count and variables of this context and check for renormalization + + o4_ll_node->freq++; //increment its frequency (rather probability) + + o4_context->max_cump++; //total cump + + if(o4_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order4(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. 
+ // And we can be sure that it at least has one element and that + // "o4_context" points to the last element, so we can put the new element. + + if(o4_context->order4321==full_o4_cntxt) //chech if that's the last + { //element or the a context found + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o4_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o4_context->max_cump++; //total cump + o4_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o4_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry 
(this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool."); + exit(1); + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + +// This functions renormalizes the probabilities at order-4 updating context +// variables. +void ppmc_renormalize_order4(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Initialize variables. Defined bytes remain the same. 
+ o4_context->max_cump=0; //clear them + + node=o4_context->prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + o4_context->max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o4_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o4_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order4(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order4()==0) + { + byte=-1; + return; + } + + + // It's possible that due to exclusion, there's no byte left, in that case + // return. 
+ if(exc_defined_bytes==0) + { + byte=-1; + return; + } + + + // Decode current cump + + symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=exc_max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order4(); + range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + // Then, update "excluded" table + + node=o4_context->prob; + + do{ + excluded[node->byte]=1; + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=o4_context->prob; //get pointer to linked lists + + while(1) + { + if(excluded[node->byte]==0) + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. + } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o4_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=4; + + return; + } + +} + + +// This is the routine for updating while decoding. The only difference with +// the routine for coding is that when an escape code was coded, "o4_ll_node" +// is not initialized so we have to read till the end of the linked list. +// Fortunately "o4_context" will be initialized so we don't need to read its +// linked list. 
+// Helper of ppmc_update_dec_order4: writes the current "byte" with a count of
+// one into the entry "_bytes_pool" points to and marks it as the end of a list.
+static void o4dec_init_bytes_entry(void)
+{
+  _bytes_pool->byte=byte;   //initialize byte to current one
+  _bytes_pool->freq=1;      //it appeared once
+  _bytes_pool->next=0;      //now this is last element in ll
+}
+
+
+// Helper of ppmc_update_dec_order4: makes "_bytes_pool" point to the next free
+// entry. When the current buffer is used up a new one is allocated and stored
+// in "_bytes_pool_array". If the array of buffers is full, or malloc fails,
+// "ppmc_out_of_memory" is set to 1 and nothing else is changed.
+static void o4dec_next_bytes_entry(void)
+{
+  ++_bytes_pool;            //next time use next entry (this is a pointer)
+
+  if(_bytes_pool!=_bytes_pool_max)   //maximum not reached yet
+    return;
+
+  // First check to see that we still have entries in the array
+
+  if(_bytes_pool_index==_mempool_max_index)
+  {
+    ppmc_out_of_memory=1;   //flush
+    return;
+  }
+
+  // Allocate memory for new buffer
+
+  if((_bytes_pool=(struct _byte_and_freq *)malloc
+     (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+  {
+    // "_bytes_pool_index" is an unsigned long, so it has to be printed with
+    // %lu (it was %d, which doesn't match the type of the argument)
+    printf("\nCan't allocate memory for bytes memory pool.\nIndex: %lu",_bytes_pool_index);
+    ppmc_out_of_memory=1;   //flush
+    return;
+  }
+
+  _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+  _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+  _bytes_pool_index++;
+}
+
+
+// Helper of ppmc_update_dec_order4: creates a new order-4 context holding only
+// the current byte and stores its address in "*link" (a hash table entry or
+// the "next" field of the last context of a hash entry).
+// The order of the steps is the one the routine always had: if the array of
+// context buffers is full the function returns, with "ppmc_out_of_memory" set,
+// before the byte entry of the new context is written.
+static void o4dec_create_context(struct context **link)
+{
+  // First create the context
+
+  *link=_context_pool;
+
+  _context_pool->next=0;                  //this is the last element
+  _context_pool->order4321=full_o4_cntxt; //put context
+  _context_pool->prob=_bytes_pool;        //pointer to linked list
+  _context_pool->max_cump=1;
+  _context_pool->defined_bytes=1;
+
+
+  // Do update of linked list variables and memory use of contexts
+
+  ++_context_pool;          //next time use next entry (this is a pointer)
+
+  if(_context_pool==_context_pool_max)    //maximum reached
+  {
+
+    // First check to see that we still have entries in the array
+
+    if(_context_pool_index==_mempool_max_index)
+    {
+      ppmc_out_of_memory=1; //flush
+      return;
+    }
+
+    // Allocate memory for new buffer
+
+    if((_context_pool=(struct context *)malloc
+       (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+    {
+      printf("Can't allocate memory for context's memory pool.");
+      exit(1);
+    }
+
+    _context_pool_array[_context_pool_index]=_context_pool;
+
+    _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+    _context_pool_index++;
+
+  }
+
+  // Now care about the first (and last) linked list element
+
+  o4dec_init_bytes_entry();
+
+  // Do update of linked list variables and memory use
+
+  o4dec_next_bytes_entry();
+}
+
+
+// Updates the order-4 model after a byte was decoded. Takes no parameters and
+// returns nothing, it works on the global state: "byte", "o4_cntxt",
+// "full_o4_cntxt", "coded_in_order", "o4_context", "o4_ll_node" and the
+// memory pools. One of four things happens:
+//  - the hash entry is empty: a context is created in it.
+//  - the byte was coded under order-4: its count is incremented.
+//  - the context exists but an escape was coded: the byte is appended to it.
+//  - another context lives in the hash entry: a new context is chained.
+// Whenever memory is needed and "ppmc_out_of_memory" is 1 nothing is done.
+void ppmc_update_dec_order4(void)
+{
+  struct _byte_and_freq *node;
+
+  // First thing first, check if the hash entry is initialized
+
+  if(order4_hasht[o4_cntxt]==0)   //no pointer to linked list defined yet
+  {
+
+    if(ppmc_out_of_memory==1)
+      return;               //exit this function, we can't allocate memory
+
+    o4dec_create_context(&order4_hasht[o4_cntxt]);
+
+    return;                 //nothing else to do
+  }
+
+
+  // The byte was coded under order four, otherwise it was coded under a
+  // lower order (never higher ones, remember that we are using update
+  // exclusion) in this case we have to create a new node in the list.
+
+  if(coded_in_order==4)     //coded under order-4
+  {
+
+    // Update its count and variables of this context and check for renormalization
+
+    o4_ll_node->freq++;     //increment its frequency (rather probability)
+
+    o4_context->max_cump++; //total cump
+
+    if(o4_ll_node->freq==255)     //do we have to renormalize?
+      ppmc_renormalize_order4();  //renormalize
+
+    return;
+  }
+
+
+  // Now we have two cases, under a given context (which we actually found)
+  // we coded an escape coded, in that case just create a new node in the
+  // linked list of bytes and probabilities. Otherwise we didn't find the
+  // same node so we have to create it in the linked list for context.
+  // And we can be sure that it at least has one element and that
+  // "o4_context" points to the last element, so we can put the new element.
+
+  if(o4_context->order4321==full_o4_cntxt)  //check if that's the last
+  {                                         //element or the context found
+
+    // Read till the end of the linked list
+
+    node=o4_context->prob;  //get pointer to linked list
+
+    while(node->next!=0)    //stop at the end of the linked list
+      node=node->next;      //next node
+
+    // Now add element
+
+    if(ppmc_out_of_memory==1)
+      return;               //exit this function, we can't allocate mem
+
+    node->next=_bytes_pool; //put it in the next free entry
+    o4dec_init_bytes_entry();
+
+    o4_context->max_cump++;       //total cump
+    o4_context->defined_bytes++;  //one more byte defined in this context
+
+    // Do update of linked list variables and memory use
+
+    o4dec_next_bytes_entry();
+  }
+  else
+  {
+
+    // We have to create a new context node
+
+    if(ppmc_out_of_memory==1)
+      return;               //exit this function, we can't allocate memory
+
+    o4dec_create_context(&o4_context->next);
+  }
+
+}
+
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order4(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that
it has a probability determined + // by the scheme C. + + symb_prob=exc_defined_bytes; + symb_cump=exc_max_cump; +} + diff --git a/examples/ppmc/exclusion/ppmc.h b/examples/ppmc/exclusion/ppmc.h new file mode 100644 index 0000000..392da20 --- /dev/null +++ b/examples/ppmc/exclusion/ppmc.h @@ -0,0 +1,134 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmc.h" (exclusions) + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + Part of the ppmc encoder and decoder. + + This module contains the definitions of different functions and all the + data structures defined by ppmc. Also contains defines. +*/ + +// Definitions + +#define ppmc_order4_hash_size 65536 +#define ppmc_order4_hash_key(k,j,i,l) ( (k)+(j<<8)+(i<<1)+(l<<9) )& ppmc_order4_hash_size-1 +#define ppmc_order3_hash_size 65536 +#define ppmc_order3_hash_key(k,j,i) ((k)+(j<<7)+(i<<11)) & ppmc_order3_hash_size-1 +#define ppmc_order2_hash_key(k,j) ((k)+(j<<8)) +#define _bytes_pool_elements 125000 //this is used the first time + //that we allocate memory, that's + //the number of entries +#define _bytes_pool_elements_inc 125000 //if we need to alloc again, this + //is the number of entries to get +#define _context_pool_elements 50000 +#define _context_pool_elements_inc 50000 + +#define _mempool_max_index 1000 //the number of entries in the array with + //pointers + + +// Data structures + +// This structure contains a single element of a linked lists which contains +// the probability distribution of a given order. This structure takes 6 bytes. +struct _byte_and_freq{ +unsigned char byte; //the byte itself +unsigned char freq; //and the frequency of it +struct _byte_and_freq *next; //pointer to next element in linked list or 0 +}; + + +// This structure is used for both order-3 and order-4. 
It takes 20 bytes, +// and it can still hold another byte more. (only 19 being used) +// Order 2-1-0-(-1) use different structures for a faster accessing. +struct context{ +struct context *next; //next context in the hash entry +unsigned long order4321; //order-4-3-2-1 (or order-3-2-1 for order-3) +struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution +unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 ) +unsigned int defined_bytes; //the number of bytes in this context +}; + +// That's the same but for order-2 where there's no hash collisions. +struct context_o2{ +struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution +unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 ) +unsigned int defined_bytes; //the number of bytes in this context +}; + + +// Declaration of functions + + +// Functions for initializing +void ppmc_alloc_memory(void); +void ppmc_initialize_contexts(void); +void ppmc_encoder_initialize(void); +void ppmc_decoder_initialize(void); +void ppmc_free_memory(void); +void ppmc_flush_mem_enc(void); +void ppmc_flush_mem_dec(void); + +// Functions for order-(-1) +void ppmc_get_prob_ordern1(void); +unsigned long ppmc_get_symbol_ordern1(void); +void ppmc_get_totf_ordern1(void); +void ppmc_renormalize_order1(void); + +// Functions for order-0 +void ppmc_get_totf_order0(void); +char ppmc_code_byte_order0(void); +void ppmc_update_order0(void); +void ppmc_renormalize_order0(void); +void ppmc_decode_order0(void); +void ppmc_get_escape_prob_order0(void); +void ppmc_get_prob_order0(void); + +// Functions for order-1 +void ppmc_get_totf_order1(void); +char ppmc_code_byte_order1(void); +void ppmc_update_order1(void); +void ppmc_renormalize_order1(void); +void ppmc_decode_order1(void); +void ppmc_get_escape_prob_order1(void); +void ppmc_get_prob_order1(void); + + +// Functions for order-2 +void ppmc_get_totf_order2(void); +char 
ppmc_code_byte_order2(void); +void ppmc_update_order2(void); +void ppmc_renormalize_order2(void); +void ppmc_decode_order2(void); +void ppmc_update_dec_order2(void); +void ppmc_get_escape_prob_order2(void); +void ppmc_get_prob_order2(void); + + +// Functions for order-3 +char ppmc_get_totf_order3(void); +char ppmc_code_byte_order3(void); +void ppmc_update_order3(void); +void ppmc_renormalize_order3(void); +void ppmc_decode_order3(void); +void ppmc_update_dec_order3(void); +void ppmc_get_escape_prob_order3(void); +void ppmc_get_prob_order3(void); + + +// Functions for order-4 +char ppmc_get_totf_order4(void); +char ppmc_code_byte_order4(void); +void ppmc_update_order4(void); +void ppmc_renormalize_order4(void); +void ppmc_decode_order4(void); +void ppmc_update_dec_order4(void); +void ppmc_get_escape_prob_order4(void); +void ppmc_get_prob_order4(void); + + + diff --git a/examples/ppmc/exclusion/ppmcdata.c b/examples/ppmc/exclusion/ppmcdata.c new file mode 100644 index 0000000..6bec0cf --- /dev/null +++ b/examples/ppmc/exclusion/ppmcdata.c @@ -0,0 +1,132 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcdata.c" (exclusions) + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains global data. +*/ + +#include "ppmc.h" //defines +#include "range.h" + +// Order-4 uses a hash table which points to the start of a linked list with +// the different context, which has the cump, the number of different symbols +// and a pointer to the linked list with the bytes and frequencies. +// Order-3 is almost the same, both take 262144 bytes. 
+struct context *order4_hasht[ppmc_order4_hash_size]; + +struct context *order3_hasht[ppmc_order3_hash_size]; + + +// The array for order-2 is different, as we do directly hashing, and thus +// we have no need to do the stuff of linked lists for the context itself, +// so it contains the context used. This takes 1310720 bytes. +struct context_o2 order2_array[65536]; + + +// Those are the multiple arrays for order-1. It takes 65536 bytes. +unsigned char order1_array[256][256]; +unsigned int order1_defined_bytes_array[256]; //the defined bytes in every context +unsigned int order1_max_cump_array[256]; //max cump of every context + + +// This is the array for order-0. It takes 256 bytes. +unsigned char order0_array[256]; +unsigned int order0_defined_bytes; +unsigned int order0_max_cump; + + +// No need of variables for order-(-1), because it's fixed. + + + +// Those are the pointers and variables used for managing the mem pool for +// both context, and bytes and frequencies. +struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked + //lists with bytes and frequencies + *_bytes_pool_max; //the maximum of this buffer +struct context *_context_pool; //pointer to pool containing contexts +struct context *_context_pool_max; //the same as with _bytes_pool + +unsigned long _bytes_pool_index; //index in array of pointers +unsigned long _context_pool_index; + +//the following is an array keeping pointers to different buffers. A new +//buffer is allocated when the current one is full, so we always have a +//buffer for linked lists. (without allocating a buffer for every element) +struct _byte_and_freq *_bytes_pool_array[_mempool_max_index]; +struct context *_context_pool_array[_mempool_max_index]; + +char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any + //routine that needs to allocate memory must + //quit if that's 1. 
+ + +// Variables which contain current byte to code and order +unsigned long byte, //current byte to code + o1_byte, //order-1 byte + o2_byte, //order-2 byte + o3_byte, //order-3 byte + o4_byte; //order-4 byte + +unsigned long o2_cntxt; //used in the hash key of order-2 +unsigned long o3_cntxt; //use as hash key for order-3 +unsigned long o4_cntxt; //use as hash key for order-4 +unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together +unsigned long full_o4_cntxt; //order-4-3-2-1 + +unsigned long coded_in_order; //in which order the last byte was coded + //it's for update exclusion + //also used for decoding + +// Variables used for coding + +unsigned long + total_cump, //the total cumulative probability + symb_cump, //the symbol cumulative probability + symb_prob; //the symbol frequency + +rangecoder rc_coder; //state of range coder +rangecoder rc_decoder; //state of range decoder + +// File handles + + FILE *file_input, //file to code + *file_output; //file where the coded data is placed + + +// Pointers to linked lists and context structures used for faster updating +// or creation of new nodes, because instead of reading again all the linked +// list till the end (in the case of creation) we have a pointer to the last +// element. In the case that a byte was present in the linked lists but it +// had a 0 count, we just have to update its probability. And in the case +// that it already was present and we coded it under that context or a lower +// one, we just have to update its probability. + +struct _byte_and_freq *o2_ll_node; //pointer to linked lists under order-2 + //where does it points depends in which + //order the byte was coded. +struct _byte_and_freq *o3_ll_node; //the same but for order-3 +struct _byte_and_freq *o4_ll_node; + +struct context *o3_context; //pointer to current order-3 context +struct context *o4_context; //pointer to current order-3 context + + +// Those are the variables especially used for exclusion. 
+ +// the array for doing exclusion. Every routine from every order can +// use them. +unsigned char excluded[256]; //if 0 it was not present, if it's 1 then + //it appeared in a higher order. + +unsigned long exc_total_cump, //total cump of context (after exclusion) + exc_defined_bytes,//defined bytes in context (after exclusion) + exc_max_cump; + diff --git a/examples/ppmc/exclusion/ppmcdata.h b/examples/ppmc/exclusion/ppmcdata.h new file mode 100644 index 0000000..3d7a2a9 --- /dev/null +++ b/examples/ppmc/exclusion/ppmcdata.h @@ -0,0 +1,128 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcdata.h" (exclusions) + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains externs definition for global data. +*/ + +#include "ppmc.h" + +// Order-4 uses a hash table which points to the start of a linked list with +// the different context, which has the cump, the number of different symbols +// and a pointer to the linked list with the bytes and frequencies. +// Order-3 is almost the same, both take 262144 bytes. +extern struct context *order4_hasht[]; + +extern struct context *order3_hasht[]; + + +// The array for order-2 is different, as we do directly hashing, and thus +// we have no need to do the stuff of linked lists for the context itself, +// so it contains the context used. This takes 1310720 bytes. +extern struct context_o2 order2_array[]; + + +// Those are the multiple arrays for order-1. It takes 65536 bytes. +extern unsigned char order1_array[256][256]; +extern unsigned int order1_defined_bytes_array[]; //the defined bytes in every context +extern unsigned int order1_max_cump_array[]; //max cump of every context + + +// This is the array for order-0. It takes 256 bytes. 
+extern unsigned char order0_array[];
+extern unsigned int order0_defined_bytes;
+extern unsigned int order0_max_cump;
+
+
+// Those are the pointers and variables used for managing the mem pool for
+// both context, and bytes and frequencies.
+extern struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked
+ //lists with bytes and frequencies
+ *_bytes_pool_max; //the maximum of this buffer
+extern struct context *_context_pool; //pointer to pool containing contexts
+extern struct context *_context_pool_max; //the same as with _bytes_pool
+
+extern unsigned long _bytes_pool_index; //index in array of pointers
+extern unsigned long _context_pool_index;
+
+//the following is an array keeping pointers to different buffers. A new
+//buffer is allocated when the current one is full, so we always have a
+//buffer for linked lists. (without allocating a buffer for every element)
+extern struct _byte_and_freq *_bytes_pool_array[_mempool_max_index];
+extern struct context *_context_pool_array[_mempool_max_index];
+
+extern char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any
+ //routine that needs to allocate memory must
+ //quit if that's 1.
+
+
+
+// Variables which contain current byte to code and order
+extern unsigned long //they are only bytes
+ byte, //current byte to code
+ o1_byte, //order-1 byte
+ o2_byte, //order-2 byte
+ o3_byte, //order-3 byte
+ o4_byte; //order-4 byte
+
+extern unsigned long o2_cntxt; //used in the hash key of order-2
+extern unsigned long o3_cntxt; //use as hash key for order-3
+// "o4_cntxt" had no "extern", so every file including this header defined
+// its own copy besides the one in ppmcdata.c. Now it's only declared here.
+extern unsigned long o4_cntxt; //use as hash key for order-4
+extern unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together
+extern unsigned long full_o4_cntxt; //order-4-3-2-1
+
+extern unsigned long coded_in_order; //in which order the last byte was coded
+ //it's for update exclusion
+ //also used for decoding
+// Variables used for coding
+
+extern unsigned long //no need for negative values
+ total_cump, //the total cumulative probability
+ symb_cump, //the symbol cumulative probability
+ symb_prob; //the symbol frequency
+
+// "rangecoder" and "FILE" are not declared by this header, "range.h" has to
+// be included before it.
+extern rangecoder rc_coder; //state of range coder
+extern rangecoder rc_decoder; //state of range decoder
+
+// File handles
+// (they are defined in ppmcdata.c, here they must be only declared, the
+// "extern" was missing)
+
+extern FILE *file_input, //file to code
+ *file_output; //file where the coded data is placed
+
+
+
+// Pointers to linked lists and context structures used for faster updating
+// or creation of new nodes, because instead of reading again all the linked
+// list till the end (in the case of creation) we have a pointer to the last
+// element. In the case that a byte was present in the linked lists but it
+// had a 0 count, we just have to update its probability. And in the case
+// that it already was present and we coded it under that context or a lower
+// one, we just have to update its probability.
+
+
+extern struct _byte_and_freq *o2_ll_node;//pointer to linked lists under order-2
+ //where does it points depends in which
+ //order the byte was coded.
+extern struct _byte_and_freq *o3_ll_node; //the same but for order-3 +extern struct _byte_and_freq *o4_ll_node; + +extern struct context *o3_context; //pointer to current order-3 context +extern struct context *o4_context; //pointer to current order-3 context + + +// the array for doing exclusion. Every routine from every order can +// use them. +extern unsigned char excluded[256]; //if 0 it was not present, if it's 1 then + //it appeared in a higher order. + +extern unsigned long exc_total_cump, //total cump of context (after exclusion) + exc_defined_bytes,//defined bytes in context (after exclusion) + exc_max_cump; + diff --git a/examples/ppmc/exclusion/ppmcmain.c b/examples/ppmc/exclusion/ppmcmain.c new file mode 100644 index 0000000..5169de3 --- /dev/null +++ b/examples/ppmc/exclusion/ppmcmain.c @@ -0,0 +1,184 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcmain.c" (exclusions) + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder only. Using exclusion. + + This module is the main module and calls the different modules to do + the encoding of a file. When done prints bpb and kbyps. +*/ + + +#include +#include +#include "range.h" //the range coder functions and data +#include "ppmcdata.h" + + +long filesize(FILE *stream); + +unsigned long debug=0; + + +//Main +void main (char argc, char *argv[]) +{ + unsigned long counter, //temporal counter for loops like for or while + counter2, //another temporal counter for sub loops + size_file_input; //the size of the input file + + + // Print title + printf("PPMC using range coder. (with exclusion)\n"); + printf("Copyright (C) Arturo San Emeterio Campos 1999. 
All rights reserved.\n"); + printf("Permission is granted to make verbatim copies of this program for private\n"); + printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n"); + + + + // Check for correct number of parameters + if(argc!=3) + { + printf("Bad number of arguments.\n"); + exit(1); + } + + + // Try to open input and output files + if((file_input=fopen(argv[1],"r+b"))==NULL) + { + printf("Couldn't open %s.\n",argv[1]); + exit(1); + } + + if((file_output=fopen(argv[2],"w+b"))==NULL) + { + printf("Couldn't create %s.\n",argv[2]); + exit(1); + } + + + // Check input file length and not accept 0 length files + size_file_input=filesize(file_input); + + if(size_file_input<5) + { + printf("Can't work with files below than 5 bytes!"); + exit(1); + } + + + // First output file length + fwrite(&size_file_input,1,4,file_output); //input length + + + // Initialize ppmc encoder + ppmc_alloc_memory(); //get memory + ppmc_initialize_contexts(); //initialize model + ppmc_encoder_initialize(); + + // Initialize range coder + range_coder_init(&rc_coder,file_output); + + + // Start main loop which codes the file + while((byte=fgetc(file_input))!=EOF) + { + + // Clear exclusion table + for(counter=0;counter!=256;++counter) + excluded[counter]=0; + + + // Try to code current byte under order-4 if possible then go to lower orders + if(ppmc_code_byte_order4()==0) + if(ppmc_code_byte_order3()==0) + if(ppmc_code_byte_order2()==0) + if(ppmc_code_byte_order1()==0) + if(ppmc_code_byte_order0()==0) //else try to code under order-0 + { + // Code under order-(-1) + ppmc_get_prob_ordern1(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + coded_in_order=0; //update all the tables (unless order-(-1)) + } + + + // Now do update exclusion + + switch(coded_in_order) + { + case 0: ppmc_update_order0(); //update only order-0 + case 1: ppmc_update_order1(); //update order-0 and order-1 + case 2: ppmc_update_order2(); //update order-2 1 and 0... 
+ case 3: ppmc_update_order3(); + case 4: ppmc_update_order4(); + default: break; + }; + + + // Update order variables + + o4_byte=o3_byte; + o3_byte=o2_byte; + o2_byte=o1_byte; + o1_byte=byte; //current one is next time order-1 + + debug++; + + // Check if we run out of memory, in that case, flush the encoder + + if(ppmc_out_of_memory==1) + { + printf("Flushing memory! Output file might be not decodable.\n"); + ppmc_flush_mem_enc(); + } + + + } + + + // Flush range coder + range_coder_flush(&rc_coder); + + // Free memory + ppmc_free_memory(); + + + // Print bpb and kbyps + printf("%s at %f bpb.\n",argv[1],((float)filesize(file_output)/(float)size_file_input)*(float)8); + + + // Close file handles + fclose(file_input); + fclose(file_output); + + + + // Nicely exit + exit(0); +} + + +// Routines not used by ppmc but rather by main. +// Not including the range coder. + + +// Returns the file size of a given file. +long filesize(FILE *stream) +{ + long curpos, length; + + curpos = ftell(stream); + fseek(stream, 0L, SEEK_END); + length = ftell(stream); + fseek(stream, curpos, SEEK_SET); + return length; +} + + diff --git a/examples/ppmc/exclusion/range.c b/examples/ppmc/exclusion/range.c new file mode 100644 index 0000000..7be4ce6 --- /dev/null +++ b/examples/ppmc/exclusion/range.c @@ -0,0 +1,221 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "range.c" (exclusion) + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + This module contains the routines of both the range coder and decoder. + + The range coder works internally in 32 bits, and uses bytes as symbols. + Also the end of message symbol is used. So z=257. + + Both input and output use rc_file as the file stream. Of course we can't + code and decode at the same time. 
All the input or output comes from the + same file, no matter what range coder structure are we using. The modules + here provided don't manage the io except for reading and writing, they + don't open nor close the files. The reading and writing is done via + putc and getc. +*/ + +#include "range.h" + +/* + Inits the range coder state. Must be called before encoding any symbol. + It uses a magic number 0xB3 as the first byte outputted. + -rangecoder *rc, the range coder to be used. + + Shoulde be called like that: + range_coder_init(&o0_rc_state,file_output); +*/ +void range_coder_init(rangecoder *rc, FILE *stream) +{ + rc_file=stream; + rc->low=0; //define state + rc->range=0x80000000; + rc->byte_buffer=0xB3; //magic number + rc->help=0; //temp value +} + + +/* + Encodes a symbol. + -rangecoder *rc, the range coder to be used. + -unsigned long tot_f, the maximum cumulative frequency + -unsigned long lt_f, the cumulative probabilty of the symbol + -unsigned long sy_f, the probability of the symbol +*/ +void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f) +{ + unsigned long temp, r; + + if(lt_f>tot_f) + { + printf("BUG!!"); + exit(1); + } + +// printf("\nc> %d,%d,%d ",tot_f,lt_f,sy_f); + + range_coder_renormalize(rc); //&rc? + + r=rc->range/tot_f; + temp=r*lt_f; + if(lt_f+sy_frange=r*sy_f; + else + rc->range-=temp; + rc->low+=temp; +} + +/* + Renormalizes the state when coding. + -rangecoder *rc, the range coder to be used. 
+*/ + +void range_coder_renormalize(rangecoder *rc) +{ + while(rc->range<=(unsigned long)0x00800000) + { + if(rc->low<(unsigned long)0x7F800000) + { + putc(rc->byte_buffer,rc_file); + for(;rc->help;rc->help--) + putc(0xFF,rc_file); + rc->byte_buffer=(unsigned char)(rc->low>>23); + } + else + { + if(rc->low&(unsigned long)0x80000000) + { + putc(rc->byte_buffer+1,rc_file); + for(;rc->help;rc->help--) + putc(0x00,rc_file); + rc->byte_buffer=(unsigned char)(rc->low>>23); + } + else + rc->help++; + } + rc->range<<=8; + rc->low=(rc->low<<8)&(unsigned long)(0x7FFFFFFF); + } +} + + +/* + Flushes the encoder. Must be called when the coding is done. + -rangecoder *rc, the range coder to be used. + + Shoulde be called like that: + range_coder_flush(&o0_rc_state); +*/ +void range_coder_flush(rangecoder *rc) +{ + unsigned long tmp; + + range_coder_renormalize(rc); + tmp = rc->low >> 23; + if (tmp > 0xff) + { + putc(rc->byte_buffer+1,rc_file); + for(; rc->help; rc->help--) + putc(0,rc_file); + } + else + { + putc(rc->byte_buffer,rc_file); + for(; rc->help; rc->help--) + putc(0xff,rc_file); + } + + putc(tmp & 0xff,rc_file); + putc((tmp = rc->low >> (23-8)) & 0xff,rc_file); +} + + +/* + Inits the range decoder state. Also checks for the magic number, and + quits in case it isn't the first, so be careful. + -rangecoder *rc, the range coder to be used. +*/ +void range_decoder_init(rangecoder *rc, FILE *stream) +{ + unsigned int _rd_c; + + rc_file=stream; + if((_rd_c=getc(rc_file))!=0xB3) + { + printf("\nThis is not range coded data. Magic number not found. Exiting."); + exit(1); + } + rc->byte_buffer=getc(rc_file); + rc->low=rc->byte_buffer>>1; + rc->range=0x80; +} + + +/* + Decode a symbol, get its cumulative probability. + Input: + -rangecoder *rc, the range coder to be used. 
+ -unsigned long tot_f, the maximum cumulative probability + Output: + -unsigned long, cumulative probability of the current symbol + Should be called like that: + current_cump=range_decoder_decode(&o0_rc_state,o0_tot_f); +*/ +unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f) +{ + unsigned long temp; + + range_decoder_renormalize(rc); + rc->help=rc->range/tot_f; + temp=rc->low/rc->help; + if(temp>=tot_f) + return tot_f-1; + else + return temp; +} + + +/* + Updates the state so next symbol can be decoded. + Input: + -rangecoder *rc, the range coder to be used. + -unsigned long tot_f, the maximum cumulative probability + -unsigned long lt_f, the cumulative probabilty of the symbol + -unsigned long sy_f, the probability of the symbol + +*/ +void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f) +{ + unsigned long temp; + +// printf("\nd> %d,%d,%d ",tot_f,lt_f,sy_f); + + temp=rc->help*lt_f; + rc->low-=temp; + if(lt_f+sy_frange=rc->help*sy_f; + else + rc->range-=temp; +} + + +/* + Renormalizes the state while decoding. + -rangecoder *rc, the range coder to be used. +*/ +void range_decoder_renormalize(rangecoder *rc) +{ + while(rc->range<=0x00800000) + { + rc->low=(rc->low<<8)|((rc->byte_buffer<<7)&0xFF); + rc->byte_buffer=getc(rc_file); + rc->low |= rc->byte_buffer >> (1); + rc->range<<=8; + } +} + diff --git a/examples/ppmc/exclusion/range.h b/examples/ppmc/exclusion/range.h new file mode 100644 index 0000000..23721e4 --- /dev/null +++ b/examples/ppmc/exclusion/range.h @@ -0,0 +1,39 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "range.h" (exclusions) + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Declarations for the coder. 
+*/ + +#include + +typedef struct{ + unsigned long low, range, help; + unsigned char byte_buffer; +}rangecoder; + +FILE *rc_file; + +void range_coder_init(rangecoder *rc, FILE *stream); //coding routines +void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f); +void range_coder_renormalize(rangecoder *rc); +void range_coder_flush(rangecoder *rc); +void range_decoder_init(rangecoder *rc, FILE *stream);//decoding routines +unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f); +void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f); +void range_decoder_renormalize(rangecoder *rc); + + +typedef unsigned long code_value; +#define CODE_BITS 32 +#define Top_value ((code_value)1 << (CODE_BITS-1)) +#define SHIFT_BITS (CODE_BITS - 9) +#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1) +#define Bottom_value (Top_value >> 8) +#define outbyte(cod,x) putc(x,stdout) +#define inbyte(cod) getc(stdin) diff --git a/examples/ppmc/exclusion/unppmc.c b/examples/ppmc/exclusion/unppmc.c new file mode 100644 index 0000000..bfb3aaf --- /dev/null +++ b/examples/ppmc/exclusion/unppmc.c @@ -0,0 +1,209 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "unppmc.c" (exclusion) + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc decoder. + + This module is the main module and calls the different modules to do + the decoding of a file. When done prints kbyps. 
+*/
+
+
+// Required libraries
+#include <stdio.h>    //printf, fopen, fread, fputc, fclose, ftell, fseek
+#include <stdlib.h>   //exit
+#include "range.h" //the range coder functions and data
+#include "ppmcdata.h"
+
+
+// Declarations of functions from ppmcmain.c
+long filesize(FILE *stream);
+
+
+
+
+// Main: decodes the file argv[1] into the file argv[2].
+// Exits with status 1 on a bad argument count, when a file can't be opened,
+// when the size header can't be read, or when a flush was expected and not
+// found. Returns 0 on success.
+int main (int argc, char *argv[])
+{
+  unsigned long counter, //temporal counter for loops like for or while
+                size_file_output, //the size of the output file
+                main_counter; //used in main
+  char expected_flush=0; //used for checking flushing which can't be done
+
+
+  // Print title and version.
+  printf("UNPPMC using range coder.\n");
+  printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n");
+  printf("Permission is granted to make verbatim copies of this program for private\n");
+  printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n");
+
+
+  // Check for correct number of parameters
+  if(argc!=3)
+  {
+    printf("Bad number of arguments.\n");
+    exit(1);
+  }
+
+
+  // Try to open input and output files
+  if((file_input=fopen(argv[1],"r+b"))==NULL)
+  {
+    printf("Couldn't open %s.\n",argv[1]);
+    exit(1);
+  }
+
+  if((file_output=fopen(argv[2],"w+b"))==NULL)
+  {
+    printf("Couldn't create %s.\n",argv[2]);
+    exit(1);
+  }
+
+
+  // Get output length. Only 4 bytes are stored in the file, so the variable
+  // is cleared first: where unsigned long is wider than 4 bytes the
+  // remaining bytes would otherwise be garbage.
+  // NOTE(review): the bytes are read in the machine's own byte order.
+  size_file_output=0;
+  if(fread(&size_file_output,1,4,file_input)!=4)
+  {
+    printf("Couldn't read the output size from %s.\n",argv[1]);
+    exit(1);
+  }
+
+
+  // Initialize ppmc decoder
+  ppmc_alloc_memory();
+  ppmc_initialize_contexts();
+  ppmc_decoder_initialize();
+
+
+
+  // Initialize decoder
+  range_decoder_init(&rc_decoder,file_input);
+
+
+  // Start main loop which decodes the file
+  // Take in account the bytes already written, without wrapping around
+  // when the stored size is below 4
+  main_counter=(size_file_output>4)?size_file_output-4:0;
+  expected_flush=0; //we don't expect a flush yet
+
+  while(main_counter!=0)
+  {
+
+    // Clear exclusion table
+    for(counter=0;counter!=256;++counter)
+      excluded[counter]=0;
+
+    // Try to decode current byte in order-4 if possible, else in lower ones
+    ppmc_decode_order4();
+    if(byte==-1)
+      ppmc_decode_order3();
+    if(byte==-1)
+    {
+      ppmc_decode_order2();
+      if(byte==-1)
+      {
+        ppmc_decode_order1();
+        if(byte==-1)
+        {
+          ppmc_decode_order0();
+          if(byte==-1) //check if it was an escape code
+          {
+            // Decode in order-(-1)
+            ppmc_get_totf_ordern1();
+            symb_cump=range_decoder_decode(&rc_decoder,total_cump);
+            byte=ppmc_get_symbol_ordern1();
+            ppmc_get_prob_ordern1();
+            range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+            coded_in_order=0; //update all orders
+
+            // Now see if it's the code of flushing
+
+            if(symb_cump==256)
+            {
+              printf("Flushing.\n");
+              ppmc_flush_mem_dec();
+              expected_flush=0;
+              continue; //do not output byte nor update
+            }
+
+          }
+        }
+      }
+    }
+
+    // Output byte and update model
+
+    fputc(byte,file_output);
+
+    // Update exclusion: every case falls through on purpose, so the order
+    // the byte was coded in and all the higher orders get updated
+    switch(coded_in_order)
+    {
+      case 0: ppmc_update_order0();     /* fallthrough */
+      case 1: ppmc_update_order1();     /* fallthrough */
+      case 2: ppmc_update_dec_order2(); /* fallthrough */
+      case 3: ppmc_update_dec_order3(); /* fallthrough */
+      case 4: ppmc_update_dec_order4();
+      default: break;
+    };
+
+
+    // Check if flushing has to be done and has not been done.
+    // This is optional, in case you limit the memory usage, you don't
+    // need to include this
+
+    if(expected_flush==1) // If flushing didn't happen, we can't decode
+    {
+      printf("Can't decompress file. Not enough memory.\nTry in a machine with more memory.\n");
+      exit(1);
+    }
+    if(ppmc_out_of_memory==1)
+    {
+      expected_flush=1; // Next code must be a flush code, otherwise we don't
+                        // have enough memory, and therefore we can't decode
+    }
+
+
+    // Update order variables
+
+    o4_byte=o3_byte;
+    o3_byte=o2_byte;
+    o2_byte=o1_byte;
+    o1_byte=byte; //current one, is next time order-1
+
+    // Byte decoded and model updated, loop
+    main_counter--;
+
+//printf("\n%d",size_file_output-main_counter);
+
+  }
+
+
+  ppmc_free_memory();
+
+  // Close file handles and free memory
+  fclose(file_input);
+  fclose(file_output);
+
+
+
+
+  // Nicely exit
+  return 0;
+}
+
+
+// Routines not used by ppmc but rather by main.
+// Not including the range coder.
+
+
+// Returns the file size of a given file. The current position of the
+// stream is saved first and restored before returning.
+long filesize(FILE *stream)
+{
+  long curpos, length;
+
+  curpos = ftell(stream);
+  fseek(stream, 0L, SEEK_END);
+  length = ftell(stream);
+  fseek(stream, curpos, SEEK_SET);
+  return length;
+}
+
+
diff --git a/examples/ppmc/luca/Makefile b/examples/ppmc/luca/Makefile
new file mode 100644
index 0000000..887aa40
--- /dev/null
+++ b/examples/ppmc/luca/Makefile
@@ -0,0 +1,18 @@
+
+TARGETS=ppmc unppmc
+COMMON= ppmc.o ppmcdata.o range.o
+
+CFLAGS=-O3 -Wall -DNDEBUG
+
+all: $(TARGETS)
+
+ppmc: $(COMMON) ppmcmain.c
+
+unppmc: $(COMMON) unppmc.c
+
+clean:
+
+	@$(RM) -f *.o $(TARGETS)
+
+.PHONY: all clean
+
diff --git a/examples/ppmc/luca/ppmc.c b/examples/ppmc/luca/ppmc.c
new file mode 100644
index 0000000..371c39e
--- /dev/null
+++ b/examples/ppmc/luca/ppmc.c
@@ -0,0 +1,3122 @@
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmc.c"
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains the whole ppmc encoder.
It uses hash tables for
+ managing most of the orders. And a maximum order of 4. It codes bytes.
+ Order-1-0-(-1) are all handled in tables. Order-2 has a table with
+ direct hashing with pointers to the linked lists. Order-4 and order-3
+ both have hash tables with pointers to contexts in a linked lists which
+ finally have a pointer to the start of the linked list with the
+ probability distribution. Update exclusion is used, but exclusion is not.
+
+ Please, note that if the machine where the decoder is run doesn't have as
+ much memory as the computer where the encoder was run, the decoder will
+ not be able to properly decode the file, because it will not be able to
+ keep track of new statistics, in this case it will just exit.
+
+ For applications where the loss of data is not admissible, I suggest you to
+ limit both encoder and decoder's memory requirements to a given minimum
+ which both machines have.
+*/
+
+
+#include <stdio.h>    //printf
+#include <stdlib.h>   //malloc, free
+#include "range.h"
+#include "ppmcdata.h"
+
+// Fixed frequencies used by order-(-1), one entry per byte value (8 entries
+// per row). "ordern1_total_cump" below is the total that the order-(-1)
+// routines report for this table.
+static unsigned ordern1_table[] = {
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 4303574, 88451153, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  143318, 9172380, 1146547, 100, 143318, 573273, 859910, 10318928,
+  10318928, 6019374, 573273, 26657231, 1576502, 43138853, 10462246, 3439642,
+  6162693, 6162693, 6162693, 6162693, 6162693, 6162693, 6162693, 6162693,
+  6162693, 6162693, 8742425, 4299553, 14188526, 3439642, 9888973, 429955,
+  143318, 27230505, 2436413, 22357678, 10892202, 40989076, 1576502, 1433184,
+  286636, 37406115, 573273, 286636, 19204672, 13901889, 19491309, 10462246,
+  16481621, 1003229, 21211130, 13615252, 17914806, 2866368, 4012916, 429955,
+  1719821, 1146547, 143318, 286636, 143318, 286636, 143318, 1576502,
+  143318, 315013952, 32246651, 165962764, 171122228, 418203235, 23790862, 26800550,
+  17054895, 230742703, 7452559, 1003229, 163669669, 75385504, 225153284, 261412851,
+  88140846, 24794091, 215837585, 223003507, 162523121, 110355206, 38122707, 5302782,
+  7595877, 18918035, 8885743, 859910, 100, 859910, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+  100, 100, 100, 100, 100, 100, 100, 100,
+};
+#define ordern1_total_cump 3651941255u
+
+// Routines used by ppmc. Not including the range coder.
+//
+// They are for initializing of both encoder and decoder, and unless there
+// are two versions, both encoder and decoder use the same routines. Like
+// "ppmc_initialize_contexts".
+
+
+// This one allocs the memory needed by ppmc, and adjust some pointers used
+// for allocating elements in the linked lists. The mempool arrays must be
+// initialized now.
+void ppmc_alloc_memory(void)
+{
+  unsigned long counter;
+
+
+  // Init mempool buffers: every slot starts empty, so that
+  // "ppmc_free_memory" only frees what was really allocated
+
+  for(counter=0;counter!=_mempool_max_index;++counter)
+  {
+    _bytes_pool_array[counter]=0;
+    _context_pool_array[counter]=0;
+  }
+
+  _bytes_pool_index=1; //first entry will be used now
+  _context_pool_index=1;
+
+
+  // Allocate memory for ppmc structures and adjust some variables.
+  // On failure "ppmc_out_of_memory" is set and the routine returns.
+  if((_bytes_pool=(struct _byte_and_freq *)malloc
+     (sizeof(struct _byte_and_freq)*_bytes_pool_elements))==NULL)
+  {
+    printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+    ppmc_out_of_memory=1; //flush
+    return;
+  }
+
+  // Save the pointer for freeing right now: if the next allocation fails
+  // this buffer must still be reachable by "ppmc_free_memory"
+  _bytes_pool_array[0]=_bytes_pool;
+
+  if((_context_pool=(struct context *)malloc
+     (sizeof(struct context)*_context_pool_elements))==NULL)
+  {
+    printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+    ppmc_out_of_memory=1; //flush
+    return;
+  }
+
+  _context_pool_array[0]=_context_pool; //save pointer for freeing
+
+
+  //adjust variables: one past the last usable element of every pool
+  _bytes_pool_max=_bytes_pool+_bytes_pool_elements;
+  _context_pool_max=_context_pool+_context_pool_elements;
+
+  ppmc_out_of_memory=0; //we still have memory
+}
+
+
+// This routine initializes all the contexts, and all the tables including
+// those who care about the number of bytes defined in a context.
+void ppmc_initialize_contexts(void)
+{
+  unsigned long ctx, sym;
+
+
+  // Order-0: nothing defined yet, every frequency is 0
+  order0_defined_bytes=0;
+  order0_max_cump=0;
+
+  for(sym=0;sym<256;++sym)
+    order0_array[sym]=0;
+
+
+  // Order-1: for each of the 256 contexts clear its counters and its table
+  for(ctx=0;ctx<256;++ctx)
+  {
+    order1_defined_bytes_array[ctx]=0;
+    order1_max_cump_array[ctx]=0;
+
+    for(sym=0;sym<256;++sym)
+      order1_array[ctx][sym]=0;
+  }
+
+
+  // Order-2 and the order-4-3 hash tables, 65536 entries each.
+  // For order-2 only "defined_bytes" is cleared, "prob" and "max_cump"
+  // are not touched here.
+  for(ctx=0;ctx<65536;++ctx)
+  {
+    order2_array[ctx].defined_bytes=0;
+    order4_hasht[ctx]=0;
+    order3_hasht[ctx]=0;
+  }
+}
+
+
+// Prepares the encoder model: the first four bytes of the input file are
+// copied as they are to the output file and become the starting context
+// (order-4, order-3, order-2 and order-1 bytes, in that order). Order-0 is
+// updated once, with the third byte.
+//
+// Call it before the main loop, once the memory has been allocated and the
+// tables initialized. The range coder is not used here.
+void ppmc_encoder_initialize(void)
+{
+
+  byte=fgetc(file_input);
+  fputc(byte,file_output);
+  o4_byte=byte; //order-4
+
+  byte=fgetc(file_input);
+  fputc(byte,file_output);
+  o3_byte=byte; //order-3
+
+  byte=fgetc(file_input);
+  fputc(byte,file_output);
+  o2_byte=byte; //order-2
+  ppmc_update_order0();
+
+  byte=fgetc(file_input);
+  fputc(byte,file_output);
+  o1_byte=byte; //order-1
+
+}
+
+
+// This routine initializes the decoder model, should be called to do the same
+// changes as "ppmc_encoder_initialize()" did.
+void ppmc_decoder_initialize(void)
+{
+
+  // Read the first four bytes of the input, copy them to the output and
+  // keep them as the starting context bytes
+  byte=fgetc(file_input);
+  o4_byte=byte; //order-4
+  fputc(byte,file_output);
+
+  byte=fgetc(file_input);
+  o3_byte=byte; //order-3
+  fputc(byte,file_output);
+
+  byte=fgetc(file_input);
+  o2_byte=byte; //order-2
+
+  fputc(byte,file_output);
+  ppmc_update_order0(); //order-0 is updated with this third byte
+
+  byte=fgetc(file_input);
+  o1_byte=byte; //order-1
+  fputc(byte,file_output);
+}
+
+
+// Once coding or decoding is finished you have to call this routine.
+// It must be called when done.
+//
+// Every buffer saved in the mempool arrays is freed and its entry is
+// cleared, so calling the routine again before "ppmc_alloc_memory" does
+// not free the same buffer twice.
+void ppmc_free_memory(void)
+{
+  unsigned long counter;
+
+  // Free the memory buffers (free() of an empty entry does nothing)
+
+  for(counter=0;counter!=_mempool_max_index;++counter)
+  {
+    free(_bytes_pool_array[counter]);
+    _bytes_pool_array[counter]=0;
+
+    free(_context_pool_array[counter]);
+    _context_pool_array[counter]=0;
+  }
+
+}
+
+
+// This routine flushes the memory and restarts all the tables of
+// probabilities, current order bytes are not modified, this function
+// is called when we ran out of memory. We have to output the code
+// number 256 which means memory flushing, for doing this we have to go
+// to order-(-1) so we have to output an escape code in all the orders
+// till we reach order-(-1) where we can code it. Then we free all the
+// memory, alloc it again, and reinitialize all the orders.
+//
+// However we may find the case when the current order is not initialized,
+// in this case we don't need to output an escape code.
+void ppmc_flush_mem_enc(void)
+{
+  // Summary: one escape code is coded for every order whose current
+  // context has something defined, then the flush code, and finally the
+  // whole model is freed, allocated again and reinitialized.
+
+  // Code an escape code in order-4
+  if(ppmc_get_totf_order4()!=0) //if 0 no need of escape code
+  {
+
+    ppmc_get_escape_prob_order4(); //get prob and cump
+    range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+  }
+
+
+  // Code an escape code in order-3
+  if(ppmc_get_totf_order3()!=0) //if 0 no need of escape code
+  {
+
+    ppmc_get_escape_prob_order3(); //get prob and cump
+    range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+  }
+
+
+  // Code an escape code in order-2
+
+  o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+  // First check if current order-2 context is empty
+  if(order2_array[o2_cntxt].defined_bytes!=0) //it's not empty
+  {
+    ppmc_get_totf_order2();
+    ppmc_get_escape_prob_order2();
+    range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+  }
+
+
+  // Code an escape code in order-1
+
+  // First check if current order-1 table is empty
+  if(order1_defined_bytes_array[o1_byte]!=0) //it's not empty
+  {
+    ppmc_get_totf_order1();
+    ppmc_get_escape_prob_order1();
+    range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+  }
+
+
+  // Code an escape code in order-0. Order-0 always has at least one symbol
+
+  ppmc_get_totf_order0();
+  ppmc_get_escape_prob_order0();
+  range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+
+
+  // Now we can code the code 256
+  // NOTE(review): the flush code is coded here with a total of 257, while
+  // "ppmc_get_totf_ordern1" in this file reports "ordern1_total_cump".
+  // Confirm that the decoder decodes this symbol with the same total.
+
+  symb_prob=1;
+  symb_cump=256;
+  total_cump=257;
+  range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+
+  // Now that decoder knows the flushing, free memory and reinit
+
+  ppmc_free_memory();
+  ppmc_alloc_memory();
+  ppmc_initialize_contexts();
+
+
+  // Be sure that order-0 has at least one probability: the order-1 byte
+  // is counted once in the tables which have just been cleared
+
+  order0_array[o1_byte]++;
+  order0_max_cump++;
+  order0_defined_bytes++;
+
+}
+
+
+// When the decoder gets the symbol of flushing, most of the job is done
+// because we already got all the escape codes, so we only have to reinit.
+void ppmc_flush_mem_dec(void)
+{
+
+  // Free memory and reinit
+
+  ppmc_free_memory();
+  ppmc_alloc_memory();
+  ppmc_initialize_contexts();
+
+
+  // Be sure that order-0 has at least one probability: the order-1 byte
+  // is counted once in the tables which have just been cleared
+
+  order0_array[o1_byte]++;
+  order0_max_cump++;
+  order0_defined_bytes++;
+
+
+}
+
+
+
+// ORDER-(-1) functions, also called ordern1 (Negative1) in functions
+//
+// Order-(-1) never updates its probabilities. It uses the fixed table
+// "ordern1_table": the probability of a byte is its entry in the table,
+// its cump is the sum of the entries of all the bytes before it, and the
+// total cump is "ordern1_total_cump".
+//
+// The alphabet has the following distribution: 0-255 the bytes. 256 is
+// an special symbol which means that we have flushed the encoder tables,
+// and thus the decoder must flush its tables too.
+// NOTE(review): "ordern1_table" is indexed by byte value only, no entry
+// for the code 256 is visible here. Confirm how the flush code is meant
+// to be coded with this table.
+//
+// The rest of the tables only have 256 symbols, because we have no need
+// of assign a symbol to the flush code (which already is the order-(-1)
+// table) nor to the escape code.
+
+
+// Gets the probability for a given symbol in the order-(-1) (ordern1)
+//
+// Input: "byte", the symbol.
+// Output: "symb_cump" (sum of the entries before "byte"), "symb_prob"
+// (the entry of "byte") and "total_cump".
+void ppmc_get_prob_ordern1(void)
+{
+  int i;
+  unsigned acum = 0;
+
+  // Sum only the entries BEFORE the symbol and stop on it. The index is
+  // never allowed to go past the 256 entries of the table.
+  for (i = 0; i < 256 && i != byte; ++i)
+    acum += ordern1_table[i];
+
+  symb_cump=acum;
+
+  // If "byte" is not in 0-255 the loop ended without finding it, there is
+  // no entry to read, so report a probability of 0 instead of reading
+  // beyond the end of the table.
+  if (i < 256)
+    symb_prob=ordern1_table[i];
+  else
+    symb_prob=0;
+
+  total_cump=ordern1_total_cump;
+}
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-(-1)
+void ppmc_get_totf_ordern1(void)
+{
+  total_cump=ordern1_total_cump;
+}
+
+
+// Returns the symbol for a given cump under order-(-1)
+//
+// Input: "symb_cump", the decoded cump.
+// Returns the first byte whose interval in "ordern1_table" contains
+// "symb_cump", this is the inverse of "ppmc_get_prob_ordern1". A cump
+// at or above the sum of the whole table returns 255.
+unsigned long ppmc_get_symbol_ordern1 (void)
+{
+  unsigned long symbol;
+  unsigned long acum = 0;
+
+  for (symbol = 0; symbol < 255; ++symbol)
+  {
+    acum += ordern1_table[symbol]; //upper bound of this symbol's interval
+    if (symb_cump < acum)
+      break;
+  }
+
+  return symbol;
+}
+
+
+
+// ORDER-0 functions
+//
+// Due to the fact that order-0 has no context, I use an array for all the
+// probabilities under order-0, just as you could do in a basic model for
+// arithmetic coding.
+//
+// The main array is: order0_array. Where order0_array[byte] contains the
+// probability for a given byte. The same applies to order-1.
+//
+// To ensure that the updating and coding is done correctly, "byte" can't
+// be changed till all the coding and updating is done.
+
+
+// Sets "total_cump" to the total cump of order-0: the sum of all the
+// probabilities plus the space of the escape code, which is the number of
+// defined bytes.
+void ppmc_get_totf_order0(void)
+{
+  total_cump=order0_max_cump+order0_defined_bytes;
+}
+
+
+// Codes "byte" under order-0.
+//
+// If the byte has a non-zero probability it is coded, "coded_in_order" is
+// set to 0 and the routine returns 1. Otherwise an escape code is coded
+// and the routine returns 0, in this case further coding is needed.
+char ppmc_code_byte_order0(void)
+{
+  unsigned long previous;
+
+  ppmc_get_totf_order0(); //get total cump
+
+  if(order0_array[byte]!=0)
+  {
+    // The symbol is present, code it under order-0
+    coded_in_order=0;
+
+    symb_prob=order0_array[byte];
+
+    // Its cump is the sum of the probabilities of the symbols before it
+    symb_cump=0;
+    previous=0;
+    while(previous!=byte)
+    {
+      symb_cump+=order0_array[previous];
+      ++previous;
+    }
+
+    range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+    return 1; //symbol coded under order-0
+  }
+
+  // The byte is not present: code an escape code. Its space starts at the
+  // current max cump and it is as wide as the number of defined bytes.
+  symb_cump=order0_max_cump;
+  symb_prob=order0_defined_bytes;
+
+  range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+  return 0; //byte not coded
+}
+
+
+// This functions update order-0 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+void ppmc_update_order0(void) +{ + if(order0_array[byte]==0) + { + // It had a zero probability + order0_array[byte]++; //increment symbol probability + ++order0_defined_bytes; //new byte defined + ++order0_max_cump; //total cump + return; + } + else + { + // It had a non-zero probability + + // Increment its probability + order0_array[byte]++; //increment symbol probability + ++order0_max_cump; //total cump + + // Check to see if its the maximum in this case renormalize + if(order0_array[byte]==255) + ppmc_renormalize_order0(); + + return; + } +} + + +// This functions renormalizes the probabilities at order-0 updating variables +void ppmc_renormalize_order0(void) +{ + unsigned long counter; + + // Initialize variables + order0_defined_bytes=0; //clear them + order0_max_cump=0; + + // Loop over all probabilities, divide them by a factor of 2 and update variables + for(counter=0 ; counter!=256 ; ++counter) + { + order0_array[counter]>>=1; //divide by a factor of 2 + + if(order0_array[counter]!=0) //see if it has a non zero probability + order0_defined_bytes++; + + order0_max_cump+=order0_array[counter]; //sum to the total cump + } +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of a escape code it returns -1 +void ppmc_decode_order0(void) +{ + unsigned long current_cump, counter; + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order0(); //total cump needed for decoding + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=order0_max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order0(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + for(counter=0 ; counter!= 
256 ; ++counter) + { + if(symb_cump>=1; //divide by a factor of 2 + + if(order1_array[o1_byte][counter]!=0) //see if it has a non zero probability + order1_defined_bytes_array[o1_byte]++; + + order1_max_cump_array[o1_byte]+=order1_array[o1_byte][counter]; //sum to the total cump + } +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +void ppmc_decode_order1(void) +{ + unsigned long current_cump, counter; + + + // First check if current order-1 table is empty + if(order1_defined_bytes_array[o1_byte]==0) //it's empty + { + byte=-1; //byte not coded, nothing done + return; + } + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order1(); //total cump needed for decoding + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=order1_max_cump_array[o1_byte]) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order1(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + for(counter=0 ; counter!= 256 ; ++counter) + { + if(symb_cumpbyte==byte) + goto ppmc_o2_byte_found; //bad thing, I know, anyone has a better idea? + symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. 
+ + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o2_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=order2_array[o2_cntxt].max_cump; + symb_prob=order2_array[o2_cntxt].defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; + + + // That code is executed when the byte is found in the linked list + +ppmc_o2_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=2; //successfully coded under order-2 + + o2_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-2 +} + + +// This functions update order-2 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// Of course "o2_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. +// +// This updating is only for encoding. +void ppmc_update_order2(void) +{ + + // First of all check if that's the first byte in this context, in that case + // we have to initialize some variables in the context structure. 
+ + if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + order2_array[o2_cntxt].defined_bytes=1; + order2_array[o2_cntxt].max_cump=1; + order2_array[o2_cntxt].prob=_bytes_pool; + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order two, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==2) //coded under order-2 + { + + // Update its count and variables of this context and check for renormalization + + o2_ll_node->freq++; //increment its frequency (rather probability) + + order2_array[o2_cntxt].max_cump++; //total cump + + if(o2_ll_node->freq==255) //do we have to renormalize? 
+ ppmc_renormalize_order2(); //renormalize + + } + else + { + + // Once every paranoid check has been done we are sure that this byte + // did not existed and so we have to create a new node in the linked + // list. Also we have to take care of memory issues. + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o2_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + order2_array[o2_cntxt].max_cump++; //total cump + order2_array[o2_cntxt].defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + +} + + +// This functions renormalizes the probabilities at order-2 updating context +// variables. +void ppmc_renormalize_order2(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + // Initialize variables. Defined bytes remain the same. 
+ order2_array[o2_cntxt].max_cump=0; //clear them + + node=order2_array[o2_cntxt].prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + order2_array[o2_cntxt].max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + + //printf("\nRenormalization, context:%c%c",o2_byte,o1_byte); + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o2_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o2_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order2(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + // Initialize o2_cntxt + + o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte); + + + // First check if current order-2 context is empty + if(order2_array[o2_cntxt].defined_bytes==0) //it's empty + { + byte=-1; //byte not coded, nothing done + return; + } + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order2(); //total cump needed for decoding + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=order2_array[o2_cntxt].max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order2(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + 
node=order2_array[o2_cntxt].prob; //get pointer to linked lists + + while(1) + { + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. + } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o2_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=2; + + return; + } + +} + + +// This is the routine for updating while decoding. We have to search the byte +// in the linked list, if it's present, update its count, otherwise we have +// hitted the end of the linked list, and there we have to create a new node. +// +// Of course if the byte was matched in order-2 we'll have a pointer to it +// in "o2_ll_node" so we don't need to read the linked list. (we already did +// in decoding) +// +// Another case which we also have to specially deal with, this is the case +// when the context has not been initalized yet. +void ppmc_update_dec_order2(void) +{ + struct _byte_and_freq *node; + + + // Handle the case when the context is not initialized + // This code is the same as the one for the encoding. 
+ + if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + order2_array[o2_cntxt].defined_bytes=1; + order2_array[o2_cntxt].max_cump=1; + order2_array[o2_cntxt].prob=_bytes_pool; + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + + return; //nothing else to do + } + + + // Current context is initalized, proceed + + if(coded_in_order==2) //check if it was decoded under order-2 + { + + // We can be sure that the pointer "o2_ll_node" points to its entry, and + // it has a non 0 probability (otherwise it couldn't be coded) so just + // update its probability and max_cump + + o2_ll_node->freq++; //the probability of the byte + order2_array[o2_cntxt].max_cump++; //the max_cump + + if(o2_ll_node->freq==255) //check for renormalization + ppmc_renormalize_order2(); + + } + else + { + + // An escape code was decoded under order-2, we have to read till the + // end of the linked list so we can add a new node for this new byte. 
+ + node=order2_array[o2_cntxt].prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + + // We reached the end of the linked list, add a new node if possible, + // we are using the same code of "ppmc_update_order2()" with the + // difference that the pointer to the linked list is "node" + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + order2_array[o2_cntxt].max_cump++; //total cump + order2_array[o2_cntxt].defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //we are finished updating + + } + +} + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order2(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. 
+ + symb_prob=order2_array[o2_cntxt].defined_bytes; + symb_cump=order2_array[o2_cntxt].max_cump; +} + + + +// ORDER-3 functions +// +// The differences between order-2 and order-3 are just a few: instead of +// keeping a table with the context structures, we keep a hash table with +// pointers to linked lists of contexts, so it's only a matter of +// searching for the current context in the linked list corresponding to its +// hash entry. This is done in "ppmc_get_totf_order3" because that's the first +// routine that both encoding and decoding routines call. + + +// Returns in the variable "total_cump" the current total cump of +// order-3. Must be called while encoding or decoding before anything else +// because it initializes the pointers to the context structure in +// "o3_context" and "o3_cntxt". +// +// If the hash entry is not initialized it returns "o3_context"=0. +// If the context is not present in the linked list of contexts, "o3_context" +// will point to the last element in the linked list. +// If the context is present "o3_context" will point to the context to use. +// One can distinguish the last two by checking the context value of the +// structure: if it's not the same, it is the last element. +// +// The routine returns 0 if the hash entry is not initialized or if +// the context was not present. Otherwise it returns 1, meaning that we +// have to code under this context. 
+char ppmc_get_totf_order3(void) +{ + struct context *cntxt_node; + + + // First make the hash key for order-3 + + o3_cntxt=ppmc_order3_hash_key(o1_byte,o2_byte,o3_byte); + full_o3_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16); //order-3 + + + // Now check the hash entry in the table + + if(order3_hasht[o3_cntxt]==0) //if 0, not initialized + { + + o3_context=0; //no hash entry + + return 0; //hash entry not initialized + } + + + // Now read trough the linked list of context searching current one + + cntxt_node=order3_hasht[o3_cntxt]; + + while(1) + { + + if(cntxt_node->order4321==full_o3_cntxt) //compare context + goto ppmc_gtf_cntxt_found; + + if(cntxt_node->next==0) //end of context's linked list + break; + + cntxt_node=cntxt_node->next; //next element + + } + + + // Once there the context was not found + o3_context=cntxt_node; //pointer to last element in the linked list + + return 0; //it was not present + + + // The context is found, so return pointer and cump + +ppmc_gtf_cntxt_found: + + o3_context=cntxt_node; + + // Total cump is current total cump plus the escape for the escape code + + total_cump=o3_context->defined_bytes+o3_context->max_cump; + + return 1; //context found + +} + + +// Codes a byte under order-3 and returns 1. +// Otherwise it returns a 0. +// +// In case the byte is coded under this context, coded_in_order=3. +char ppmc_code_byte_order3(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order3()==0) + return 0; + + + // See if the byte is present and compute its cump at the same time + + node=o3_context->prob; //pointer to first element in the linked list + + symb_cump=0; //the first symbol always has a 0 cump + + + // Now search the byte in the linked list + + do{ + if(node->byte==byte) + goto ppmc_o3_byte_found; //bad thing, I know, anyone has a better idea? 
+ symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o3_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=o3_context->max_cump; + symb_prob=o3_context->defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; + + + // That code is executed when the byte is found in the linked list + +ppmc_o3_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=3; //successfully coded under order-3 + + o3_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-3 +} + + +// This functions update order-3 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// +// "o3_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. Also "o3_context" must be initialized. +// +// This updating is only for encoding. 
+void ppmc_update_order3(void) +{ + + // First thing first, check if the hash entry is initialized + + if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order3_hasht[o3_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + 
if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==3) //coded under order-3 + { + + // Update its count and variables of this context and check for renormalization + + o3_ll_node->freq++; //increment its frequency (rather probability) + + o3_context->max_cump++; //total cump + + if(o3_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order3(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o3_context" points to the last element, so we can put the new element. 
+ + if(o3_context->order4321==full_o3_cntxt) //chech if that's the last + { //element or the a context found + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o3_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o3_context->max_cump++; //total cump + o3_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o3_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the 
array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + +// This functions renormalizes the probabilities at order-3 updating context +// variables. +void ppmc_renormalize_order3(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Initialize variables. Defined bytes remain the same. 
+ o3_context->max_cump=0; //clear them + + node=o3_context->prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + o3_context->max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o3_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o3_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order3(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order3()==0) + { + byte=-1; + return; + } + + + // Decode current cump + + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=o3_context->max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order3(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=o3_context->prob; //get pointer to linked lists + + while(1) + { + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. 
+ } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o3_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=3; + + return; + } + +} + + +// This is the routine for updating while decoding. The only difference with +// the routine for coding is that when an escape code was coded, "o3_ll_node" +// is not initialized so we have to read till the end of the linked list. +// Fortunately "o3_context" will be initialized so we don't need to read its +// linked list. +void ppmc_update_dec_order3(void) +{ + struct _byte_and_freq *node; + + // First thing first, check if the hash entry is initialized + + if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order3_hasht[o3_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + 
ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==3) //coded under order-3 + { + + // Update its count and variables of this context and check for renormalization + + o3_ll_node->freq++; //increment its frequency (rather probability) + + o3_context->max_cump++; //total cump + + if(o3_ll_node->freq==255) //do we have to renormalize? 
+ ppmc_renormalize_order3(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o3_context" points to the last element, so we can put the new element. + + if(o3_context->order4321==full_o3_cntxt) //chech if that's the last + { //element or the a context found + + // Read till the end of the linked list + + node=o3_context->prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + // Now add element + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o3_context->max_cump++; //total cump + o3_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + 
_bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o3_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct 
_byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order3(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. + + symb_prob=o3_context->defined_bytes; + symb_cump=o3_context->max_cump; +} + + + +// ORDER-4 functions +// +// The routines for order-4 are *equal* to those for order-3, there are a few +// changes like different global variables, and different hash keys. +// +// If you want to go to higher orders, you'd use the same code and data +// structures, with the difference of the context bytes (order4321) +// stored in every context's linked list. + + +// Returns in the variable "total_cump" the current total cump of +// order-4. Must be called while encoding or decoding before anything else +// because it initializes the pointers to the context structure in +// "o4_context" and o4_cntxt. +// +// If the hash entry is not initialized it returns "o4_context"=0 +// If the context is not present in the linked list of context, "o4_context" +// will point to the last element in the linked list. +// If the context is present "o4_context" will point to the context to use. +// One can distinguish the last two by checking the context value of the +// structure, if it's not the same, is the last element. +// +// The routine returns 0 if the hash entry is not initialized or if the +// the context was not present. Otherwise it returns 1, meaning that we +// have to code under this context. 
+char ppmc_get_totf_order4(void) +{ + struct context *cntxt_node; + + + // First make the hash key for order-4 + + o4_cntxt=ppmc_order4_hash_key(o1_byte,o2_byte,o3_byte,o4_byte); + full_o4_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16)+(o4_byte<<24); //order-4 + + + // Now check the hash entry in the table + + if(order4_hasht[o4_cntxt]==0) //if 0, not initialized + { + + o4_context=0; //no hash entry + + return 0; //hash entry not initialized + } + + + // Now read trough the linked list of context searching current one + + cntxt_node=order4_hasht[o4_cntxt]; + + while(1) + { + + if(cntxt_node->order4321==full_o4_cntxt) //compare context + goto ppmc_gtfo4_cntxt_found; + + if(cntxt_node->next==0) //end of context's linked list + break; + + cntxt_node=cntxt_node->next; //next element + + } + + + // Once there the context was not found + o4_context=cntxt_node; //pointer to last element in the linked list + + return 0; //it was not present + + + // The context is found, so return pointer and cump + +ppmc_gtfo4_cntxt_found: + + o4_context=cntxt_node; + + // Total cump is current total cump plus the escape for the escape code + + total_cump=o4_context->defined_bytes+o4_context->max_cump; + + return 1; //context found + +} + + +// Codes a byte under order-4 and returns 1. +// Otherwise it returns a 0. +// +// In case the byte is coded under this context, coded_in_order=4. +char ppmc_code_byte_order4(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order4()==0) + return 0; + + + // See if the byte is present and compute its cump at the same time + + node=o4_context->prob; //pointer to first element in the linked list + + symb_cump=0; //the first symbol always has a 0 cump + + + // Now search the byte in the linked list + + do{ + if(node->byte==byte) + goto ppmc_o4_byte_found; //bad thing, I know, anyone has a better idea? 
+ symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o4_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=o4_context->max_cump; + symb_prob=o4_context->defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; + + + // That code is executed when the byte is found in the linked list + +ppmc_o4_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=4; //successfully coded under order-4 + + o4_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-4 +} + + +// This functions update order-4 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// +// "o4_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. Also "o4_context" must be initialized. +// +// This updating is only for encoding. 
+void ppmc_update_order4(void) +{ + + // First thing first, check if the hash entry is initialized + + if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order4_hasht[o4_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + 
if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==4) //coded under order-4 + { + + // Update its count and variables of this context and check for renormalization + + o4_ll_node->freq++; //increment its frequency (rather probability) + + o4_context->max_cump++; //total cump + + if(o4_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order4(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o4_context" points to the last element, so we can put the new element. 
+ + if(o4_context->order4321==full_o4_cntxt) //chech if that's the last + { //element or the a context found + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o4_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o4_context->max_cump++; //total cump + o4_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o4_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the 
array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + +// This functions renormalizes the probabilities at order-4 updating context +// variables. +void ppmc_renormalize_order4(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Initialize variables. Defined bytes remain the same. 
+ o4_context->max_cump=0; //clear them + + node=o4_context->prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + o4_context->max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o4_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o4_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order4(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order4()==0) + { + byte=-1; + return; + } + + + // Decode current cump + + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=o4_context->max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order4(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=o4_context->prob; //get pointer to linked lists + + while(1) + { + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. 
+ } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o4_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=4; + + return; + } + +} + + +// This is the routine for updating while decoding. The only difference with +// the routine for coding is that when an escape code was coded, "o4_ll_node" +// is not initialized so we have to read till the end of the linked list. +// Fortunately "o4_context" will be initialized so we don't need to read its +// linked list. +void ppmc_update_dec_order4(void) +{ + struct _byte_and_freq *node; + + // First thing first, check if the hash entry is initialized + + if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order4_hasht[o4_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + 
ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order four, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==4) //coded under order-4 + { + + // Update its count and variables of this context and check for renormalization + + o4_ll_node->freq++; //increment its frequency (rather probability) + + o4_context->max_cump++; //total cump + + if(o4_ll_node->freq==255) //do we have to renormalize? 
+ ppmc_renormalize_order4(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o4_context" points to the last element, so we can put the new element. + + if(o4_context->order4321==full_o4_cntxt) //chech if that's the last + { //element or the a context found + + // Read till the end of the linked list + + node=o4_context->prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + // Now add element + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o4_context->max_cump++; //total cump + o4_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + 
_bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o4_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct 
_byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order4(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. + + symb_prob=o4_context->defined_bytes; + symb_cump=o4_context->max_cump; +} + diff --git a/examples/ppmc/luca/ppmc.h b/examples/ppmc/luca/ppmc.h new file mode 100644 index 0000000..e21545e --- /dev/null +++ b/examples/ppmc/luca/ppmc.h @@ -0,0 +1,135 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmc.h" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains the definitions of different functions and all the + data structures defined by ppmc. Also contains defines. 
+*/ + +// Definitions + +#define ppmc_order4_hash_size 65536 +#define ppmc_order4_hash_key(k,j,i,l) ( (k)+(j<<8)+(i<<1)+(l<<9) )& ppmc_order4_hash_size-1 +#define ppmc_order3_hash_size 65536 +#define ppmc_order3_hash_key(k,j,i) ((k)+(j<<7)+(i<<11)) & ppmc_order3_hash_size-1 +#define ppmc_order2_hash_key(k,j) ((k)+(j<<8)) +#define _bytes_pool_elements 125000 //this is used the first time + //that we allocate memory, that's + //the number of entries +#define _bytes_pool_elements_inc 125000 //if we need to alloc again, this + //is the number of entries to get +#define _context_pool_elements 50000 +#define _context_pool_elements_inc 50000 + +#define _mempool_max_index 1000 //the number of entries in the array with + //pointers + + +// Data structures + +// This structure contains a single element of a linked lists which contains +// the probability distribution of a given order. This structure takes 6 bytes. +struct _byte_and_freq{ +unsigned char byte; //the byte itself +unsigned char freq; //and the frequency of it +struct _byte_and_freq *next; //pointer to next element in linked list or 0 +}; + + +// This structure is used for both order-3 and order-4. It takes 20 bytes, +// and it can still hold another byte more. (only 19 being used) +// Order 2-1-0-(-1) use different structures for a faster accessing. +struct context{ +struct context *next; //next context in the hash entry +unsigned long order4321; //order-4-3-2-1 (or order-3-2-1 for order-3) +struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution +unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 ) +unsigned int defined_bytes; //the number of bytes in this context +}; + +// That's the same but for order-2 where there's no hash collisions. 
+struct context_o2{ +struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution +unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 ) +unsigned int defined_bytes; //the number of bytes in this context +}; + + +// Declaration of functions + + +// Functions for initializing +void ppmc_alloc_memory(void); +void ppmc_initialize_contexts(void); +void ppmc_encoder_initialize(void); +void ppmc_decoder_initialize(void); +void ppmc_free_memory(void); +void ppmc_flush_mem_enc(void); +void ppmc_flush_mem_dec(void); + +// Functions for order-(-1) +void ppmc_get_prob_ordern1(void); +unsigned long ppmc_get_symbol_ordern1(void); +void ppmc_get_totf_ordern1(void); +void ppmc_renormalize_order1(void); + +// Functions for order-0 +void ppmc_get_totf_order0(void); +char ppmc_code_byte_order0(void); +void ppmc_update_order0(void); +void ppmc_renormalize_order0(void); +void ppmc_decode_order0(void); +void ppmc_get_escape_prob_order0(void); +void ppmc_get_prob_order0(void); + +// Functions for order-1 +void ppmc_get_totf_order1(void); +char ppmc_code_byte_order1(void); +void ppmc_update_order1(void); +void ppmc_renormalize_order1(void); +void ppmc_decode_order1(void); +void ppmc_get_escape_prob_order1(void); +void ppmc_get_prob_order1(void); + + +// Functions for order-2 +void ppmc_get_totf_order2(void); +char ppmc_code_byte_order2(void); +void ppmc_update_order2(void); +void ppmc_renormalize_order2(void); +void ppmc_decode_order2(void); +void ppmc_update_dec_order2(void); +void ppmc_get_escape_prob_order2(void); +void ppmc_get_prob_order2(void); + + +// Functions for order-3 +char ppmc_get_totf_order3(void); +char ppmc_code_byte_order3(void); +void ppmc_update_order3(void); +void ppmc_renormalize_order3(void); +void ppmc_decode_order3(void); +void ppmc_update_dec_order3(void); +void ppmc_get_escape_prob_order3(void); +void ppmc_get_prob_order3(void); + + +// Functions for order-4 +char ppmc_get_totf_order4(void); +char 
ppmc_code_byte_order4(void); +void ppmc_update_order4(void); +void ppmc_renormalize_order4(void); +void ppmc_decode_order4(void); +void ppmc_update_dec_order4(void); +void ppmc_get_escape_prob_order4(void); +void ppmc_get_prob_order4(void); + + + diff --git a/examples/ppmc/luca/ppmcdata.c b/examples/ppmc/luca/ppmcdata.c new file mode 100644 index 0000000..bea0926 --- /dev/null +++ b/examples/ppmc/luca/ppmcdata.c @@ -0,0 +1,119 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcdata.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + +Part of the ppmc encoder and decoder. + +This module contains global data. +*/ + +#include "ppmc.h" //defines +#include "range.h" + +// Order-4 uses a hash table which points to the start of a linked list with +// the different context, which has the cump, the number of different symbols +// and a pointer to the linked list with the bytes and frequencies. +// Order-3 is almost the same, both take 262144 bytes. +struct context *order4_hasht[ppmc_order4_hash_size]; + +struct context *order3_hasht[ppmc_order3_hash_size]; + + +// The array for order-2 is different, as we do directly hashing, and thus +// we have no need to do the stuff of linked lists for the context itself, +// so it contains the context used. This takes 1310720 bytes. +struct context_o2 order2_array[65536]; + + +// Those are the multiple arrays for order-1. It takes 65536 bytes. +unsigned char order1_array[256][256]; +unsigned int order1_defined_bytes_array[256]; //the defined bytes in every context +unsigned int order1_max_cump_array[256]; //max cump of every context + + +// This is the array for order-0. It takes 256 bytes. 
+unsigned char order0_array[256]; +unsigned int order0_defined_bytes; +unsigned int order0_max_cump; + + +// No need of variables for order-(-1), because it's fixed. + + +// Those are the pointers and variables used for managing the mem pool for +// both context, and bytes and frequencies. +struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked + //lists with bytes and frequencies + *_bytes_pool_max; //the maximum of this buffer + struct context *_context_pool; //pointer to pool containing contexts + struct context *_context_pool_max; //the same as with _bytes_pool + + unsigned long _bytes_pool_index; //index in array of pointers + unsigned long _context_pool_index; + + //the following is an array keeping pointers to different buffers. A new + //buffer is allocated when the current one is full, so we always have a + //buffer for linked lists. (without allocating a buffer for every element) + struct _byte_and_freq *_bytes_pool_array[_mempool_max_index]; + struct context *_context_pool_array[_mempool_max_index]; + + char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any + //routine that needs to allocate memory must + //quit if that's 1. 
+ + + // Variables which contain current byte to code and order + unsigned long byte, //current byte to code + o1_byte, //order-1 byte + o2_byte, //order-2 byte + o3_byte, //order-3 byte + o4_byte; //order-4 byte + +unsigned long o2_cntxt; //used in the hash key of order-2 +unsigned long o3_cntxt; //use as hash key for order-3 +unsigned long o4_cntxt; //use as hash key for order-4 +unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together +unsigned long full_o4_cntxt; //order-4-3-2-1 + +unsigned long coded_in_order; //in which order the last byte was coded +//it's for update exclusion +//also used for decoding + +// Variables used for coding + +unsigned long +total_cump, //the total cumulative probability + symb_cump, //the symbol cumulative probability + symb_prob; //the symbol frequency + +rangecoder rc_coder; //state of range coder +rangecoder rc_decoder; //state of range decoder + +// File handles + +FILE *file_input, //file to code + *file_output; //file where the coded data is placed + + +// Pointers to linked lists and context structures used for faster updating +// or creation of new nodes, because instead of reading again all the linked +// list till the end (in the case of creation) we have a pointer to the last +// element. In the case that a byte was present in the linked lists but it +// had a 0 count, we just have to update its probability. And in the case +// that it already was present and we coded it under that context or a lower +// one, we just have to update its probability. + + +struct _byte_and_freq *o2_ll_node; //pointer to linked lists under order-2 +//where does it points depends in which +//order the byte was coded. 
+struct _byte_and_freq *o3_ll_node; //the same but for order-3 +struct _byte_and_freq *o4_ll_node; + +struct context *o3_context; //pointer to current order-3 context +struct context *o4_context; //pointer to current order-3 context diff --git a/examples/ppmc/luca/ppmcdata.h b/examples/ppmc/luca/ppmcdata.h new file mode 100644 index 0000000..f4bddc1 --- /dev/null +++ b/examples/ppmc/luca/ppmcdata.h @@ -0,0 +1,117 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcdata.h" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains externs definition for global data. +*/ + +#include "ppmc.h" + +// Order-4 uses a hash table which points to the start of a linked list with +// the different context, which has the cump, the number of different symbols +// and a pointer to the linked list with the bytes and frequencies. +// Order-3 is almost the same, both take 262144 bytes. +extern struct context *order4_hasht[]; + +extern struct context *order3_hasht[]; + + +// The array for order-2 is different, as we do directly hashing, and thus +// we have no need to do the stuff of linked lists for the context itself, +// so it contains the context used. This takes 1310720 bytes. +extern struct context_o2 order2_array[]; + + +// Those are the multiple arrays for order-1. It takes 65536 bytes. +extern unsigned char order1_array[256][256]; +extern unsigned int order1_defined_bytes_array[]; //the defined bytes in every context +extern unsigned int order1_max_cump_array[]; //max cump of every context + + +// This is the array for order-0. It takes 256 bytes. 
+extern unsigned char order0_array[]; +extern unsigned int order0_defined_bytes; +extern unsigned int order0_max_cump; + + +// Those are the pointers and variables used for managing the mem pool for +// both context, and bytes and frequencies. +extern struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked + //lists with bytes and frequencies + *_bytes_pool_max; //the maximum of this buffer +extern struct context *_context_pool; //pointer to pool containing contexts +extern struct context *_context_pool_max; //the same as with _bytes_pool + +extern unsigned long _bytes_pool_index; //index in array of pointers +extern unsigned long _context_pool_index; + +//the following is an array keeping pointers to different buffers. A new +//buffer is allocated when the current one is full, so we always have a +//buffer for linked lists. (without allocating a buffer for every element) +extern struct _byte_and_freq *_bytes_pool_array[_mempool_max_index]; +extern struct context *_context_pool_array[_mempool_max_index]; + +extern char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any + //routine that needs to allocate memory must + //quit if that's 1. 
+ + + +// Variables which contain current byte to code and order +extern unsigned long //they are only bytes + byte, //current byte to code + o1_byte, //order-1 byte + o2_byte, //order-2 byte + o3_byte, //order-3 byte + o4_byte; //order-4 byte + +extern unsigned long o2_cntxt; //used in the hash key of order-2 +extern unsigned long o3_cntxt; //use as hash key for order-3 +unsigned long o4_cntxt; //use as hash key for order-4 +extern unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together +extern unsigned long full_o4_cntxt; //order-4-3-2-1 + +extern unsigned long coded_in_order; //in which order the last byte was coded + //it's for update exclusion + //also used for decoding +// Variables used for coding + +extern unsigned long //no need for negative values + total_cump, //the total cumulative probability + symb_cump, //the symbol cumulative probability + symb_prob; //the symbol frequency + +extern rangecoder rc_coder; //state of range coder +extern rangecoder rc_decoder; //state of range decoder + +// File handles + + FILE *file_input, //file to code + *file_output; //file where the coded data is placed + + + +// Pointers to linked lists and context structures used for faster updating +// or creation of new nodes, because instead of reading again all the linked +// list till the end (in the case of creation) we have a pointer to the last +// element. In the case that a byte was present in the linked lists but it +// had a 0 count, we just have to update its probability. And in the case +// that it already was present and we coded it under that context or a lower +// one, we just have to update its probability. + + +extern struct _byte_and_freq *o2_ll_node;//pointer to linked lists under order-2 + //where does it points depends in which + //order the byte was coded. 
+extern struct _byte_and_freq *o3_ll_node; //the same but for order-3 +extern struct _byte_and_freq *o4_ll_node; + +extern struct context *o3_context; //pointer to current order-3 context +extern struct context *o4_context; //pointer to current order-3 context diff --git a/examples/ppmc/luca/ppmcmain.c b/examples/ppmc/luca/ppmcmain.c new file mode 100644 index 0000000..7a99d12 --- /dev/null +++ b/examples/ppmc/luca/ppmcmain.c @@ -0,0 +1,176 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcmain.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + +Part of the ppmc encoder only. + +This module is the main module and calls the different modules to do +the encoding of a file. When done prints bpb and kbyps. +*/ + + +#include +#include +#include "range.h" //the range coder functions and data +#include "ppmcdata.h" + + +long filesize(FILE *stream); + + + +//Main +int main (char argc, char *argv[]) +{ + unsigned long counter, //temporal counter for loops like for or while + counter2, //another temporal counter for sub loops + size_file_input; //the size of the input file + + + // Print title, version and copyright + printf("PPMC using range coder. (without exclusion)\n"); + printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n"); + printf("Permission is granted to make verbatim copies of this program for private\n"); + printf("use only. There is ABSOLUTELY NO WARRANTY. 
Use it at your OWN RISK.\n"); + + // Check for correct number of parameters + if(argc!=3) + { + printf("Bad number of arguments.\n"); + exit(1); + } + + + // Try to open input and output files + if((file_input=fopen(argv[1],"r+b"))==NULL) + { + printf("Couldn't open %s.\n",argv[1]); + exit(1); + } + + if((file_output=fopen(argv[2],"w+b"))==NULL) + { + printf("Couldn't create %s.\n",argv[2]); + exit(1); + } + + + // Check input file length and not accept 0 length files + size_file_input=filesize(file_input); + + if(size_file_input<5) + { + printf("Can't work with files below than 5 bytes!"); + exit(1); + } + + + // First output file length + fwrite(&size_file_input,1,4,file_output); //input length + + + // Initialize ppmc encoder + ppmc_alloc_memory(); //get memory + ppmc_initialize_contexts(); //initialize model + ppmc_encoder_initialize(); + + // Initialize range coder + range_coder_init(&rc_coder,file_output); + + + // Start main loop which codes the file + while((byte=fgetc(file_input))!=EOF) + { + + // Try to code current byte under order-4 if possible then go to lower orders + if(ppmc_code_byte_order4()==0) + if(ppmc_code_byte_order3()==0) + if(ppmc_code_byte_order2()==0) + if(ppmc_code_byte_order1()==0) + if(ppmc_code_byte_order0()==0) //else try to code under order-0 + { + // Code under order-(-1) + ppmc_get_prob_ordern1(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + coded_in_order=0; //update all the tables (unless order-(-1)) + } + + + // Now do update exclusion + + switch(coded_in_order) + { + case 0: ppmc_update_order0(); //update only order-0 + case 1: ppmc_update_order1(); //update order-0 and order-1 + case 2: ppmc_update_order2(); //update order-2 1 and 0... 
+ case 3: ppmc_update_order3(); + case 4: ppmc_update_order4(); + default: break; + }; + + + + // Update order variables + + o4_byte=o3_byte; + o3_byte=o2_byte; + o2_byte=o1_byte; + o1_byte=byte; //current one is next time order-1 + + + // Check if we run out of memory, in that case, flush the encoder + + if(ppmc_out_of_memory==1) + { + printf("Flushing memory! Output file might be not decodable.\n"); + ppmc_flush_mem_enc(); + } + + + } + + + // Flush range coder + range_coder_flush(&rc_coder); + + // Free memory + ppmc_free_memory(); + + + // Print bpb and kbyps + printf("%s at %f bpb.\n",argv[1],((float)filesize(file_output)/(float)size_file_input)*(float)8); + + + // Close file handles + fclose(file_input); + fclose(file_output); + + + + // Nicely exit + return 0; +} + + +// Routines not used by ppmc but rather by main. +// Not including the range coder. + + +// Returns the file size of a given file. +long filesize(FILE *stream) +{ + long curpos, length; + + curpos = ftell(stream); + fseek(stream, 0L, SEEK_END); + length = ftell(stream); + fseek(stream, curpos, SEEK_SET); + return length; +} + + diff --git a/examples/ppmc/luca/range.c b/examples/ppmc/luca/range.c new file mode 100644 index 0000000..168ddc2 --- /dev/null +++ b/examples/ppmc/luca/range.c @@ -0,0 +1,212 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "range.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + +This module contains the routines of both the range coder and decoder. + +The range coder works internally in 32 bits, and uses bytes as symbols. +Also the end of message symbol is used. So z=257. + +Both input and output use rc_file as the file stream. Of course we can't +code and decode at the same time. 
All the input or output comes from the +same file, no matter what range coder structure are we using. The modules +here provided don't manage the io except for reading and writing, they +don't open nor close the files. The reading and writing is done via +putc and getc. +*/ + +#include "range.h" +#include + +/* + Inits the range coder state. Must be called before encoding any symbol. + It uses a magic number 0xB3 as the first byte outputted. + -rangecoder *rc, the range coder to be used. + + Shoulde be called like that: + range_coder_init(&o0_rc_state,file_output); + */ +void range_coder_init(rangecoder *rc, FILE *stream) +{ + rc_file=stream; + rc->low=0; //define state + rc->range=0x80000000; + rc->byte_buffer=0xB3; //magic number + rc->help=0; //temp value +} + + +/* + Encodes a symbol. + -rangecoder *rc, the range coder to be used. + -unsigned long tot_f, the maximum cumulative frequency + -unsigned long lt_f, the cumulative probabilty of the symbol + -unsigned long sy_f, the probability of the symbol + */ +void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f) +{ + unsigned long temp, r; + + range_coder_renormalize(rc); //&rc? + + r=rc->range/tot_f; + temp=r*lt_f; + if(lt_f+sy_frange=r*sy_f; + else + rc->range-=temp; + rc->low+=temp; +} + +/* + Renormalizes the state when coding. + -rangecoder *rc, the range coder to be used. 
+ */ + +void range_coder_renormalize(rangecoder *rc) +{ + while(rc->range<=(unsigned long)0x00800000) + { + if(rc->low<(unsigned long)0x7F800000) + { + putc(rc->byte_buffer,rc_file); + for(;rc->help;rc->help--) + putc(0xFF,rc_file); + rc->byte_buffer=(unsigned char)(rc->low>>23); + } + else + { + if(rc->low&(unsigned long)0x80000000) + { + putc(rc->byte_buffer+1,rc_file); + for(;rc->help;rc->help--) + putc(0x00,rc_file); + rc->byte_buffer=(unsigned char)(rc->low>>23); + } + else + rc->help++; + } + rc->range<<=8; + rc->low=(rc->low<<8)&(unsigned long)(0x7FFFFFFF); + } +} + + +/* + Flushes the encoder. Must be called when the coding is done. + -rangecoder *rc, the range coder to be used. + + Shoulde be called like that: + range_coder_flush(&o0_rc_state); + */ +void range_coder_flush(rangecoder *rc) +{ + unsigned long tmp; + + range_coder_renormalize(rc); + tmp = rc->low >> 23; + if (tmp > 0xff) + { + putc(rc->byte_buffer+1,rc_file); + for(; rc->help; rc->help--) + putc(0,rc_file); + } + else + { + putc(rc->byte_buffer,rc_file); + for(; rc->help; rc->help--) + putc(0xff,rc_file); + } + + putc(tmp & 0xff,rc_file); + putc((tmp = rc->low >> (23-8)) & 0xff,rc_file); +} + + +/* + Inits the range decoder state. Also checks for the magic number, and + quits in case it isn't the first, so be careful. + -rangecoder *rc, the range coder to be used. + */ +void range_decoder_init(rangecoder *rc, FILE *stream) +{ + unsigned int _rd_c; + + rc_file=stream; + if((_rd_c=getc(rc_file))!=0xB3) + { + printf("\nThis is not range coded data. Magic number not found. Exiting."); + exit(1); + } + rc->byte_buffer=getc(rc_file); + rc->low=rc->byte_buffer>>1; + rc->range=0x80; +} + + +/* + Decode a symbol, get its cumulative probability. +Input: +-rangecoder *rc, the range coder to be used. 
+-unsigned long tot_f, the maximum cumulative probability +Output: +-unsigned long, cumulative probability of the current symbol +Should be called like that: +current_cump=range_decoder_decode(&o0_rc_state,o0_tot_f); +*/ +unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f) +{ + unsigned long temp; + + range_decoder_renormalize(rc); + rc->help=rc->range/tot_f; + temp=rc->low/rc->help; + if(temp>=tot_f) + return tot_f-1; + else + return temp; +} + + +/* + Updates the state so next symbol can be decoded. +Input: +-rangecoder *rc, the range coder to be used. +-unsigned long tot_f, the maximum cumulative probability +-unsigned long lt_f, the cumulative probabilty of the symbol +-unsigned long sy_f, the probability of the symbol + +*/ +void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f) +{ + unsigned long temp; + + temp=rc->help*lt_f; + rc->low-=temp; + if(lt_f+sy_frange=rc->help*sy_f; + else + rc->range-=temp; +} + + +/* + Renormalizes the state while decoding. + -rangecoder *rc, the range coder to be used. + */ +void range_decoder_renormalize(rangecoder *rc) +{ + while(rc->range<=0x00800000) + { + rc->low=(rc->low<<8)|((rc->byte_buffer<<7)&0xFF); + rc->byte_buffer=getc(rc_file); + rc->low |= rc->byte_buffer >> (1); + rc->range<<=8; + } +} + diff --git a/examples/ppmc/luca/range.h b/examples/ppmc/luca/range.h new file mode 100644 index 0000000..04b6fcc --- /dev/null +++ b/examples/ppmc/luca/range.h @@ -0,0 +1,39 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "range.h" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Declarations for the coder. 
+*/ + +#include + +typedef struct{ + unsigned long low, range, help; + unsigned char byte_buffer; +}rangecoder; + +FILE *rc_file; + +void range_coder_init(rangecoder *rc, FILE *stream); //coding routines +void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f); +void range_coder_renormalize(rangecoder *rc); +void range_coder_flush(rangecoder *rc); +void range_decoder_init(rangecoder *rc, FILE *stream);//decoding routines +unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f); +void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f); +void range_decoder_renormalize(rangecoder *rc); + + +typedef unsigned long code_value; +#define CODE_BITS 32 +#define Top_value ((code_value)1 << (CODE_BITS-1)) +#define SHIFT_BITS (CODE_BITS - 9) +#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1) +#define Bottom_value (Top_value >> 8) +#define outbyte(cod,x) putc(x,stdout) +#define inbyte(cod) getc(stdin) diff --git a/examples/ppmc/luca/unppmc.c b/examples/ppmc/luca/unppmc.c new file mode 100644 index 0000000..501f0c4 --- /dev/null +++ b/examples/ppmc/luca/unppmc.c @@ -0,0 +1,204 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this program for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "unppmc.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + + +Part of the ppmc decoder. + +This module is the main module and calls the different modules to do +the decoding of a file. When done prints kbyps. 
+*/ + + +// Bibliotecas necesarias +#include +#include +#include "range.h" //the range coder functions and data +#include "ppmcdata.h" + + +// Declaracion de funciones del ppmcmain.c +long filesize(FILE *stream); + + + + +//Main +void main (int argc, char *argv[]) +{ + unsigned long counter, //temporal counter for loops like for or while + counter2, //another temporal counter for sub loops + size_file_output, //the size of the output file + main_counter; //used in main + char expected_flush=0; //used for checking flushing which can't be done + + + // Print title, version and copyright + printf("UNPPMC using range coder.\n"); + printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n"); + printf("Permission is granted to make verbatim copies of this program for private\n"); + printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n"); + + + + // Check for correct number of parameters + if(argc!=3) + { + printf("Bad number of arguments.\n"); + exit(1); + } + + + // Try to open input and output files + if((file_input=fopen(argv[1],"r+b"))==NULL) + { + printf("Couldn't open %s.\n",argv[1]); + exit(1); + } + + if((file_output=fopen(argv[2],"w+b"))==NULL) + { + printf("Couldn't create %s.\n",argv[2]); + exit(1); + } + + + // Get output length + fread(&size_file_output,1,4,file_input); + + + // Initialize ppmc decoder + ppmc_alloc_memory(); + ppmc_initialize_contexts(); + ppmc_decoder_initialize(); + + + + // Initialize decoder + range_decoder_init(&rc_decoder,file_input); + + + // Start main loop which decodes the file + main_counter=size_file_output-4; //take in account the bytes already written + expected_flush=0; //we don't expect a flush yet + + while(main_counter!=0) + { + + // Try to decode current byte in order-4 if possible, else in lower ones + ppmc_decode_order4(); + if(byte==-1) + ppmc_decode_order3(); + if(byte==-1) + { + ppmc_decode_order2(); + if(byte==-1) + { + ppmc_decode_order1(); + if(byte==-1) + { + 
ppmc_decode_order0(); + if(byte==-1) //check if it was an escape code + { + // Decode in order-(-1) + ppmc_get_totf_ordern1(); + symb_cump=range_decoder_decode(&rc_decoder,total_cump); + byte=ppmc_get_symbol_ordern1(); + ppmc_get_prob_ordern1(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + coded_in_order=0; //update all orders + + // Now see if it's the code of flushing + + if(symb_cump==256) + { + printf("Flushing.\n"); + ppmc_flush_mem_dec(); + expected_flush=0; + continue; //do not output byte nor update + } + + } + } + } + } + + // Output byte and update model + + fputc(byte,file_output); + + switch(coded_in_order) //update exclusion + { + case 0: ppmc_update_order0(); //update only order-0 + case 1: ppmc_update_order1(); //update order-0 and order-1 + case 2: ppmc_update_dec_order2(); //update order-0 1 and 2 + case 3: ppmc_update_dec_order3(); + case 4: ppmc_update_dec_order4(); + default: break; + }; + + + // Check if flushing has to be done and has not been done. + // This is optional, in case you limit the memory usage, you don't + // need to include this + + if(expected_flush==1) // If flushing didn't happen, we can't decode + { + printf("Can't decompress file. Not enough memory.\nTry in a machine with more memory.\n"); + exit(1); + } + if(ppmc_out_of_memory==1) + { + expected_flush=1; // Next code must be a flush code, otherwise we don't + // have enough memory, and therefore we can't decode + } + + + // Update order variables + + o4_byte=o3_byte; + o3_byte=o2_byte; + o2_byte=o1_byte; + o1_byte=byte; //current one, is next time order-1 + + // Byte decoded and model updated, loop + main_counter--; + + + } + + + ppmc_free_memory(); + + // Close file handles and free memory + fclose(file_input); + fclose(file_output); + + + // Nicely exit + exit(0); +} + + +// Ruotines not used by ppmc but rather by main. +// Not including the range coder. + + +// Returns the file size of a given file. 
+long filesize(FILE *stream) +{ + long curpos, length; + + curpos = ftell(stream); + fseek(stream, 0L, SEEK_END); + length = ftell(stream); + fseek(stream, curpos, SEEK_SET); + return length; +} + + diff --git a/examples/ppmc/ppmc.c b/examples/ppmc/ppmc.c new file mode 100644 index 0000000..91e448f --- /dev/null +++ b/examples/ppmc/ppmc.c @@ -0,0 +1,3084 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmc.c" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains the whole ppmc encoder. It uses hash tables for + managing most of the orders. And a maximum order of 4. It codes bytes. + Order-1-0-(-1) are all handled in tables. Order-2 has a table with + direct hashing with pointers to the linked lists. Order-4 and order-3 + both have hash tables with pointers to contexts in a linked lists which + finally have a pointer to the start of the linked list with the + probability distribution. Update exclusion is used, but exclusion is not. + + Please, note that if the machine where the decoder is run doesn't has as + much memory as the computer where the encoder was ran, the decoder will + not be able to properly decode the file, because it will not be able to + keep track of new statistics, in this case it will just exit. + + For applications where the loss of data is not admisible, I suggest you to + limit both encoder and decoder's memory requeriments to a given minimum + which both machines have. +*/ + + +#include +#include +#include "range.h" +#include "ppmcdata.h" + + + +// Ruotines used by ppmc. Not including the range coder. +// +// They are for initializing of both encoder and decoder, and unless there +// are two version, both encoder and decoder use the same routines. 
Like +// "ppmc_initialize_contexts". + + +// This one allocs the memory needed by ppmc, and adjust some pointers used +// for allocating elements in the linked lists. The mempool arrays must be +// initialized now. +void ppmc_alloc_memory(void) +{ + unsigned long counter; + + + // Init mempool buffers + + for(counter=0;counter!=_mempool_max_index;++counter) + { + _bytes_pool_array[counter]=0; + _context_pool_array[counter]=0; + } + + _bytes_pool_index=1; //first entry will be used now + _context_pool_index=1; + + + // Allocate memory for ppmc structures and adjust some variables + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + + //save pointers in the array for freeing + _bytes_pool_array[0]=_bytes_pool; + _context_pool_array[0]=_context_pool; + + + //adjust variables + _bytes_pool_max=_bytes_pool+_bytes_pool_elements; + _context_pool_max=_context_pool+_context_pool_elements; + + ppmc_out_of_memory=0; //we still have memory +} + + +// This routine initializes all the contexts, and all the tables including +// those who care about the number of bytes defined in a context. 
+void ppmc_initialize_contexts(void) +{ + unsigned long counter, counter2; + + + // Order-0 + for(counter=0;counter!=256;++counter) //clear table + order0_array[counter]=0; + + order0_defined_bytes=0; //adjust variables + order0_max_cump=0; + + + // Order-1 + for(counter=0;counter!=256;++counter) //erase every table of every context + for(counter2=0;counter2!=256;++counter2) + order1_array[counter][counter2]=0; + + for(counter=0;counter!=256;++counter) //adjust variables + { + order1_defined_bytes_array[counter]=0; + order1_max_cump_array[counter]=0; + } + + + // Order-2 + for(counter=0;counter!=65536;++counter) + { + //order2_array[counter].prob=0; //clear pointer to bytes and frequencies + //order2_array[counter].max_cump=0; + order2_array[counter].defined_bytes=0; + } + + + // Order-4-3 + for(counter=0;counter!=65536;++counter) //order-4-3 + { + order4_hasht[counter]=0; + order3_hasht[counter]=0; + } +} + + +// This routine initializes the encode model by outputting as many bytes as +// needed to prepare the models. This should be called before the main loop +// and after the memory has been allocated and tables initialized. +// +// It does not need uses the range coder. It output the first 1 bytes. +void ppmc_encoder_initialize(void) +{ + + // Initialize order-0 and prepare different bytes for orders + fputc((byte=fgetc(file_input)),file_output); + o4_byte=byte; //order-4 + + fputc((byte=fgetc(file_input)),file_output); + o3_byte=byte; //order-3 + + fputc((byte=fgetc(file_input)),file_output); + o2_byte=byte; //order-2 + ppmc_update_order0(); + + fputc((byte=fgetc(file_input)),file_output); + o1_byte=byte; + +} + + +// This routine initializes the decoder model, should be called to do the same +// changes as "ppmc_encoder_initialize()" did. 
+void ppmc_decoder_initialize(void) +{ + + // Initialize order-0 and context bytes + byte=fgetc(file_input); + o4_byte=byte; //order-4 + fputc(byte,file_output); + + byte=fgetc(file_input); + o3_byte=byte; //order-3 + fputc(byte,file_output); + + byte=fgetc(file_input); + o2_byte=byte; //order-2 + + fputc(byte,file_output); //output first byte + ppmc_update_order0(); + + byte=fgetc(file_input); + o1_byte=byte; //order-1 + fputc(byte,file_output); +} + + +// Once coding or decoding is finished you have to call this routine. +// It must be called when done. +void ppmc_free_memory(void) +{ + unsigned long counter; + + // Free the memory buffers + + for(counter=0;counter!=_mempool_max_index;++counter) + { + if(_bytes_pool_array[counter]!=0) + free(_bytes_pool_array[counter]); + + if(_context_pool_array[counter]!=0) + free(_context_pool_array[counter]); + } + +} + + +// This routine flushes the memory and restarts all the tables of +// probabilities, current order bytes are not modified, this function +// is called when we ran out of memory. We have to output the code +// number 256 which means memory flushing, for doing this we have to go +// to order-(-1) so we have to output an escape code in all the orders +// till we reach order-(-1) where we can code it. Then we free all the +// memory, alloc it again, and reinitialize all the orders. +// +// However we may find the case when the current order is not initialized, +// in this case we don't need to output an escape code. 
+void ppmc_flush_mem_enc(void) +{ + + // Code an escape code in order-4 + if(ppmc_get_totf_order4()!=0) //if 0 no need of escape code + { + + ppmc_get_escape_prob_order4(); //get prob and cump + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + } + + + // Code an escape code in order-3 + if(ppmc_get_totf_order3()!=0) //if 0 no need of escape code + { + + ppmc_get_escape_prob_order3(); //get prob and cump + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + } + + + // Code an escape code in order-2 + + o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte); + + // First check if current order-2 context is empty + if(order2_array[o2_cntxt].defined_bytes!=0) //it's not empty + { + ppmc_get_totf_order2(); + ppmc_get_escape_prob_order2(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + } + + + // Code an escape code in order-1 + + // First check if current order-1 table is empty + if(order1_defined_bytes_array[o1_byte]!=0) //it's not empty + { + ppmc_get_totf_order1(); + ppmc_get_escape_prob_order1(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + } + + + // Code an escape code in order-0. Order-0 always has at least one symbol + + ppmc_get_totf_order0(); + ppmc_get_escape_prob_order0(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + + + // Now we can code the code 256 + + symb_prob=1; + symb_cump=256; + total_cump=257; + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + + // Now that decoder knows the flushing, free memory and reinit + + ppmc_free_memory(); + ppmc_alloc_memory(); + ppmc_initialize_contexts(); + + + // Be sure that order-0 has at least one probability + + order0_array[o1_byte]++; + order0_max_cump++; + order0_defined_bytes++; + +} + + +// When the decoder gets the symbol of flushing, most of the job is done +// because we already got all the escape codes, so we only have to reinit. 
+void ppmc_flush_mem_dec(void) +{ + + // Free memory and reinit + + ppmc_free_memory(); + ppmc_alloc_memory(); + ppmc_initialize_contexts(); + + + // Be sure that order-0 has at least one probability + + order0_array[o1_byte]++; + order0_max_cump++; + order0_defined_bytes++; + + +} + + + +// ORDER-(-1) functions, also called ordern1 (Negative1) in functions +// +// Because order-(-1) does not need to update its probability tables, it +// has no tables, and relies on the fact that the cump of byte is its own +// value, and the probability is fixed, 1, and the total cump is 257. +// +// The alphabet has the following distribution: 0-255 the bytes. 256 is +// an special symbol which means that we have flushed the encoder tables, +// and thus the encoder must flush its tables too. +// +// The rest of the tables only have 256 symbols, because we have no need +// of assign a symbol to the flush code (which already is the order-(-1) +// table) nor to the escape code. + + +// Gets the probability for a given symbol in the order-(-1) (ordern1) +void ppmc_get_prob_ordern1(void) +{ + symb_cump=byte; //its value + symb_prob=1; //flat probability + total_cump=257; //total cump +} + + +// Returns in the variable "total_cump" the current total cump of +// order-(-1) +void ppmc_get_totf_ordern1(void) +{ + total_cump=257; //this is fixed +} + + +// Returns the symbol for a given cump under order-(-1) +unsigned long ppmc_get_symbol_ordern1 (void) +{ + return symb_cump; +} + + + +// ORDER-0 functions +// +// Due to the fact that order-0 has no context, I use an array for all the +// probabilities under order-0, just as you could do in a basic model for +// arithmetic coding. +// +// The main array is: order0_array. Where order0_array[byte] contains the +// probability for a given byte. The same applies to order-1. +// +// To ensure that the updating and coding is done correctly, "byte" can't +// be changed till all the coding and updating is done. 
+ + +// Returns in the variable "total_cump" the current total cump of +// order-0 +void ppmc_get_totf_order0(void) +{ + // Total cump is current total cump plus the escape for the escape code + total_cump=order0_defined_bytes+order0_max_cump; +} + + +// Codes a byte under order-0 and returns 1, otherwise it returns a 0 and +// has coded an escape code. In this case further coding is needed. +// +// Returns: 1 in case a byte was coded. 0 in case of escape code. +char ppmc_code_byte_order0(void) +{ + unsigned long counter; + + ppmc_get_totf_order0(); //get total cump + + // See if the byte is present + if(order0_array[byte]==0) //a probability of 0 + { + + // Because it was not present, output an escape code, prepare variables + + symb_cump=order0_max_cump; //obviously its cump is current max_cump + //without escape code's space + + symb_prob=order0_defined_bytes; //the number of defined bytes + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; //byte not coded + } + else + { + + coded_in_order=0; + + // The symbol is present, code it under order-0 + + symb_prob=order0_array[byte]; //get probability directly + + // Make cump for current symbol + + symb_cump=0; //for first symbol is 0 + for(counter=0; counter!=byte ; ++counter) + symb_cump+=order0_array[counter]; //sum probabilities before our symbol + + // Code the symbol + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //symbol coded under order-0 + } +} + + +// This functions update order-0 probabilities with current byte, also takes +// care about updating variables, and renormalizing. 
+void ppmc_update_order0(void) +{ + if(order0_array[byte]==0) + { + // It had a zero probability + order0_array[byte]++; //increment symbol probability + ++order0_defined_bytes; //new byte defined + ++order0_max_cump; //total cump + return; + } + else + { + // It had a non-zero probability + + // Increment its probability + order0_array[byte]++; //increment symbol probability + ++order0_max_cump; //total cump + + // Check to see if its the maximum in this case renormalize + if(order0_array[byte]==255) + ppmc_renormalize_order0(); + + return; + } +} + + +// This functions renormalizes the probabilities at order-0 updating variables +void ppmc_renormalize_order0(void) +{ + unsigned long counter; + + // Initialize variables + order0_defined_bytes=0; //clear them + order0_max_cump=0; + + // Loop over all probabilities, divide them by a factor of 2 and update variables + for(counter=0 ; counter!=256 ; ++counter) + { + order0_array[counter]>>=1; //divide by a factor of 2 + + if(order0_array[counter]!=0) //see if it has a non zero probability + order0_defined_bytes++; + + order0_max_cump+=order0_array[counter]; //sum to the total cump + } +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of a escape code it returns -1 +void ppmc_decode_order0(void) +{ + unsigned long current_cump, counter; + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order0(); //total cump needed for decoding + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=order0_max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order0(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + for(counter=0 ; counter!= 
256 ; ++counter) + { + if(symb_cump>=1; //divide by a factor of 2 + + if(order1_array[o1_byte][counter]!=0) //see if it has a non zero probability + order1_defined_bytes_array[o1_byte]++; + + order1_max_cump_array[o1_byte]+=order1_array[o1_byte][counter]; //sum to the total cump + } +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +void ppmc_decode_order1(void) +{ + unsigned long current_cump, counter; + + + // First check if current order-1 table is empty + if(order1_defined_bytes_array[o1_byte]==0) //it's empty + { + byte=-1; //byte not coded, nothing done + return; + } + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order1(); //total cump needed for decoding + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=order1_max_cump_array[o1_byte]) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order1(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + for(counter=0 ; counter!= 256 ; ++counter) + { + if(symb_cumpbyte==byte) + goto ppmc_o2_byte_found; //bad thing, I know, anyone has a better idea? + symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. 
+ + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o2_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=order2_array[o2_cntxt].max_cump; + symb_prob=order2_array[o2_cntxt].defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; + + + // That code is executed when the byte is found in the linked list + +ppmc_o2_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=2; //successfully coded under order-2 + + o2_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-2 +} + + +// This functions update order-2 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// Of course "o2_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. +// +// This updating is only for encoding. +void ppmc_update_order2(void) +{ + + // First of all check if that's the first byte in this context, in that case + // we have to initialize some variables in the context structure. 
+ + if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + order2_array[o2_cntxt].defined_bytes=1; + order2_array[o2_cntxt].max_cump=1; + order2_array[o2_cntxt].prob=_bytes_pool; + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order two, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==2) //coded under order-2 + { + + // Update its count and variables of this context and check for renormalization + + o2_ll_node->freq++; //increment its frequency (rather probability) + + order2_array[o2_cntxt].max_cump++; //total cump + + if(o2_ll_node->freq==255) //do we have to renormalize? 
+ ppmc_renormalize_order2(); //renormalize + + } + else + { + + // Once every paranoid check has been done we are sure that this byte + // did not existed and so we have to create a new node in the linked + // list. Also we have to take care of memory issues. + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o2_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + order2_array[o2_cntxt].max_cump++; //total cump + order2_array[o2_cntxt].defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + +} + + +// This functions renormalizes the probabilities at order-2 updating context +// variables. +void ppmc_renormalize_order2(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + // Initialize variables. Defined bytes remain the same. 
+ order2_array[o2_cntxt].max_cump=0; //clear them + + node=order2_array[o2_cntxt].prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + order2_array[o2_cntxt].max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + + //printf("\nRenormalization, context:%c%c",o2_byte,o1_byte); + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o2_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o2_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order2(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + // Initialize o2_cntxt + + o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte); + + + // First check if current order-2 context is empty + if(order2_array[o2_cntxt].defined_bytes==0) //it's empty + { + byte=-1; //byte not coded, nothing done + return; + } + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order2(); //total cump needed for decoding + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=order2_array[o2_cntxt].max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order2(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + 
node=order2_array[o2_cntxt].prob; //get pointer to linked lists + + while(1) + { + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. + } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o2_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=2; + + return; + } + +} + + +// This is the routine for updating while decoding. We have to search the byte +// in the linked list, if it's present, update its count, otherwise we have +// hitted the end of the linked list, and there we have to create a new node. +// +// Of course if the byte was matched in order-2 we'll have a pointer to it +// in "o2_ll_node" so we don't need to read the linked list. (we already did +// in decoding) +// +// Another case which we also have to specially deal with, this is the case +// when the context has not been initalized yet. +void ppmc_update_dec_order2(void) +{ + struct _byte_and_freq *node; + + + // Handle the case when the context is not initialized + // This code is the same as the one for the encoding. 
+ + if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + order2_array[o2_cntxt].defined_bytes=1; + order2_array[o2_cntxt].max_cump=1; + order2_array[o2_cntxt].prob=_bytes_pool; + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + + return; //nothing else to do + } + + + // Current context is initalized, proceed + + if(coded_in_order==2) //check if it was decoded under order-2 + { + + // We can be sure that the pointer "o2_ll_node" points to its entry, and + // it has a non 0 probability (otherwise it couldn't be coded) so just + // update its probability and max_cump + + o2_ll_node->freq++; //the probability of the byte + order2_array[o2_cntxt].max_cump++; //the max_cump + + if(o2_ll_node->freq==255) //check for renormalization + ppmc_renormalize_order2(); + + } + else + { + + // An escape code was decoded under order-2, we have to read till the + // end of the linked list so we can add a new node for this new byte. 
+ + node=order2_array[o2_cntxt].prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + + // We reached the end of the linked list, add a new node if possible, + // we are using the same code of "ppmc_update_order2()" with the + // difference that the pointer to the linked list is "node" + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + order2_array[o2_cntxt].max_cump++; //total cump + order2_array[o2_cntxt].defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //we are finished updating + + } + +} + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order2(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. 
+ + symb_prob=order2_array[o2_cntxt].defined_bytes; + symb_cump=order2_array[o2_cntxt].max_cump; +} + + + +// ORDER-3 functions +// +// The difference between order-3 and order-3 are just a few, instead of +// keeping a table with the context structures, we keep a hash table with +// pointers to linked lists with the context, so it's only a matter of +// searching current context in the linked list corresponding to its hash +// entry. This is done in "ppmc_get_totf_order3" because that's the first +// routine that both encoding and decoding routines call. + + +// Returns in the variable "total_cump" the current total cump of +// order-3. Must be called while encoding or decoding before anything else +// because it initializes the pointers to the context structure in +// "o3_context" and o3_cntxt. +// +// If the hash entry is not initialized it returns "o3_context"=0 +// If the context is not present in the linked list of context, "o3_context" +// will point to the last element in the linked list. +// If the context is present "o3_context" will point to the context to use. +// One can distinguish the last two by checking the context value of the +// structure, if it's not the same, is the last element. +// +// The routine returns 0 if the hash entry is not initialized or if the +// the context was not present. Otherwise it returns 1, meaning that we +// have to code under this context. 
+char ppmc_get_totf_order3(void) +{ + struct context *cntxt_node; + + + // First make the hash key for order-3 + + o3_cntxt=ppmc_order3_hash_key(o1_byte,o2_byte,o3_byte); + full_o3_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16); //order-3 + + + // Now check the hash entry in the table + + if(order3_hasht[o3_cntxt]==0) //if 0, not initialized + { + + o3_context=0; //no hash entry + + return 0; //hash entry not initialized + } + + + // Now read trough the linked list of context searching current one + + cntxt_node=order3_hasht[o3_cntxt]; + + while(1) + { + + if(cntxt_node->order4321==full_o3_cntxt) //compare context + goto ppmc_gtf_cntxt_found; + + if(cntxt_node->next==0) //end of context's linked list + break; + + cntxt_node=cntxt_node->next; //next element + + } + + + // Once there the context was not found + o3_context=cntxt_node; //pointer to last element in the linked list + + return 0; //it was not present + + + // The context is found, so return pointer and cump + +ppmc_gtf_cntxt_found: + + o3_context=cntxt_node; + + // Total cump is current total cump plus the escape for the escape code + + total_cump=o3_context->defined_bytes+o3_context->max_cump; + + return 1; //context found + +} + + +// Codes a byte under order-3 and returns 1. +// Otherwise it returns a 0. +// +// In case the byte is coded under this context, coded_in_order=3. +char ppmc_code_byte_order3(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order3()==0) + return 0; + + + // See if the byte is present and compute its cump at the same time + + node=o3_context->prob; //pointer to first element in the linked list + + symb_cump=0; //the first symbol always has a 0 cump + + + // Now search the byte in the linked list + + do{ + if(node->byte==byte) + goto ppmc_o3_byte_found; //bad thing, I know, anyone has a better idea? 
+ symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o3_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=o3_context->max_cump; + symb_prob=o3_context->defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; + + + // That code is executed when the byte is found in the linked list + +ppmc_o3_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=3; //successfully coded under order-3 + + o3_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-3 +} + + +// This functions update order-3 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// +// "o3_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. Also "o3_context" must be initialized. +// +// This updating is only for encoding. 
+void ppmc_update_order3(void) +{ + + // First thing first, check if the hash entry is initialized + + if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order3_hasht[o3_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + 
if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==3) //coded under order-3 + { + + // Update its count and variables of this context and check for renormalization + + o3_ll_node->freq++; //increment its frequency (rather probability) + + o3_context->max_cump++; //total cump + + if(o3_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order3(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o3_context" points to the last element, so we can put the new element. 
+ + if(o3_context->order4321==full_o3_cntxt) //chech if that's the last + { //element or the a context found + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o3_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o3_context->max_cump++; //total cump + o3_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o3_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the 
array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + +// This functions renormalizes the probabilities at order-3 updating context +// variables. +void ppmc_renormalize_order3(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Initialize variables. Defined bytes remain the same. 
+ o3_context->max_cump=0; //clear them + + node=o3_context->prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + o3_context->max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o3_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o3_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order3(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order3()==0) + { + byte=-1; + return; + } + + + // Decode current cump + + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=o3_context->max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order3(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=o3_context->prob; //get pointer to linked lists + + while(1) + { + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. 
+ } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o3_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=3; + + return; + } + +} + + +// This is the routine for updating while decoding. The only difference with +// the routine for coding is that when an escape code was coded, "o3_ll_node" +// is not initialized so we have to read till the end of the linked list. +// Fortunately "o3_context" will be initialized so we don't need to read its +// linked list. +void ppmc_update_dec_order3(void) +{ + struct _byte_and_freq *node; + + // First thing first, check if the hash entry is initialized + + if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order3_hasht[o3_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + 
ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==3) //coded under order-3 + { + + // Update its count and variables of this context and check for renormalization + + o3_ll_node->freq++; //increment its frequency (rather probability) + + o3_context->max_cump++; //total cump + + if(o3_ll_node->freq==255) //do we have to renormalize? 
+ ppmc_renormalize_order3(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o3_context" points to the last element, so we can put the new element. + + if(o3_context->order4321==full_o3_cntxt) //chech if that's the last + { //element or the a context found + + // Read till the end of the linked list + + node=o3_context->prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + // Now add element + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o3_context->max_cump++; //total cump + o3_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + 
_bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o3_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct 
_byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order3(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. + + symb_prob=o3_context->defined_bytes; + symb_cump=o3_context->max_cump; +} + + + +// ORDER-4 functions +// +// The routines for order-4 are *equal* to those for order-3, there are a few +// changes like different global variables, and different hash keys. +// +// If you want to go to higher orders, you'd use the same code and data +// structures, with the difference of the context bytes (order4321) +// stored in every context's linked list. + + +// Returns in the variable "total_cump" the current total cump of +// order-4. Must be called while encoding or decoding before anything else +// because it initializes the pointers to the context structure in +// "o4_context" and o4_cntxt. +// +// If the hash entry is not initialized it returns "o4_context"=0 +// If the context is not present in the linked list of context, "o4_context" +// will point to the last element in the linked list. +// If the context is present "o4_context" will point to the context to use. +// One can distinguish the last two by checking the context value of the +// structure, if it's not the same, is the last element. +// +// The routine returns 0 if the hash entry is not initialized or if the +// the context was not present. Otherwise it returns 1, meaning that we +// have to code under this context. 
+char ppmc_get_totf_order4(void) +{ + struct context *cntxt_node; + + + // First make the hash key for order-4 + + o4_cntxt=ppmc_order4_hash_key(o1_byte,o2_byte,o3_byte,o4_byte); + full_o4_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16)+(o4_byte<<24); //order-4 + + + // Now check the hash entry in the table + + if(order4_hasht[o4_cntxt]==0) //if 0, not initialized + { + + o4_context=0; //no hash entry + + return 0; //hash entry not initialized + } + + + // Now read trough the linked list of context searching current one + + cntxt_node=order4_hasht[o4_cntxt]; + + while(1) + { + + if(cntxt_node->order4321==full_o4_cntxt) //compare context + goto ppmc_gtfo4_cntxt_found; + + if(cntxt_node->next==0) //end of context's linked list + break; + + cntxt_node=cntxt_node->next; //next element + + } + + + // Once there the context was not found + o4_context=cntxt_node; //pointer to last element in the linked list + + return 0; //it was not present + + + // The context is found, so return pointer and cump + +ppmc_gtfo4_cntxt_found: + + o4_context=cntxt_node; + + // Total cump is current total cump plus the escape for the escape code + + total_cump=o4_context->defined_bytes+o4_context->max_cump; + + return 1; //context found + +} + + +// Codes a byte under order-4 and returns 1. +// Otherwise it returns a 0. +// +// In case the byte is coded under this context, coded_in_order=4. +char ppmc_code_byte_order4(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order4()==0) + return 0; + + + // See if the byte is present and compute its cump at the same time + + node=o4_context->prob; //pointer to first element in the linked list + + symb_cump=0; //the first symbol always has a 0 cump + + + // Now search the byte in the linked list + + do{ + if(node->byte==byte) + goto ppmc_o4_byte_found; //bad thing, I know, anyone has a better idea? 
+ symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o4_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=o4_context->max_cump; + symb_prob=o4_context->defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; + + + // That code is executed when the byte is found in the linked list + +ppmc_o4_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=4; //successfully coded under order-4 + + o4_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-4 +} + + +// This functions update order-4 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// +// "o4_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. Also "o4_context" must be initialized. +// +// This updating is only for encoding. 
+void ppmc_update_order4(void) +{ + + // First thing first, check if the hash entry is initialized + + if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order4_hasht[o4_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + 
if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==4) //coded under order-4 + { + + // Update its count and variables of this context and check for renormalization + + o4_ll_node->freq++; //increment its frequency (rather probability) + + o4_context->max_cump++; //total cump + + if(o4_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order4(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o4_context" points to the last element, so we can put the new element. 
+ + if(o4_context->order4321==full_o4_cntxt) //chech if that's the last + { //element or the a context found + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o4_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o4_context->max_cump++; //total cump + o4_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o4_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the 
array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + +// This functions renormalizes the probabilities at order-4 updating context +// variables. +void ppmc_renormalize_order4(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Initialize variables. Defined bytes remain the same. 
+ o4_context->max_cump=0; //clear them + + node=o4_context->prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + o4_context->max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o4_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o4_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order4(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order4()==0) + { + byte=-1; + return; + } + + + // Decode current cump + + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=o4_context->max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order4(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=o4_context->prob; //get pointer to linked lists + + while(1) + { + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. 
+ } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o4_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=4; + + return; + } + +} + + +// This is the routine for updating while decoding. The only difference with +// the routine for coding is that when an escape code was coded, "o4_ll_node" +// is not initialized so we have to read till the end of the linked list. +// Fortunately "o4_context" will be initialized so we don't need to read its +// linked list. +void ppmc_update_dec_order4(void) +{ + struct _byte_and_freq *node; + + // First thing first, check if the hash entry is initialized + + if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order4_hasht[o4_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + 
ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order four, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==4) //coded under order-4 + { + + // Update its count and variables of this context and check for renormalization + + o4_ll_node->freq++; //increment its frequency (rather probability) + + o4_context->max_cump++; //total cump + + if(o4_ll_node->freq==255) //do we have to renormalize? 
+ ppmc_renormalize_order4(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o4_context" points to the last element, so we can put the new element. + + if(o4_context->order4321==full_o4_cntxt) //chech if that's the last + { //element or the a context found + + // Read till the end of the linked list + + node=o4_context->prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + // Now add element + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o4_context->max_cump++; //total cump + o4_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + 
_bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o4_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct 
_byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order4(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. + + symb_prob=o4_context->defined_bytes; + symb_cump=o4_context->max_cump; +} + diff --git a/examples/ppmc/ppmc.h b/examples/ppmc/ppmc.h new file mode 100644 index 0000000..e21545e --- /dev/null +++ b/examples/ppmc/ppmc.h @@ -0,0 +1,135 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmc.h" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains the definitions of different functions and all the + data structures defined by ppmc. Also contains defines. 
+*/ + +// Definitions + +#define ppmc_order4_hash_size 65536 +#define ppmc_order4_hash_key(k,j,i,l) ( (k)+(j<<8)+(i<<1)+(l<<9) )& ppmc_order4_hash_size-1 +#define ppmc_order3_hash_size 65536 +#define ppmc_order3_hash_key(k,j,i) ((k)+(j<<7)+(i<<11)) & ppmc_order3_hash_size-1 +#define ppmc_order2_hash_key(k,j) ((k)+(j<<8)) +#define _bytes_pool_elements 125000 //this is used the first time + //that we allocate memory, that's + //the number of entries +#define _bytes_pool_elements_inc 125000 //if we need to alloc again, this + //is the number of entries to get +#define _context_pool_elements 50000 +#define _context_pool_elements_inc 50000 + +#define _mempool_max_index 1000 //the number of entries in the array with + //pointers + + +// Data structures + +// This structure contains a single element of a linked lists which contains +// the probability distribution of a given order. This structure takes 6 bytes. +struct _byte_and_freq{ +unsigned char byte; //the byte itself +unsigned char freq; //and the frequency of it +struct _byte_and_freq *next; //pointer to next element in linked list or 0 +}; + + +// This structure is used for both order-3 and order-4. It takes 20 bytes, +// and it can still hold another byte more. (only 19 being used) +// Order 2-1-0-(-1) use different structures for a faster accessing. +struct context{ +struct context *next; //next context in the hash entry +unsigned long order4321; //order-4-3-2-1 (or order-3-2-1 for order-3) +struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution +unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 ) +unsigned int defined_bytes; //the number of bytes in this context +}; + +// That's the same but for order-2 where there's no hash collisions. 
// Order-2 context: indexed directly, so it needs neither the "next" link
// nor the stored context bytes that struct context carries.
struct context_o2{
struct _byte_and_freq *prob;    //pointer to linked lists containing probability distribution
unsigned int max_cump;          //maximum cumulative probability (can't exceed (2^16)-1 )
unsigned int defined_bytes;     //the number of bytes in this context
};


// Declaration of functions


// Functions for initializing
void ppmc_alloc_memory(void);
void ppmc_initialize_contexts(void);
void ppmc_encoder_initialize(void);
void ppmc_decoder_initialize(void);
void ppmc_free_memory(void);
void ppmc_flush_mem_enc(void);
void ppmc_flush_mem_dec(void);

// Functions for order-(-1)
void ppmc_get_prob_ordern1(void);
unsigned long ppmc_get_symbol_ordern1(void);
void ppmc_get_totf_ordern1(void);
// NOTE(review): the next prototype is repeated in the order-1 group below;
// it may have been meant for order-(-1) -- confirm.
void ppmc_renormalize_order1(void);

// Functions for order-0
void ppmc_get_totf_order0(void);
char ppmc_code_byte_order0(void);
void ppmc_update_order0(void);
void ppmc_renormalize_order0(void);
void ppmc_decode_order0(void);
void ppmc_get_escape_prob_order0(void);
void ppmc_get_prob_order0(void);

// Functions for order-1
void ppmc_get_totf_order1(void);
char ppmc_code_byte_order1(void);
void ppmc_update_order1(void);
void ppmc_renormalize_order1(void);
void ppmc_decode_order1(void);
void ppmc_get_escape_prob_order1(void);
void ppmc_get_prob_order1(void);


// Functions for order-2
void ppmc_get_totf_order2(void);
char ppmc_code_byte_order2(void);
void ppmc_update_order2(void);
void ppmc_renormalize_order2(void);
void ppmc_decode_order2(void);
void ppmc_update_dec_order2(void);
void ppmc_get_escape_prob_order2(void);
void ppmc_get_prob_order2(void);


// Functions for order-3
char ppmc_get_totf_order3(void);
char ppmc_code_byte_order3(void);
void ppmc_update_order3(void);
void ppmc_renormalize_order3(void);
void ppmc_decode_order3(void);
void ppmc_update_dec_order3(void);
void ppmc_get_escape_prob_order3(void);
void ppmc_get_prob_order3(void);


// Functions for order-4
char ppmc_get_totf_order4(void);
char ppmc_code_byte_order4(void);
void ppmc_update_order4(void);
void ppmc_renormalize_order4(void);
void ppmc_decode_order4(void);
void ppmc_update_dec_order4(void);
void ppmc_get_escape_prob_order4(void);
void ppmc_get_prob_order4(void);



diff --git a/examples/ppmc/ppmcdata.c b/examples/ppmc/ppmcdata.c
new file mode 100644
index 0000000..bea0926
--- /dev/null
+++ b/examples/ppmc/ppmcdata.c
@@ -0,0 +1,119 @@
/*
 Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
 Permission is granted to make verbatim copies of this file for private
 use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.

 This file is: "ppmcdata.c"
Email: arturo@arturocampos.com
Web: http://www.arturocampos.com

Part of the ppmc encoder and decoder.

This module contains global data.
*/

#include "ppmc.h"       //defines
#include "range.h"

// Order-4 uses a hash table which points to the start of a linked list with
// the different context, which has the cump, the number of different symbols
// and a pointer to the linked list with the bytes and frequencies.
// Order-3 is almost the same, both take 262144 bytes.
// (Byte counts in this file are the author's figures; the real sizes
// depend on the ABI.)
struct context *order4_hasht[ppmc_order4_hash_size];

struct context *order3_hasht[ppmc_order3_hash_size];


// The array for order-2 is different, as we do directly hashing, and thus
// we have no need to do the stuff of linked lists for the context itself,
// so it contains the context used. This takes 1310720 bytes.
struct context_o2 order2_array[65536];


// Those are the multiple arrays for order-1. It takes 65536 bytes.
unsigned char order1_array[256][256];
unsigned int order1_defined_bytes_array[256]; //the defined bytes in every context
unsigned int order1_max_cump_array[256];      //max cump of every context


// This is the array for order-0. It takes 256 bytes.
unsigned char order0_array[256];
unsigned int order0_defined_bytes;
unsigned int order0_max_cump;


// No need of variables for order-(-1), because it's fixed.


// Those are the pointers and variables used for managing the mem pool for
// both context, and bytes and frequencies.
// "_bytes_pool" / "_context_pool" point to the next free entry of the
// current buffer; the "_max" pointers mark one past its last entry.
struct _byte_and_freq *_bytes_pool,       //pointer to pool containing linked
                                          //lists with bytes and frequencies
                      *_bytes_pool_max;   //the maximum of this buffer
struct context *_context_pool;            //pointer to pool containing contexts
struct context *_context_pool_max;        //the same as with _bytes_pool

unsigned long _bytes_pool_index;          //index in array of pointers
unsigned long _context_pool_index;

//the following is an array keeping pointers to different buffers. A new
//buffer is allocated when the current one is full, so we always have a
//buffer for linked lists. (without allocating a buffer for every element)
struct _byte_and_freq *_bytes_pool_array[_mempool_max_index];
struct context *_context_pool_array[_mempool_max_index];

char ppmc_out_of_memory;  //0 if we have enough memory, 1 instead, any
                          //routine that needs to allocate memory must
                          //quit if that's 1.



// Variables which contain current byte to code and order
unsigned long byte,     //current byte to code
              o1_byte,  //order-1 byte
              o2_byte,  //order-2 byte
              o3_byte,  //order-3 byte
              o4_byte;  //order-4 byte

unsigned long o2_cntxt;         //used in the hash key of order-2
unsigned long o3_cntxt;         //use as hash key for order-3
unsigned long o4_cntxt;         //use as hash key for order-4
unsigned long full_o3_cntxt;    //o1_byte, o2_byte and o3_byte together
unsigned long full_o4_cntxt;    //order-4-3-2-1

unsigned long coded_in_order;   //in which order the last byte was coded
//it's for update exclusion
//also used for decoding

// Variables used for coding

unsigned long
total_cump,             //the total cumulative probability
  symb_cump,            //the symbol cumulative probability
  symb_prob;            //the symbol frequency

rangecoder rc_coder;    //state of range coder
rangecoder rc_decoder;  //state of range decoder

// File handles

FILE *file_input,       //file to code
     *file_output;      //file where the coded data is placed


// Pointers to linked lists and context structures used for faster updating
// or creation of new nodes, because instead of reading again all the linked
// list till the end (in the case of creation) we have a pointer to the last
// element. In the case that a byte was present in the linked lists but it
// had a 0 count, we just have to update its probability. And in the case
// that it already was present and we coded it under that context or a lower
// one, we just have to update its probability.


struct _byte_and_freq *o2_ll_node;  //pointer to linked lists under order-2
//where it points depends on which
//order the byte was coded in.
struct _byte_and_freq *o3_ll_node;  //the same but for order-3
struct _byte_and_freq *o4_ll_node;  //the same but for order-4

struct context *o3_context;     //pointer to current order-3 context
struct context *o4_context;     //pointer to current order-4 context
diff --git a/examples/ppmc/ppmcdata.h b/examples/ppmc/ppmcdata.h
new file mode 100644
index 0000000..f4bddc1
--- /dev/null
+++ b/examples/ppmc/ppmcdata.h
@@ -0,0 +1,117 @@
/*
 Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
 Permission is granted to make verbatim copies of this file for private
 use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.

 This file is: "ppmcdata.h"
 Email: arturo@arturocampos.com
 Web: http://www.arturocampos.com

 Part of the ppmc encoder and decoder.

 This module contains externs definition for global data.
*/

#include "ppmc.h"

// Order-4 uses a hash table which points to the start of a linked list with
// the different context, which has the cump, the number of different symbols
// and a pointer to the linked list with the bytes and frequencies.
// Order-3 is almost the same, both take 262144 bytes.
extern struct context *order4_hasht[];

extern struct context *order3_hasht[];


// The array for order-2 is different, as we do directly hashing, and thus
// we have no need to do the stuff of linked lists for the context itself,
// so it contains the context used. This takes 1310720 bytes.
extern struct context_o2 order2_array[];


// Those are the multiple arrays for order-1. It takes 65536 bytes.
extern unsigned char order1_array[256][256];
extern unsigned int order1_defined_bytes_array[]; //the defined bytes in every context
extern unsigned int order1_max_cump_array[];      //max cump of every context


// This is the array for order-0. It takes 256 bytes.
extern unsigned char order0_array[];
extern unsigned int order0_defined_bytes;
extern unsigned int order0_max_cump;


// Those are the pointers and variables used for managing the mem pool for
// both context, and bytes and frequencies.
extern struct _byte_and_freq *_bytes_pool,      //pointer to pool containing linked
                                                //lists with bytes and frequencies
                             *_bytes_pool_max;  //the maximum of this buffer
extern struct context *_context_pool;           //pointer to pool containing contexts
extern struct context *_context_pool_max;       //the same as with _bytes_pool

extern unsigned long _bytes_pool_index;         //index in array of pointers
extern unsigned long _context_pool_index;

//the following is an array keeping pointers to different buffers. A new
//buffer is allocated when the current one is full, so we always have a
//buffer for linked lists. (without allocating a buffer for every element)
extern struct _byte_and_freq *_bytes_pool_array[_mempool_max_index];
extern struct context *_context_pool_array[_mempool_max_index];

extern char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any
                                //routine that needs to allocate memory must
                                //quit if that's 1.
+ + + +// Variables which contain current byte to code and order +extern unsigned long //they are only bytes + byte, //current byte to code + o1_byte, //order-1 byte + o2_byte, //order-2 byte + o3_byte, //order-3 byte + o4_byte; //order-4 byte + +extern unsigned long o2_cntxt; //used in the hash key of order-2 +extern unsigned long o3_cntxt; //use as hash key for order-3 +unsigned long o4_cntxt; //use as hash key for order-4 +extern unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together +extern unsigned long full_o4_cntxt; //order-4-3-2-1 + +extern unsigned long coded_in_order; //in which order the last byte was coded + //it's for update exclusion + //also used for decoding +// Variables used for coding + +extern unsigned long //no need for negative values + total_cump, //the total cumulative probability + symb_cump, //the symbol cumulative probability + symb_prob; //the symbol frequency + +extern rangecoder rc_coder; //state of range coder +extern rangecoder rc_decoder; //state of range decoder + +// File handles + + FILE *file_input, //file to code + *file_output; //file where the coded data is placed + + + +// Pointers to linked lists and context structures used for faster updating +// or creation of new nodes, because instead of reading again all the linked +// list till the end (in the case of creation) we have a pointer to the last +// element. In the case that a byte was present in the linked lists but it +// had a 0 count, we just have to update its probability. And in the case +// that it already was present and we coded it under that context or a lower +// one, we just have to update its probability. + + +extern struct _byte_and_freq *o2_ll_node;//pointer to linked lists under order-2 + //where does it points depends in which + //order the byte was coded. 
+extern struct _byte_and_freq *o3_ll_node; //the same but for order-3 +extern struct _byte_and_freq *o4_ll_node; + +extern struct context *o3_context; //pointer to current order-3 context +extern struct context *o4_context; //pointer to current order-3 context diff --git a/examples/ppmc/ppmcmain.c b/examples/ppmc/ppmcmain.c new file mode 100644 index 0000000..7a99d12 --- /dev/null +++ b/examples/ppmc/ppmcmain.c @@ -0,0 +1,176 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcmain.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + +Part of the ppmc encoder only. + +This module is the main module and calls the different modules to do +the encoding of a file. When done prints bpb and kbyps. +*/ + + +#include +#include +#include "range.h" //the range coder functions and data +#include "ppmcdata.h" + + +long filesize(FILE *stream); + + + +//Main +int main (char argc, char *argv[]) +{ + unsigned long counter, //temporal counter for loops like for or while + counter2, //another temporal counter for sub loops + size_file_input; //the size of the input file + + + // Print title, version and copyright + printf("PPMC using range coder. (without exclusion)\n"); + printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n"); + printf("Permission is granted to make verbatim copies of this program for private\n"); + printf("use only. There is ABSOLUTELY NO WARRANTY. 
Use it at your OWN RISK.\n"); + + // Check for correct number of parameters + if(argc!=3) + { + printf("Bad number of arguments.\n"); + exit(1); + } + + + // Try to open input and output files + if((file_input=fopen(argv[1],"r+b"))==NULL) + { + printf("Couldn't open %s.\n",argv[1]); + exit(1); + } + + if((file_output=fopen(argv[2],"w+b"))==NULL) + { + printf("Couldn't create %s.\n",argv[2]); + exit(1); + } + + + // Check input file length and not accept 0 length files + size_file_input=filesize(file_input); + + if(size_file_input<5) + { + printf("Can't work with files below than 5 bytes!"); + exit(1); + } + + + // First output file length + fwrite(&size_file_input,1,4,file_output); //input length + + + // Initialize ppmc encoder + ppmc_alloc_memory(); //get memory + ppmc_initialize_contexts(); //initialize model + ppmc_encoder_initialize(); + + // Initialize range coder + range_coder_init(&rc_coder,file_output); + + + // Start main loop which codes the file + while((byte=fgetc(file_input))!=EOF) + { + + // Try to code current byte under order-4 if possible then go to lower orders + if(ppmc_code_byte_order4()==0) + if(ppmc_code_byte_order3()==0) + if(ppmc_code_byte_order2()==0) + if(ppmc_code_byte_order1()==0) + if(ppmc_code_byte_order0()==0) //else try to code under order-0 + { + // Code under order-(-1) + ppmc_get_prob_ordern1(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + coded_in_order=0; //update all the tables (unless order-(-1)) + } + + + // Now do update exclusion + + switch(coded_in_order) + { + case 0: ppmc_update_order0(); //update only order-0 + case 1: ppmc_update_order1(); //update order-0 and order-1 + case 2: ppmc_update_order2(); //update order-2 1 and 0... 
+ case 3: ppmc_update_order3(); + case 4: ppmc_update_order4(); + default: break; + }; + + + + // Update order variables + + o4_byte=o3_byte; + o3_byte=o2_byte; + o2_byte=o1_byte; + o1_byte=byte; //current one is next time order-1 + + + // Check if we run out of memory, in that case, flush the encoder + + if(ppmc_out_of_memory==1) + { + printf("Flushing memory! Output file might be not decodable.\n"); + ppmc_flush_mem_enc(); + } + + + } + + + // Flush range coder + range_coder_flush(&rc_coder); + + // Free memory + ppmc_free_memory(); + + + // Print bpb and kbyps + printf("%s at %f bpb.\n",argv[1],((float)filesize(file_output)/(float)size_file_input)*(float)8); + + + // Close file handles + fclose(file_input); + fclose(file_output); + + + + // Nicely exit + return 0; +} + + +// Routines not used by ppmc but rather by main. +// Not including the range coder. + + +// Returns the file size of a given file. +long filesize(FILE *stream) +{ + long curpos, length; + + curpos = ftell(stream); + fseek(stream, 0L, SEEK_END); + length = ftell(stream); + fseek(stream, curpos, SEEK_SET); + return length; +} + + diff --git a/examples/ppmc/range.c b/examples/ppmc/range.c new file mode 100644 index 0000000..168ddc2 --- /dev/null +++ b/examples/ppmc/range.c @@ -0,0 +1,212 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "range.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + +This module contains the routines of both the range coder and decoder. + +The range coder works internally in 32 bits, and uses bytes as symbols. +Also the end of message symbol is used. So z=257. + +Both input and output use rc_file as the file stream. Of course we can't +code and decode at the same time. 
All the input or output comes from the +same file, no matter what range coder structure are we using. The modules +here provided don't manage the io except for reading and writing, they +don't open nor close the files. The reading and writing is done via +putc and getc. +*/ + +#include "range.h" +#include + +/* + Inits the range coder state. Must be called before encoding any symbol. + It uses a magic number 0xB3 as the first byte outputted. + -rangecoder *rc, the range coder to be used. + + Shoulde be called like that: + range_coder_init(&o0_rc_state,file_output); + */ +void range_coder_init(rangecoder *rc, FILE *stream) +{ + rc_file=stream; + rc->low=0; //define state + rc->range=0x80000000; + rc->byte_buffer=0xB3; //magic number + rc->help=0; //temp value +} + + +/* + Encodes a symbol. + -rangecoder *rc, the range coder to be used. + -unsigned long tot_f, the maximum cumulative frequency + -unsigned long lt_f, the cumulative probabilty of the symbol + -unsigned long sy_f, the probability of the symbol + */ +void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f) +{ + unsigned long temp, r; + + range_coder_renormalize(rc); //&rc? + + r=rc->range/tot_f; + temp=r*lt_f; + if(lt_f+sy_frange=r*sy_f; + else + rc->range-=temp; + rc->low+=temp; +} + +/* + Renormalizes the state when coding. + -rangecoder *rc, the range coder to be used. 
+ */ + +void range_coder_renormalize(rangecoder *rc) +{ + while(rc->range<=(unsigned long)0x00800000) + { + if(rc->low<(unsigned long)0x7F800000) + { + putc(rc->byte_buffer,rc_file); + for(;rc->help;rc->help--) + putc(0xFF,rc_file); + rc->byte_buffer=(unsigned char)(rc->low>>23); + } + else + { + if(rc->low&(unsigned long)0x80000000) + { + putc(rc->byte_buffer+1,rc_file); + for(;rc->help;rc->help--) + putc(0x00,rc_file); + rc->byte_buffer=(unsigned char)(rc->low>>23); + } + else + rc->help++; + } + rc->range<<=8; + rc->low=(rc->low<<8)&(unsigned long)(0x7FFFFFFF); + } +} + + +/* + Flushes the encoder. Must be called when the coding is done. + -rangecoder *rc, the range coder to be used. + + Shoulde be called like that: + range_coder_flush(&o0_rc_state); + */ +void range_coder_flush(rangecoder *rc) +{ + unsigned long tmp; + + range_coder_renormalize(rc); + tmp = rc->low >> 23; + if (tmp > 0xff) + { + putc(rc->byte_buffer+1,rc_file); + for(; rc->help; rc->help--) + putc(0,rc_file); + } + else + { + putc(rc->byte_buffer,rc_file); + for(; rc->help; rc->help--) + putc(0xff,rc_file); + } + + putc(tmp & 0xff,rc_file); + putc((tmp = rc->low >> (23-8)) & 0xff,rc_file); +} + + +/* + Inits the range decoder state. Also checks for the magic number, and + quits in case it isn't the first, so be careful. + -rangecoder *rc, the range coder to be used. + */ +void range_decoder_init(rangecoder *rc, FILE *stream) +{ + unsigned int _rd_c; + + rc_file=stream; + if((_rd_c=getc(rc_file))!=0xB3) + { + printf("\nThis is not range coded data. Magic number not found. Exiting."); + exit(1); + } + rc->byte_buffer=getc(rc_file); + rc->low=rc->byte_buffer>>1; + rc->range=0x80; +} + + +/* + Decode a symbol, get its cumulative probability. +Input: +-rangecoder *rc, the range coder to be used. 
+-unsigned long tot_f, the maximum cumulative probability +Output: +-unsigned long, cumulative probability of the current symbol +Should be called like that: +current_cump=range_decoder_decode(&o0_rc_state,o0_tot_f); +*/ +unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f) +{ + unsigned long temp; + + range_decoder_renormalize(rc); + rc->help=rc->range/tot_f; + temp=rc->low/rc->help; + if(temp>=tot_f) + return tot_f-1; + else + return temp; +} + + +/* + Updates the state so next symbol can be decoded. +Input: +-rangecoder *rc, the range coder to be used. +-unsigned long tot_f, the maximum cumulative probability +-unsigned long lt_f, the cumulative probabilty of the symbol +-unsigned long sy_f, the probability of the symbol + +*/ +void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f) +{ + unsigned long temp; + + temp=rc->help*lt_f; + rc->low-=temp; + if(lt_f+sy_frange=rc->help*sy_f; + else + rc->range-=temp; +} + + +/* + Renormalizes the state while decoding. + -rangecoder *rc, the range coder to be used. + */ +void range_decoder_renormalize(rangecoder *rc) +{ + while(rc->range<=0x00800000) + { + rc->low=(rc->low<<8)|((rc->byte_buffer<<7)&0xFF); + rc->byte_buffer=getc(rc_file); + rc->low |= rc->byte_buffer >> (1); + rc->range<<=8; + } +} + diff --git a/examples/ppmc/range.h b/examples/ppmc/range.h new file mode 100644 index 0000000..04b6fcc --- /dev/null +++ b/examples/ppmc/range.h @@ -0,0 +1,39 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "range.h" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Declarations for the coder. 
+*/ + +#include + +typedef struct{ + unsigned long low, range, help; + unsigned char byte_buffer; +}rangecoder; + +FILE *rc_file; + +void range_coder_init(rangecoder *rc, FILE *stream); //coding routines +void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f); +void range_coder_renormalize(rangecoder *rc); +void range_coder_flush(rangecoder *rc); +void range_decoder_init(rangecoder *rc, FILE *stream);//decoding routines +unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f); +void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f); +void range_decoder_renormalize(rangecoder *rc); + + +typedef unsigned long code_value; +#define CODE_BITS 32 +#define Top_value ((code_value)1 << (CODE_BITS-1)) +#define SHIFT_BITS (CODE_BITS - 9) +#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1) +#define Bottom_value (Top_value >> 8) +#define outbyte(cod,x) putc(x,stdout) +#define inbyte(cod) getc(stdin) diff --git a/examples/ppmc/test.sh b/examples/ppmc/test.sh new file mode 100755 index 0000000..547626a --- /dev/null +++ b/examples/ppmc/test.sh @@ -0,0 +1,6 @@ +#!/bin/bash +time ./ppmc "$1" "$1".ppmc +time ./exclusion/ppmc "$1" "$1".ppmc-e +time gzip -c "$1" "$2" > "$1".gz +time bzip2 -c "$1" "$2" > "$1".bz2 +ls -l "$1".* diff --git a/examples/ppmc/unppmc.c b/examples/ppmc/unppmc.c new file mode 100644 index 0000000..501f0c4 --- /dev/null +++ b/examples/ppmc/unppmc.c @@ -0,0 +1,204 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this program for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "unppmc.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + + +Part of the ppmc decoder. + +This module is the main module and calls the different modules to do +the decoding of a file. When done prints kbyps. 
+*/ + + +// Bibliotecas necesarias +#include +#include +#include "range.h" //the range coder functions and data +#include "ppmcdata.h" + + +// Declaracion de funciones del ppmcmain.c +long filesize(FILE *stream); + + + + +//Main +void main (int argc, char *argv[]) +{ + unsigned long counter, //temporal counter for loops like for or while + counter2, //another temporal counter for sub loops + size_file_output, //the size of the output file + main_counter; //used in main + char expected_flush=0; //used for checking flushing which can't be done + + + // Print title, version and copyright + printf("UNPPMC using range coder.\n"); + printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n"); + printf("Permission is granted to make verbatim copies of this program for private\n"); + printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n"); + + + + // Check for correct number of parameters + if(argc!=3) + { + printf("Bad number of arguments.\n"); + exit(1); + } + + + // Try to open input and output files + if((file_input=fopen(argv[1],"r+b"))==NULL) + { + printf("Couldn't open %s.\n",argv[1]); + exit(1); + } + + if((file_output=fopen(argv[2],"w+b"))==NULL) + { + printf("Couldn't create %s.\n",argv[2]); + exit(1); + } + + + // Get output length + fread(&size_file_output,1,4,file_input); + + + // Initialize ppmc decoder + ppmc_alloc_memory(); + ppmc_initialize_contexts(); + ppmc_decoder_initialize(); + + + + // Initialize decoder + range_decoder_init(&rc_decoder,file_input); + + + // Start main loop which decodes the file + main_counter=size_file_output-4; //take in account the bytes already written + expected_flush=0; //we don't expect a flush yet + + while(main_counter!=0) + { + + // Try to decode current byte in order-4 if possible, else in lower ones + ppmc_decode_order4(); + if(byte==-1) + ppmc_decode_order3(); + if(byte==-1) + { + ppmc_decode_order2(); + if(byte==-1) + { + ppmc_decode_order1(); + if(byte==-1) + { + 
ppmc_decode_order0(); + if(byte==-1) //check if it was an escape code + { + // Decode in order-(-1) + ppmc_get_totf_ordern1(); + symb_cump=range_decoder_decode(&rc_decoder,total_cump); + byte=ppmc_get_symbol_ordern1(); + ppmc_get_prob_ordern1(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + coded_in_order=0; //update all orders + + // Now see if it's the code of flushing + + if(symb_cump==256) + { + printf("Flushing.\n"); + ppmc_flush_mem_dec(); + expected_flush=0; + continue; //do not output byte nor update + } + + } + } + } + } + + // Output byte and update model + + fputc(byte,file_output); + + switch(coded_in_order) //update exclusion + { + case 0: ppmc_update_order0(); //update only order-0 + case 1: ppmc_update_order1(); //update order-0 and order-1 + case 2: ppmc_update_dec_order2(); //update order-0 1 and 2 + case 3: ppmc_update_dec_order3(); + case 4: ppmc_update_dec_order4(); + default: break; + }; + + + // Check if flushing has to be done and has not been done. + // This is optional, in case you limit the memory usage, you don't + // need to include this + + if(expected_flush==1) // If flushing didn't happen, we can't decode + { + printf("Can't decompress file. Not enough memory.\nTry in a machine with more memory.\n"); + exit(1); + } + if(ppmc_out_of_memory==1) + { + expected_flush=1; // Next code must be a flush code, otherwise we don't + // have enough memory, and therefore we can't decode + } + + + // Update order variables + + o4_byte=o3_byte; + o3_byte=o2_byte; + o2_byte=o1_byte; + o1_byte=byte; //current one, is next time order-1 + + // Byte decoded and model updated, loop + main_counter--; + + + } + + + ppmc_free_memory(); + + // Close file handles and free memory + fclose(file_input); + fclose(file_output); + + + // Nicely exit + exit(0); +} + + +// Ruotines not used by ppmc but rather by main. +// Not including the range coder. + + +// Returns the file size of a given file. 
+long filesize(FILE *stream) +{ + long curpos, length; + + curpos = ftell(stream); + fseek(stream, 0L, SEEK_END); + length = ftell(stream); + fseek(stream, curpos, SEEK_SET); + return length; +} + + diff --git a/examples/rle/Makefile b/examples/rle/Makefile new file mode 100644 index 0000000..65caa33 --- /dev/null +++ b/examples/rle/Makefile @@ -0,0 +1,74 @@ +# Makefile de ejemplo para C++ +# +# Creado: jue abr 15 15:34:19 ART 2004 +# +# Copyleft 2004 - Leandro Lucarella, Bajo licencia GPL [http://www.gnu.org/] +# + +# CONFIGURACION +################ + +# Nombre del ejecutable. +target = rle + +# Extensión de los archivos a compilar (c para C, cpp o cc o cxx para C++). +extension = c + +# Archivos con el código fuente que componen el ejecutable. Si no se especifica, +# toma todos los archivos con la extensión mencionada. Para especificar hay que +# descomentar la línea (quitarle el '#' del principio). +# NOTA: No poner cabeceras (.h). +#fuentes = entrada.cpp + +# Si es un programa GTK+, descomentá (quitale el '#' a) la siguiente línea. +#gtk = si + + +# CONFIGURACION "AVANZADA" +########################### + +# Opciones para el compilador C. +#CFLAGS = -Wall -ggdb -ansi -pedantic -DDEBUG +CFLAGS = -Wall -O3 -ansi -pedantic -DNDEBUG -g + +# Opciones para el compilador C++. +#CXXFLAGS = $(CFLAGS) -fno-inline +CXXFLAGS = $(CFLAGS) + + +# VARIABLES CALCULADAS A PARTIR DE LA CONFIGURACION +#################################################### + +# Agrego flags y libs de GTK+ de ser necesario. +ifdef gtk +CFLAGS += $(shell pkg-config --cflags gtk+-2.0) +CXXFLAGS += $(shell pkg-config --cflags gtk+-2.0) +LDFLAGS += $(shell pkg-config --libs gtk+-2.0) +endif + +# Uso enlazador de c++ si es código no C. +ifeq ($(extension), c) +enlazador = $(CC) +else +enlazador = $(CXX) +endif + +# Si no especifica archivos, tomo todos. 
+fuentes ?= $(wildcard *.$(extension))
+
+
+# RULES
+#########
+
+.PHONY: all clean
+
+all: $(target)
+
+o_files = $(patsubst %.$(extension),%.o,$(fuentes))
+
+$(target): $(o_files)
+	$(enlazador) $(LDFLAGS) $(o_files) $(LOADLIBES) $(LDLIBS) -o $(target)
+
+clean:
+	@$(RM) -fv *.o $(target)
+
diff --git a/examples/rle/NO_COMPROBADO b/examples/rle/NO_COMPROBADO
new file mode 100644
index 0000000..e69de29
diff --git a/examples/rle/rle.c b/examples/rle/rle.c
new file mode 100644
index 0000000..b2ff114
--- /dev/null
+++ b/examples/rle/rle.c
@@ -0,0 +1,225 @@
+
+/* NOTE(review): the three header names below were lost when this page was
+   extracted; the code uses printf/fopen/fwrite and memset, so presumably
+   they included stdio.h and string.h - confirm against the repository. */
+#include
+#include
+#include
+
+#define SIZE 10
+#define BIT_SIZE 4
+#define MIN_LEN 2
+
+/* Insert a char at the end */
+void push_end(char *s, char c);
+int esta_en_memoria(char *memoria, char *pal, int *len);
+
+void out_ini(char *f);
+void out_bit(char b);
+void out_char(char c);
+void out_data(char len, char pos);
+void out_end();
+
+/* Bit-output state shared by the out_* routines: pending bits, the output
+   file and the number of pending bits. */
+unsigned int out_buffer;
+FILE *out;
+int bit_count;
+
+/* Reads argv[1] and writes the coded output to argv[2] through the out_*
+   routines. */
+int main(int argc, char *argv[])
+{
+    int mc, ic; /* Counters */
+    char memoria[SIZE];
+    char inspeccion[SIZE];
+    char c, i; /* character read */
+    FILE *fp;
+
+    /* initialise everything */
+    memset(memoria, 1, SIZE);
+    memset(inspeccion, 1, SIZE);
+    mc = ic = 0;
+
+    /* NOTE(review): the result of fopen is not checked before use. */
+    fp = fopen(argv[1], "rt");
+
+    /* fill INSPECCION */
+    /* NOTE(review): c is a plain char, so the comparison with EOF depends
+       on the signedness of char - confirm. */
+    while (((c = fgetc(fp)) != EOF) && (ic < SIZE)) {
+        push_end(inspeccion, c);
+        ic++;
+    }
+
+    ungetc(c, fp);
+
+    out_ini(argv[2]);
+    /* The game begins */
+    while (c != EOF) {
+        int pos, len;
+        /* Search */
+        pos = esta_en_memoria(memoria, inspeccion, &len);
+        if (pos != -1) {
+            /* The string repeats! */
+            printf("[0,%d,%d]\n", pos, len);
+            out_bit(0);
+            out_data(len, pos);
+
+            /* Characters have to be pushed in */
+            /* NOTE(review): on the next line the text between "i" and
+               " 0) {" appears to have been lost in extraction (everything
+               from a '<' up to a later '>'); the rest of this loop and the
+               start of the following one are missing. */
+            for(i=0; i 0) {
+        int pos, len;
+        /* search */
+        pos = esta_en_memoria(memoria, inspeccion, &len);
+        if (pos != -1) {
+            /* The string repeats! */
+            printf("[0,%d,%d]\n", pos, len);
+            out_bit(0);
+            out_data(len, pos);
+
+            /* Characters have to be pushed in */
+            /* NOTE(review): text is missing again between "i" and
+               "len_max)"; the end of main and the start of the function
+               that contains the lines below are gone. That function is
+               presumably esta_en_memoria, since it stores through len and
+               returns a position or -1 - confirm. */
+            for(i=0; ilen_max) {
+                pos_max = i-k+1;
+                len_max = k;
+            }
+
+            j++;
+        } else {
+            j = 0;
+            k = 0;
+        }
+        i++;
+    }
+
+    /* Report a match only when it is at least MIN_LEN long. */
+    if (len_max >= MIN_LEN) {
+        (*len) = len_max;
+        return pos_max;
+    }
+    return -1;
+}
+
+/* Appends the lowest bit of b to out_buffer. When the buffer would reach
+   32 pending bits, one byte is written to out first. */
+void out_bit(char b)
+{
+    char c;
+    if (bit_count+1 >= 32) {
+        /* Room has to be made: take 1 byte out */
+        c = (out_buffer >> (bit_count-8));
+        bit_count -= 8;
+        /* write it to the file */
+        fwrite(&c, 1, 1, out);
+    }
+
+    bit_count++;
+    out_buffer <<= 1;
+    out_buffer |= (0x1&b);
+}
+
+/* Appends the 8 bits of c to out_buffer, writing one byte to out first
+   when the buffer would reach 32 pending bits. */
+void out_char(char c)
+{
+    char cc;
+    if (bit_count+8 >= 32) {
+        /* Room has to be made: take 1 byte out */
+        cc = (out_buffer >> (bit_count-8));
+        bit_count -= 8;
+        /* write it to the file */
+        fwrite(&cc, 1, 1, out);
+    }
+
+    bit_count+=8;
+    out_buffer <<= 8;
+    /* NOTE(review): if char is signed and c is negative, the promoted value
+       also sets bits above the low 8 - confirm. */
+    out_buffer |= c;
+}
+
+/* Appends BIT_SIZE*2 bits built from pos and len to out_buffer, first
+   writing bytes to out while the buffer would reach 32 pending bits. */
+void out_data(char len, char pos)
+{
+    char cc;
+    char b;
+    while (bit_count+BIT_SIZE*2 >= 32) {
+        /* Room has to be made: take 1 byte out */
+        cc = (out_buffer >> (bit_count-8));
+        bit_count -= 8;
+        /* write it to the file */
+        fwrite(&cc, 1, 1, out);
+    }
+
+    b = 0x0;
+    /* NOTE(review): this ORs 0x3 into both values, shifts pos by 3 although
+       BIT_SIZE is 4, and combines the halves with & rather than | - this
+       does not look like a packing of two BIT_SIZE-bit fields; confirm the
+       intended format. */
+    b = ((0x3|pos)<<3) & ((0x3|len));
+    bit_count+=BIT_SIZE*2;
+    out_buffer <<= BIT_SIZE*2;
+    out_buffer |= b;
+}
+
+/* Opens f for binary writing and resets the bit buffer. */
+void out_ini(char *f)
+{
+    /* NOTE(review): the result of fopen is not checked. */
+    out = fopen(f, "wb");
+    bit_count = 0;
+    out_buffer = 0;
+}
+
+/* Writes the pending bits to out and closes it. */
+void out_end()
+{
+    char cc;
+    /* Take out whatever is left */
+    while (bit_count > 8) {
+        cc = (out_buffer >> (bit_count-8));
+        bit_count -= 8;
+        fwrite(&cc, 1, 1, out);
+    }
+
+    if (bit_count > 0) {
+        /* NOTE(review): shifting right by bit_count drops exactly the
+           bit_count pending low bits instead of keeping them - confirm. */
+        cc = (out_buffer >> bit_count);
+        fwrite(&cc, 1, 1, out);
+    }
+    fclose(out);
+}
+