--- /dev/null
+/**************************************************************
+ lzhuf.c
+ written by Haruyasu Yoshizaki 1988/11/20
+ some minor changes 1989/04/06
+ comments translated by Haruhiko Okumura 1989/04/07
+ getbit and getbyte modified 1990/03/23 by Paul Edwards
+ so that they would work on machines where integers are
+ not necessarily 16 bits (although ANSI guarantees a
+ minimum of 16). This program has compiled and run with
+ no errors under Turbo C 2.0, Power C, and SAS/C 4.5
+ (running on an IBM mainframe under MVS/XA 2.2). Could
+ people please use YYYY/MM/DD date format so that everyone
+ in the world can know what format the date is in?
+ external storage of filesize changed 1990/04/18 by Paul Edwards to
+ Intel's "little endian" rather than a machine-dependant style so
+ that files produced on one machine with lzhuf can be decoded on
+ any other. "little endian" style was chosen since lzhuf
+ originated on PC's, and therefore they should dictate the
+ standard.
+ initialization of something predicting spaces changed 1990/04/22 by
+ Paul Edwards so that when the compressed file is taken somewhere
+ else, it will decode properly, without changing ascii spaces to
+ ebcdic spaces. This was done by changing the ' ' (space literal)
+ to 0x20 (which is the far most likely character to occur, if you
+ don't know what environment it will be running on.
+**************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+FILE *infile, *outfile;
+static unsigned long int textsize = 0, codesize = 0, printcount = 0;
+
+char wterr[] = "Can't write.";
+
+static void Error(char *message)
+{
+ printf("\n%s\n", message);
+ exit(EXIT_FAILURE);
+}
+
+/********** LZSS compression **********/
+
+#define N 4096 /* buffer size */
+#define F 60 /* lookahead buffer size */
+#define THRESHOLD 2
+#define NIL N /* leaf of tree */
+
+unsigned char
+ text_buf[N + F - 1];
+static int match_position, match_length,
+ lson[N + 1], rson[N + 257], dad[N + 1];
+
+static void InitTree(void) /* initialize trees */
+{
+ int i;
+
+ for (i = N + 1; i <= N + 256; i++)
+ rson[i] = NIL; /* root */
+ for (i = 0; i < N; i++)
+ dad[i] = NIL; /* node */
+}
+
+static void InsertNode(int r) /* insert to tree */
+{
+ int i, p, cmp;
+ unsigned char *key;
+ unsigned c;
+
+ cmp = 1;
+ key = &text_buf[r];
+ p = N + 1 + key[0];
+ rson[r] = lson[r] = NIL;
+ match_length = 0;
+ for ( ; ; ) {
+ if (cmp >= 0) {
+ if (rson[p] != NIL)
+ p = rson[p];
+ else {
+ rson[p] = r;
+ dad[r] = p;
+ return;
+ }
+ } else {
+ if (lson[p] != NIL)
+ p = lson[p];
+ else {
+ lson[p] = r;
+ dad[r] = p;
+ return;
+ }
+ }
+ for (i = 1; i < F; i++)
+ if ((cmp = key[i] - text_buf[p + i]) != 0)
+ break;
+ if (i > THRESHOLD) {
+ if (i > match_length) {
+ match_position = ((r - p) & (N - 1)) - 1;
+ if ((match_length = i) >= F)
+ break;
+ }
+ if (i == match_length) {
+ if ((c = ((r - p) & (N-1)) - 1) < (unsigned)match_position) {
+ match_position = c;
+ }
+ }
+ }
+ }
+ dad[r] = dad[p];
+ lson[r] = lson[p];
+ rson[r] = rson[p];
+ dad[lson[p]] = r;
+ dad[rson[p]] = r;
+ if (rson[dad[p]] == p)
+ rson[dad[p]] = r;
+ else
+ lson[dad[p]] = r;
+ dad[p] = NIL; /* remove p */
+}
+
+static void DeleteNode(int p) /* remove from tree */
+{
+ int q;
+
+ if (dad[p] == NIL)
+ return; /* not registered */
+ if (rson[p] == NIL)
+ q = lson[p];
+ else
+ if (lson[p] == NIL)
+ q = rson[p];
+ else {
+ q = lson[p];
+ if (rson[q] != NIL) {
+ do {
+ q = rson[q];
+ } while (rson[q] != NIL);
+ rson[dad[q]] = lson[q];
+ dad[lson[q]] = dad[q];
+ lson[q] = lson[p];
+ dad[lson[p]] = q;
+ }
+ rson[q] = rson[p];
+ dad[rson[p]] = q;
+ }
+ dad[q] = dad[p];
+ if (rson[dad[p]] == p)
+ rson[dad[p]] = q;
+ else
+ lson[dad[p]] = q;
+ dad[p] = NIL;
+}
+
+/* Huffman coding */
+
+#define N_CHAR (256 - THRESHOLD + F)
+ /* kinds of characters (character code = 0..N_CHAR-1) */
+#define T (N_CHAR * 2 - 1) /* size of table */
+#define R (T - 1) /* position of root */
+#define MAX_FREQ 0x8000 /* updates tree when the */
+typedef unsigned char uchar;
+
+
+/* table for encoding and decoding the upper 6 bits of position */
+
+/* for encoding */
+uchar p_len[64] = {
+ 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
+};
+
+uchar p_code[64] = {
+ 0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
+ 0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
+ 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
+ 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
+ 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
+ 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
+};
+
+/* for decoding */
+uchar d_code[256] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
+ 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
+ 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
+ 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
+ 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
+ 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
+ 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
+ 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
+ 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
+ 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
+ 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
+ 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
+ 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+};
+
+uchar d_len[256] = {
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+};
+
+unsigned freq[T + 1]; /* frequency table */
+
+int prnt[T + N_CHAR]; /* pointers to parent nodes, except for the */
+ /* elements [T..T + N_CHAR - 1] which are used to get */
+ /* the positions of leaves corresponding to the codes. */
+
+int son[T]; /* pointers to child nodes (son[], son[] + 1) */
+
+unsigned getbuf = 0;
+uchar getlen = 0;
+
+static int GetBit(void) /* get one bit */
+{
+ unsigned i;
+
+ while (getlen <= 8) {
+ if ((int)(i = getc(infile)) < 0) i = 0;
+ getbuf |= i << (8 - getlen);
+ getlen += 8;
+ }
+ i = getbuf;
+ getbuf <<= 1;
+ getlen--;
+ return (int)((i & 0x8000) >> 15);
+}
+
+static int GetByte(void) /* get one byte */
+{
+ unsigned i;
+
+ while (getlen <= 8) {
+ if ((int)(i = getc(infile)) < 0) i = 0;
+ getbuf |= i << (8 - getlen);
+ getlen += 8;
+ }
+ i = getbuf;
+ getbuf <<= 8;
+ getlen -= 8;
+ return (int)((i & 0xff00) >> 8);
+}
+
+unsigned putbuf = 0;
+uchar putlen = 0;
+
+static void Putcode(int l, unsigned c) /* output c bits of code */
+{
+ putbuf |= c >> putlen;
+ if ((putlen += l) >= 8) {
+ if (putc(putbuf >> 8, outfile) == EOF) {
+ Error(wterr);
+ }
+ if ((putlen -= 8) >= 8) {
+ if (putc(putbuf, outfile) == EOF) {
+ Error(wterr);
+ }
+ codesize += 2;
+ putlen -= 8;
+ putbuf = c << (l - putlen);
+ } else {
+ putbuf <<= 8;
+ codesize++;
+ }
+ }
+}
+
+
+/* initialization of tree */
+
+static void StartHuff(void)
+{
+ int i, j;
+
+ for (i = 0; i < N_CHAR; i++) {
+ freq[i] = 1;
+ son[i] = i + T;
+ prnt[i + T] = i;
+ }
+ i = 0; j = N_CHAR;
+ while (j <= R) {
+ freq[j] = freq[i] + freq[i + 1];
+ son[j] = i;
+ prnt[i] = prnt[i + 1] = j;
+ i += 2; j++;
+ }
+ freq[T] = 0xffff;
+ prnt[R] = 0;
+}
+
+
+/* reconstruction of tree */
+
+static void reconst(void)
+{
+ int i, j, k;
+ unsigned f, l;
+
+ /* collect leaf nodes in the first half of the table */
+ /* and replace the freq by (freq + 1) / 2. */
+ j = 0;
+ for (i = 0; i < T; i++) {
+ if (son[i] >= T) {
+ freq[j] = (freq[i] + 1) / 2;
+ son[j] = son[i];
+ j++;
+ }
+ }
+ /* begin constructing tree by connecting sons */
+ for (i = 0, j = N_CHAR; j < T; i += 2, j++) {
+ k = i + 1;
+ f = freq[j] = freq[i] + freq[k];
+ for (k = j - 1; f < freq[k]; k--);
+ k++;
+ l = (j - k) * 2;
+ memmove(&freq[k + 1], &freq[k], l);
+ freq[k] = f;
+ memmove(&son[k + 1], &son[k], l);
+ son[k] = i;
+ }
+ /* connect prnt */
+ for (i = 0; i < T; i++) {
+ if ((k = son[i]) >= T) {
+ prnt[k] = i;
+ } else {
+ prnt[k] = prnt[k + 1] = i;
+ }
+ }
+}
+
+
+/* increment frequency of given code by one, and update tree */
+
+static void update(int c)
+{
+ int i, j, k, l;
+
+ if (freq[R] == MAX_FREQ) {
+ reconst();
+ }
+ c = prnt[c + T];
+ do {
+ k = ++freq[c];
+
+ /* if the order is disturbed, exchange nodes */
+ if ((unsigned)k > freq[l = c + 1]) {
+ while ((unsigned)k > freq[++l]);
+ l--;
+ freq[c] = freq[l];
+ freq[l] = k;
+
+ i = son[c];
+ prnt[i] = l;
+ if (i < T) prnt[i + 1] = l;
+
+ j = son[l];
+ son[l] = i;
+
+ prnt[j] = c;
+ if (j < T) prnt[j + 1] = c;
+ son[c] = j;
+
+ c = l;
+ }
+ } while ((c = prnt[c]) != 0); /* repeat up to root */
+}
+
+unsigned code, len;
+
+static void EncodeChar(unsigned c)
+{
+ unsigned i;
+ int j, k;
+
+ i = 0;
+ j = 0;
+ k = prnt[c + T];
+
+ /* travel from leaf to root */
+ do {
+ i >>= 1;
+
+ /* if node's address is odd-numbered, choose bigger brother node */
+ if (k & 1) i += 0x8000;
+
+ j++;
+ } while ((k = prnt[k]) != R);
+ Putcode(j, i);
+ code = i;
+ len = j;
+ update(c);
+}
+
+static void EncodePosition(unsigned c)
+{
+ unsigned i;
+
+ /* output upper 6 bits by table lookup */
+ i = c >> 6;
+ Putcode(p_len[i], (unsigned)p_code[i] << 8);
+
+ /* output lower 6 bits verbatim */
+ Putcode(6, (c & 0x3f) << 10);
+}
+
+static void EncodeEnd(void)
+{
+ if (putlen) {
+ if (putc(putbuf >> 8, outfile) == EOF) {
+ Error(wterr);
+ }
+ codesize++;
+ }
+}
+
+static int DecodeChar(void)
+{
+ unsigned c;
+
+ c = son[R];
+
+ /* travel from root to leaf, */
+ /* choosing the smaller child node (son[]) if the read bit is 0, */
+ /* the bigger (son[]+1} if 1 */
+ while (c < T) {
+ c += GetBit();
+ c = son[c];
+ }
+ c -= T;
+ update(c);
+ return (int)c;
+}
+
+static int DecodePosition(void)
+{
+ unsigned i, j, c;
+
+ /* recover upper 6 bits from table */
+ i = GetByte();
+ c = (unsigned)d_code[i] << 6;
+ j = d_len[i];
+
+ /* read lower 6 bits verbatim */
+ j -= 2;
+ while (j--) {
+ i = (i << 1) + GetBit();
+ }
+ return (int)(c | (i & 0x3f));
+}
+
+/* compression */
+
+static void Encode(void) /* compression */
+{
+ int i, c, len, r, s, last_match_length;
+
+ fseek(infile, 0L, 2);
+ textsize = ftell(infile);
+ fputc((int)((textsize & 0xff)),outfile);
+ fputc((int)((textsize & 0xff00) >> 8),outfile);
+ fputc((int)((textsize & 0xff0000L) >> 16),outfile);
+ fputc((int)((textsize & 0xff000000L) >> 24),outfile);
+ if (ferror(outfile))
+ Error(wterr); /* output size of text */
+ if (textsize == 0)
+ return;
+ rewind(infile);
+ textsize = 0; /* rewind and re-read */
+ StartHuff();
+ InitTree();
+ s = 0;
+ r = N - F;
+ for (i = s; i < r; i++)
+ text_buf[i] = 0x20;
+ for (len = 0; len < F && (c = getc(infile)) != EOF; len++)
+ text_buf[r + len] = (unsigned char)c;
+ textsize = len;
+ for (i = 1; i <= F; i++)
+ InsertNode(r - i);
+ InsertNode(r);
+ do {
+ if (match_length > len)
+ match_length = len;
+ if (match_length <= THRESHOLD) {
+ match_length = 1;
+ EncodeChar(text_buf[r]);
+ } else {
+ EncodeChar(255 - THRESHOLD + match_length);
+ EncodePosition(match_position);
+ }
+ last_match_length = match_length;
+ for (i = 0; i < last_match_length &&
+ (c = getc(infile)) != EOF; i++) {
+ DeleteNode(s);
+ text_buf[s] = (unsigned char)c;
+ if (s < F - 1)
+ text_buf[s + N] = (unsigned char)c;
+ s = (s + 1) & (N - 1);
+ r = (r + 1) & (N - 1);
+ InsertNode(r);
+ }
+ if ((textsize += i) > printcount) {
+ printf("%12ld\r", textsize);
+ printcount += 1024;
+ }
+ while (i++ < last_match_length) {
+ DeleteNode(s);
+ s = (s + 1) & (N - 1);
+ r = (r + 1) & (N - 1);
+ if (--len) InsertNode(r);
+ }
+ } while (len > 0);
+ EncodeEnd();
+ printf("In : %ld bytes\n", textsize);
+ printf("Out: %ld bytes\n", codesize);
+ printf("Out/In: %.3f\n", 1.0 * codesize / textsize);
+}
+
+static void Decode(void) /* recover */
+{
+ int i, j, k, r, c;
+ unsigned long int count;
+
+ textsize = (fgetc(infile));
+ textsize |= (fgetc(infile) << 8);
+ textsize |= (fgetc(infile) << 16);
+ textsize |= (fgetc(infile) << 24);
+ if (ferror(infile))
+ Error("Can't read"); /* read size of text */
+ if (textsize == 0)
+ return;
+ StartHuff();
+ for (i = 0; i < N - F; i++)
+ text_buf[i] = 0x20;
+ r = N - F;
+ for (count = 0; count < textsize; ) {
+ c = DecodeChar();
+ if (c < 256) {
+ if (putc(c, outfile) == EOF) {
+ Error(wterr);
+ }
+ text_buf[r++] = (unsigned char)c;
+ r &= (N - 1);
+ count++;
+ } else {
+ i = (r - DecodePosition() - 1) & (N - 1);
+ j = c - 255 + THRESHOLD;
+ for (k = 0; k < j; k++) {
+ c = text_buf[(i + k) & (N - 1)];
+ if (putc(c, outfile) == EOF) {
+ Error(wterr);
+ }
+ text_buf[r++] = (unsigned char)c;
+ r &= (N - 1);
+ count++;
+ }
+ }
+ if (count > printcount) {
+ printf("%12ld\r", count);
+ printcount += 1024;
+ }
+ }
+ printf("%12ld\n", count);
+}
+
+int main(int argc, char *argv[])
+{
+ char *s;
+
+ if (argc != 4) {
+ printf("'lzhuf e file1 file2' encodes file1 into file2.\n"
+ "'lzhuf d file2 file1' decodes file2 into file1.\n");
+ return EXIT_FAILURE;
+ }
+ if ((s = argv[1], s[1] || strpbrk(s, "DEde") == NULL)
+ || (s = argv[2], (infile = fopen(s, "rb")) == NULL)
+ || (s = argv[3], (outfile = fopen(s, "wb")) == NULL)) {
+ printf("??? %s\n", s);
+ return EXIT_FAILURE;
+ }
+ if (toupper(*argv[1]) == 'E')
+ Encode();
+ else
+ Decode();
+ fclose(infile);
+ fclose(outfile);
+ return EXIT_SUCCESS;
+}
--- /dev/null
+
+TARGETS=ppmc unppmc
+COMMON= ppmc.o ppmcdata.o range.o
+
+CFLAGS=-O3 -Wall -DNDEBUG
+
+all: $(TARGETS)
+
+ppmc: $(COMMON) ppmcmain.c
+
+unppmc: $(COMMON) unppmc.c
+
+clean:
+
+ @$(RM) -f *.o $(TARGETS)
+
+.PHONY: all clean
+
--- /dev/null
+
+ PPMC
+
+
+TABLE OF CONTENTS
+-Description
+-Compiling
+-Files included in this release
+-Timing
+-Author
+-Disclaimer
+
+
+DESCRIPTION
+This is the source code of an implementation of ppmc.
+The data structures used are hash tables instead of a context trie.
+
+A file is compressed and decompressed like that:
+ppmc inputfile compressedfile
+unppmc compressedfile outputfile
+
+I don't recommend to use this for compressing vital information, because it
+hasn't been fully tested, and moreover, the machine where the decompressor is
+being run, must have at least as much memory as the encoder had. I recommend
+to use this compressor only for researching pourposes.
+
+For further information read ac_ppmc.html also included in the package, for
+the latest version visit http://www.ross.net/arturocampos
+
+
+COMPILING
+The source code is in the C programming language, and was successfully
+compiled with Djgpp version 2.02, a project was made which included the
+following files for the encoder (call this project ppmc):
+ppmc.c
+ppmcdata.c
+ppmcmain.c
+range.c
+
+And for the decoder (call this project unppmc)
+ppmc.c
+ppmcdata.c
+range.c
+unppmc.c
+
+Then you just have to hit F9 and wait. I tried to do makefiles, however there
+was something wrong and it didn't worked. If someone has any idea about it, or
+how to compile this source for other compilers, please let me know it, so in
+the next release I can include makefiles.
+
+
+FILES INCLUDED IN THIS RELEASE
+In the zip file ac_ppmc_html.zip you should find:
+
+readme.txt -> The file you're reading now.
+ac_ppmc.html -> article which explains ppmc and the data structures used
+ppmc.c -> This is the main file which includes all the routines used
+ by both the encoder and decoder's model.
+range.c -> The encoder and decoder's routines.
+ppmcmain.c -> The main routine for the compressor
+unppmc.c -> The main routine for the decompressor
+ppmcdata.c -> Global data structures. (used mainly by ppmc.c)
+ppmc.h -> Declarations of routines
+ppmcdata.h -> Declarations of global data and structures
+range.h -> Declarations of the routines for the range encoder and decoder
+ppmc.exe -> A compiled version of the compressor
+unppmc.exe -> A compiled version of the decompressor
+
+All this files are the implementation of ppmc order-4 using lazy exclusions.
+In the /exclusion directory you can find the same files (unless readme) but
+for ppmc order-4 using full exclusion.
+I thought there was no need to include the files for ppmc-o3h, because they
+are nothing else than ppmc with lazy exclusions, using only order-2 (and
+lowers) but instead of using the hash key for order-2 it uses the one for
+order-3. (you also have to take care about o3_byte, of course)
+
+The executables were compiled with Djgpp 2.95 using only the switch -O6.
+
+
+TIMING
+The standard function to get the
+time "time()", has a maximum precision of seconds. This is not enough for
+testing the speed of a compressor. Due to this timing was not included in
+this release.
+If you are interested on compiling it with Djgpp, my original version used the
+following code:
+
+ struct time ppmc_time, ppmc_time2;
+ double _time2, _time;
+
+ // Get current time
+ gettime(&ppmc_time);
+
+ //Compress file
+
+
+ // Print bpb and kbyps
+ gettime(&ppmc_time2);
+ printf("%s at %f bpb in ",argv[1],((float)filesize(file_output)/(float)size_file_input)*(float)8);
+ _time=((ppmc_time.ti_min)*60)+(ppmc_time.ti_sec)+(((double)ppmc_time.ti_hund)/100);
+ _time2=((ppmc_time2.ti_min)*60)+(ppmc_time2.ti_sec)+(((double)ppmc_time2.ti_hund)/100);
+ if((_time2-_time)!=0)
+ printf("%f kbytes/seconds.", ((float)size_file_input/(float)1024)/(_time2-_time));
+
+
+AUTHOR
+This code was made by Arturo San Emeterio Campos, you can find his home page
+at: http://www.ross.net/arturocampos
+And his email is: arturo-campos@mixmail.com
+
+
+DISCLAIMER
+Copyright (c) Arturo San Emeterio Campos 1999. All rights reserved. Permission
+is granted to make verbatim copies of this files for private use only. There
+is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+
+ Arturo San Emeterio Campos, Barcelona 04-Jan-2000
--- /dev/null
+Order-3 and order-4 use almost the same code. With lazy exclusions it was
+exactly the same. But for full exclusions is not.
+
+The problem is that when updating we can't be sure that the stored pointer
+points where it should to (as it did with lazy exclusions). So we have to
+read the whole linked list. This is done for order-2 and order-3. However
+order-4 is the highest order used, so it doesn't use exclusion, therefore
+it uses the same code as with lazy exclusions.
+
+So if you want to use higher orders remember that the code to use should
+be the one for order-3 because it updates correctly when exclusions are
+being used. and use the order-4 code for the highest order.
+
+ Arturo Campos (arturo-campos@mixmail.com)
+ http://www.arturocampos.com
\ No newline at end of file
--- /dev/null
+
+TARGETS=ppmc unppmc
+COMMON= ppmc.o ppmcdata.o range.o
+
+CFLAGS=-O3 -Wall -DNDEBUG
+
+all: $(TARGETS)
+
+ppmc: $(COMMON) ppmcmain.c
+
+unppmc: $(COMMON) unppmc.c
+
+clean:
+
+ @$(RM) -f *.o $(TARGETS)
+
+.PHONY: all clean
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmc.c" (exclusion)
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains the whole ppmc encoder. It uses hash tables for
+ managing most of the orders. And a maximum order of 4. It codes bytes.
+ Order-1-0-(-1) are all handled in tables. Order-2 has a table with
+ direct hashing with pointers to the linked lists. Order-4 and order-3
+ both have hash tables with pointers to contexts in a linked lists which
+ finally have a pointer to the start of the linked list with the
+ probability distribution. Update exclusion is used, but exclusion is not.
+
+ Please, note that if the machine where the decoder is run doesn't has as
+ much memory as the computer where the encoder was ran, the decoder will
+ not be able to properly decode the file, because it will not be able to
+ keep track of new statistics, in this case it will just exit.
+
+ For applications where the loss of data is not admisible, I suggest you to
+ limit both encoder and decoder's memory requeriments to a given minimum.
+
+ Using exclusion. It's up to the main encoding routine to clear this table
+ for every new byte.
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "range.h"
+#include "ppmcdata.h"
+
+
+
+// Ruotines used by ppmc. Not including the range coder.
+//
+// They are for initializing of both encoder and decoder, and unless there
+// are two version, both encoder and decoder use the same routines. Like
+// "ppmc_initialize_contexts".
+
+
+// This one allocs the memory needed by ppmc, and adjust some pointers used
+// for allocating elements in the linked lists. The mempool arrays must be
+// initialized now.
+void ppmc_alloc_memory(void)
+{
+ unsigned long counter;
+
+
+ // Init mempool buffers
+
+ for(counter=0;counter!=_mempool_max_index;++counter)
+ {
+ _bytes_pool_array[counter]=0;
+ _context_pool_array[counter]=0;
+ }
+
+ _bytes_pool_index=1; //first entry will be used now
+ _context_pool_index=1;
+
+
+ // Allocate memory for ppmc structures and adjust some variables
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+
+ //save pointers in the array for freeing
+ _bytes_pool_array[0]=_bytes_pool;
+ _context_pool_array[0]=_context_pool;
+
+
+ //adjust variables
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements;
+ _context_pool_max=_context_pool+_context_pool_elements;
+
+ ppmc_out_of_memory=0; //we still have memory
+}
+
+
+// This routine initializes all the contexts, and all the tables including
+// those who care about the number of bytes defined in a context.
+void ppmc_initialize_contexts(void)
+{
+ unsigned long counter, counter2;
+
+
+ // Order-0
+ for(counter=0;counter!=256;++counter) //clear table
+ order0_array[counter]=0;
+
+ order0_defined_bytes=0; //adjust variables
+ order0_max_cump=0;
+
+
+ // Order-1
+ for(counter=0;counter!=256;++counter) //erase every table of every context
+ for(counter2=0;counter2!=256;++counter2)
+ order1_array[counter][counter2]=0;
+
+ for(counter=0;counter!=256;++counter) //adjust variables
+ {
+ order1_defined_bytes_array[counter]=0;
+ order1_max_cump_array[counter]=0;
+ }
+
+
+ // Order-2
+ for(counter=0;counter!=65536;++counter)
+ {
+ //order2_array[counter].prob=0; //clear pointer to bytes and frequencies
+ //order2_array[counter].max_cump=0;
+ order2_array[counter].defined_bytes=0;
+ }
+
+
+ // Order-4-3
+ for(counter=0;counter!=65536;++counter) //order-4-3
+ {
+ order4_hasht[counter]=0;
+ order3_hasht[counter]=0;
+ }
+}
+
+
+// This routine initializes the encode model by outputting as many bytes as
+// needed to prepare the models. This should be called before the main loop
+// and after the memory has been allocated and tables initialized.
+//
+// It does not need uses the range coder. It output the first 1 bytes.
+void ppmc_encoder_initialize(void)
+{
+
+ // Initialize order-0 and prepare different bytes for orders
+ fputc((byte=fgetc(file_input)),file_output);
+ o4_byte=byte; //order-4
+
+ fputc((byte=fgetc(file_input)),file_output);
+ o3_byte=byte; //order-3
+
+ fputc((byte=fgetc(file_input)),file_output);
+ o2_byte=byte; //order-2
+ ppmc_update_order0();
+
+ fputc((byte=fgetc(file_input)),file_output);
+ o1_byte=byte;
+
+}
+
+
+// This routine initializes the decoder model, should be called to do the same
+// changes as "ppmc_encoder_initialize()" did.
+void ppmc_decoder_initialize(void)
+{
+
+ // Initialize order-0 and context bytes
+ byte=fgetc(file_input);
+ o4_byte=byte; //order-4
+ fputc(byte,file_output);
+
+ byte=fgetc(file_input);
+ o3_byte=byte; //order-3
+ fputc(byte,file_output);
+
+ byte=fgetc(file_input);
+ o2_byte=byte; //order-2
+
+ fputc(byte,file_output); //output first byte
+ ppmc_update_order0();
+
+ byte=fgetc(file_input);
+ o1_byte=byte; //order-1
+ fputc(byte,file_output);
+}
+
+
+// Once coding or decoding is finished you have to call this routine.
+// It must be called when done.
+void ppmc_free_memory(void)
+{
+ unsigned long counter;
+
+ // Free the memory buffers
+
+ for(counter=0;counter!=_mempool_max_index;++counter)
+ {
+ if(_bytes_pool_array[counter]!=0)
+ free(_bytes_pool_array[counter]);
+
+ if(_context_pool_array[counter]!=0)
+ free(_context_pool_array[counter]);
+ }
+
+}
+
+
+// This routine flushes the memory and restarts all the tables of
+// probabilities, current order bytes are not modified, this function
+// is called when we ran out of memory. We have to output the code
+// number 256 which means memory flushing, for doing this we have to go
+// to order-(-1) so we have to output an escape code in all the orders
+// till we reach order-(-1) where we can code it. Then we free all the
+// memory, alloc it again, and reinitialize all the orders.
+//
+// However we may find the case when the current order is not initialized,
+// in this case we don't need to output an escape code.
+void ppmc_flush_mem_enc(void)
+{
+ unsigned long counter;
+
+
+ // Clear exclusion table
+ for(counter=0;counter!=256;++counter)
+ excluded[counter]=0;
+
+
+ // Code an escape code in order-4
+ if(ppmc_get_totf_order4()!=0) //if 0 no need of escape code
+ {
+
+ ppmc_get_escape_prob_order4(); //get prob and cump
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ }
+
+
+ // Code an escape code in order-3
+ if(ppmc_get_totf_order3()!=0) //if 0 no need of escape code
+ {
+
+ ppmc_get_escape_prob_order3(); //get prob and cump
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ }
+
+
+ // Code an escape code in order-2
+
+ o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+ // First check if current order-2 context is empty
+ if(order2_array[o2_cntxt].defined_bytes!=0) //it's not empty
+ {
+ ppmc_get_totf_order2();
+ ppmc_get_escape_prob_order2();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+ }
+
+
+ // Code an escape code in order-1
+
+ // First check if current order-1 table is empty
+ if(order1_defined_bytes_array[o1_byte]!=0) //it's not empty
+ {
+ ppmc_get_totf_order1();
+ ppmc_get_escape_prob_order1();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+ }
+
+
+ // Code an escape code in order-0. Order-0 always has at least one symbol
+
+ ppmc_get_totf_order0();
+ ppmc_get_escape_prob_order0();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+
+
+ // Now we can code the code 256
+
+ symb_prob=1;
+ symb_cump=256;
+ total_cump=257;
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+
+ // Now that decoder knows the flushing, free memory and reinit
+
+ ppmc_free_memory();
+ ppmc_alloc_memory();
+ ppmc_initialize_contexts();
+
+
+ // Be sure that order-0 has at least one probability
+
+ order0_array[o1_byte]++;
+ order0_max_cump++;
+ order0_defined_bytes++;
+
+}
+
+
+// When the decoder gets the symbol of flushing, most of the job is done
+// because we already got all the escape codes, so we only have to reinit.
+void ppmc_flush_mem_dec(void)
+{
+
+ // Free memory and reinit
+
+ ppmc_free_memory();
+ ppmc_alloc_memory();
+ ppmc_initialize_contexts();
+
+
+ // Be sure that order-0 has at least one probability
+
+ order0_array[o1_byte]++;
+ order0_max_cump++;
+ order0_defined_bytes++;
+
+
+}
+
+
+
+// ORDER-(-1) functions, also called ordern1 (Negative1) in functions
+//
+// Because order-(-1) does not need to update its probability tables, it
+// has no tables, and relies on the fact that the cump of byte is its own
+// value, and the probability is fixed, 1, and the total cump is 257.
+//
+// The alphabet has the following distribution: 0-255 the bytes. 256 is
+// an special symbol which means that we have flushed the encoder tables,
+// and thus the encoder must flush its tables too.
+//
+// The rest of the tables only have 256 symbols, because we have no need
+// of assign a symbol to the flush code (which already is the order-(-1)
+// table) nor to the escape code.
+//
+// For order-(-1) we don't use exclusion.
+
+
+// Gets the probability for a given symbol in the order-(-1) (ordern1)
+void ppmc_get_prob_ordern1(void)
+{
+ symb_cump=byte; //its value
+ symb_prob=1; //flat probability
+ total_cump=257; //total cump
+}
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-(-1)
+void ppmc_get_totf_ordern1(void)
+{
+ total_cump=257; //this is fixed
+}
+
+
+// Returns the symbol for a given cump under order-(-1)
+unsigned long ppmc_get_symbol_ordern1 (void)
+{
+ return symb_cump;
+}
+
+
+
+// ORDER-0 functions
+//
+// Due to the fact that order-0 has no context, I use an array for all the
+// probabilities under order-0, just as you could do in a basic model for
+// arithmetic coding.
+//
+// The main array is: order0_array. Where order0_array[byte] contains the
+// probability for a given byte. The same applies to order-1.
+//
+// To ensure that the updating and coding is done correctly, "byte" can't
+// be changed till all the coding and updating is done.
+//
+// Order-0 uses exclusions. Exclusion values are always prepared in "get_totf"
+// so there's no need to get them again. However order-0 doesn't have to
+// update exclude table, because order-(-1) will not use it
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-0. We have to read the whole array because we can't
+// guarante that all the bytes are used.
+void ppmc_get_totf_order0(void)
+{
+ unsigned long temp_cump, //temp value for the cump
+ counter;
+
+ exc_defined_bytes=0;
+ exc_max_cump=0;
+
+ // Read the number of defined bytes by reading the count of every byte
+ // and if it's present in the exclusion table.
+ for(counter=0;counter!=256;++counter)
+ {
+ if(excluded[counter]==0) //only if it's not excluded
+ if(order0_array[counter]!=0) //if it has a nonzero count, then it's present
+ {
+ ++exc_defined_bytes;
+ exc_max_cump+=order0_array[counter];
+ }
+ }
+
+ // Total cump is current total cump plus the probability for the escape code
+ exc_total_cump=exc_max_cump+exc_defined_bytes;
+}
+
+
+// Codes a byte under order-0 and returns 1, otherwise it returns a 0 and
+// has coded an escape code. In this case further coding is needed.
+//
+// Returns: 1 in case a byte was coded. 0 in case of escape code.
+char ppmc_code_byte_order0(void)
+{
+ unsigned long counter;
+
+ ppmc_get_totf_order0(); //get total cump
+
+ // It's possible that due to excluding, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ return 0;
+
+ // See if the byte is present
+ if(order0_array[byte]==0) //a probability of 0
+ {
+
+ // Because it was not present, output an escape code, prepare variables
+
+ symb_cump=exc_max_cump; //obviously its cump is current max_cump
+ //without escape code's space
+
+ symb_prob=exc_defined_bytes; //the number of defined bytes
+
+ total_cump=exc_total_cump;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0; //byte not coded
+ }
+ else
+ {
+
+ coded_in_order=0;
+
+ // The symbol is present, code it under order-0
+
+ symb_prob=order0_array[byte]; //get probability directly
+
+ // Make cump for current symbol
+
+ symb_cump=0; //for first symbol is 0
+ for(counter=0; counter!=byte ; ++counter)
+ {
+ if(excluded[counter]==0)
+ symb_cump+=order0_array[counter]; //sum probabilities before our symbol
+ }
+
+ total_cump=exc_total_cump;
+
+ // Code the symbol
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //symbol coded under order-0
+ }
+}
+
+
+// This functions update order-0 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+void ppmc_update_order0(void)
+{
+ if(order0_array[byte]==0)
+ {
+ // It had a zero probability
+ order0_array[byte]++; //increment symbol probability
+ ++order0_defined_bytes; //new byte defined
+ ++order0_max_cump; //total cump
+ return;
+ }
+ else
+ {
+ // It had a non-zero probability
+
+ // Increment its probability
+ order0_array[byte]++; //increment symbol probability
+ ++order0_max_cump; //total cump
+
+ // Check to see if its the maximum in this case renormalize
+ if(order0_array[byte]==255)
+ ppmc_renormalize_order0();
+
+ return;
+ }
+}
+
+
+// This functions renormalizes the probabilities at order-0 updating variables
+void ppmc_renormalize_order0(void)
+{
+ unsigned long counter;
+
+ // Initialize variables
+ order0_defined_bytes=0; //clear them
+ order0_max_cump=0;
+
+ // Loop over all probabilities, divide them by a factor of 2 and update variables
+ for(counter=0 ; counter!=256 ; ++counter)
+ {
+ if(order0_array[counter]!=0)
+ {
+ order0_array[counter]>>=1; //divide by a factor of 2
+ if(order0_array[counter]==0)
+ order0_array[counter]=1;
+ }
+
+ if(order0_array[counter]!=0) //see if it has a non zero probability
+ order0_defined_bytes++;
+
+ order0_max_cump+=order0_array[counter]; //sum to the total cump
+ }
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of a escape code it returns -1
+void ppmc_decode_order0(void)
+{
+ unsigned long current_cump, counter;
+
+
+ // Get the total cump needed for decoding symbol
+ ppmc_get_totf_order0(); //total cump needed for decoding
+
+ // It's possible that due to excluding, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ {
+ byte=-1;
+ return;
+ }
+
+ symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=exc_max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order0();
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ for(counter=0 ; counter!= 256 ; ++counter)
+ {
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+ if(excluded[counter]==0)
+ current_cump+=order0_array[counter]; //update cump
+ }
+
+ counter--; //-1 (see ac_ppmc.html)
+
+ byte=counter; //update the variable with the found symbol
+
+
+ // Get the cump and prob of byte
+
+ symb_prob=order0_array[byte]; //the probabilty
+ symb_cump=current_cump-order0_array[byte]; //using the cump which was
+ //already made
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+ coded_in_order=0; //decoded under order-0
+
+ return;
+ }
+
+}
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order0(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+ symb_prob=exc_defined_bytes;
+ symb_cump=exc_max_cump;
+ total_cump=exc_defined_bytes+exc_max_cump;
+}
+
+
+
+// ORDER-1 functions
+//
+// Order-1 uses 256 arrays one for every context, obviously they are accessed
+// like that: order1_array[o1_byte][byte]
+//
+// Also for computing the value of the current escape code in a given context
+// we need to know how many bytes are defined in a given context, they are
+// stored in an array, and you use them like that:
+// order1_defined_bytes_array[o1_byte]
+//
+// The same goes for the maximum cump of a given context:
+// order1_max_cump_array[o1_byte]
+//
+// Read the order-0 explanation.
+//
+// To ensure that the updating and coding is done correctly, "byte" and
+// "o1_byte" can't be changed till all the coding and updating is done.
+//
+// Order-1 uses exclusions. Exclusion values are always prepared in "get_totf"
+// so there's no need to get them again. It does update the exclude table.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-1. o1_byte should contain the order-1
+void ppmc_get_totf_order1(void)
+{
+ unsigned long temp_cump, //temp value for the cump
+ counter;
+
+ exc_defined_bytes=0;
+ exc_max_cump=0;
+
+ // Read the number of defined bytes by reading the count of every byte
+ // and if it's present in the exclusion table.
+ for(counter=0;counter!=256;++counter)
+ {
+ if(excluded[counter]==0) //only if it's not excluded
+ if(order1_array[o1_byte][counter]!=0) //if it has a nonzero count, then it's present
+ {
+ ++exc_defined_bytes;
+ exc_max_cump+=order1_array[o1_byte][counter];
+ }
+ }
+
+ // Total cump is current total cump plus the probability for the escape code
+ exc_total_cump=exc_max_cump+exc_defined_bytes;
+}
+
+
+// Codes a byte under order-1 and returns 1.
+// Otherwise it returns a 0 it may be that it has coded an escape code, or
+// that current table was empty, in this case nothing is outputted
+// In both cases further coding is needed.
+//
+// Returns: 1 in case a byte was coded. 0 in case of escape code or empty table.
+// In case the byte is coded under this context, coded_in_order=1.
+char ppmc_code_byte_order1(void)
+{
+ unsigned long counter;
+
+
+ // First check if current order-1 table is empty
+ if(order1_defined_bytes_array[o1_byte]==0) //it's empty
+ {
+ return 0; //byte not coded, nothing done
+ }
+
+
+ // Now try to code this byte under order-1
+
+ ppmc_get_totf_order1(); //get total cump
+
+ // It's possible that due to excluding, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ return 0;
+
+
+ // See if the byte is present
+ if(order1_array[o1_byte][byte]==0) //a probability of 0
+ {
+
+ // Because it was not present, output an escape code, prepare variables
+
+ symb_cump=exc_max_cump;
+
+ symb_prob=exc_defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob);
+
+ // Now update "exclude" table
+ for(counter=0;counter!=256;++counter)
+ if(order1_array[o1_byte][counter]!=0)
+ excluded[counter]=1; //occurred but was not code, now exclude
+
+ return 0; //byte not coded, escape coded
+ }
+ else
+ {
+
+ coded_in_order=1; //because we coded it under order-1
+
+ // The symbol is present, code it under order-1
+
+ symb_prob=order1_array[o1_byte][byte]; //get probability directly
+
+ // Make cump for current symbol
+
+ symb_cump=0; //for first symbol is 0
+ for(counter=0; counter!=byte ; ++counter)
+ {
+ if(excluded[counter]==0)
+ symb_cump+=order1_array[o1_byte][counter]; //sum probabilities before our symbol
+ }
+
+ // Code the symbol
+ range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob);
+
+ return 1; //symbol coded under order-1
+ }
+}
+
+
+// This functions update order-1 probabilities with current byte, also takes
+// care about updating variables, and renormalizing. o1_byte must be filled.
+void ppmc_update_order1(void)
+{
+ if(order1_array[o1_byte][byte]==0)
+ {
+ // It had a zero probability
+ order1_array[o1_byte][byte]++; //increment symbol probability
+ ++order1_defined_bytes_array[o1_byte]; //new byte defined
+ ++order1_max_cump_array[o1_byte]; //total cump
+ return;
+ }
+ else
+ {
+ // It had a non-zero probability
+
+ // Increment its probability
+ order1_array[o1_byte][byte]++; //increment symbol probability
+ ++order1_max_cump_array[o1_byte]; //total cump
+
+ // Check to see if its the maximum in this case renormalize
+ if(order1_array[o1_byte][byte]==255)
+ ppmc_renormalize_order1();
+
+ return;
+ }
+}
+
+
+// This functions renormalizes the probabilities at order-1 updating variables
+void ppmc_renormalize_order1(void)
+{
+ unsigned long counter;
+
+ // Initialize variables
+ order1_defined_bytes_array[o1_byte]=0; //clear them
+ order1_max_cump_array[o1_byte]=0;
+
+ // Loop over all probabilities, divide them by a factor of 2 and update variables
+ for(counter=0 ; counter!=256 ; ++counter)
+ {
+ if(order1_array[o1_byte][counter]!=0)
+ {
+ order1_array[o1_byte][counter]>>=1; //divide by a factor of 2
+ if(order1_array[o1_byte][counter]==0)
+ order1_array[o1_byte][counter]=1; //don't let it have a 0 count
+ }
+
+ if(order1_array[o1_byte][counter]!=0) //see if it has a non zero probability
+ order1_defined_bytes_array[o1_byte]++;
+
+ order1_max_cump_array[o1_byte]+=order1_array[o1_byte][counter]; //sum to the total cump
+ }
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+void ppmc_decode_order1(void)
+{
+ unsigned long current_cump, counter;
+
+
+ // First check if current order-1 table is empty
+ if(order1_defined_bytes_array[o1_byte]==0) //it's empty
+ {
+ byte=-1; //byte not coded, nothing done
+ return;
+ }
+
+
+ // Get the total cump needed for decoding symbol
+ ppmc_get_totf_order1(); //total cump needed for decoding
+
+ // It's possible that due to excluding, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ {
+ byte=-1;
+ return;
+ }
+
+ symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=exc_max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order1();
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+
+ // Now update "exclude" table
+ for(counter=0;counter!=256;++counter)
+ if(order1_array[o1_byte][counter]!=0)
+ excluded[counter]=1; //occurred but was not code, now exclude
+
+ // Mark as escape code (in fact nothing coded)
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ for(counter=0 ; counter!= 256 ; ++counter)
+ {
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+ if(excluded[counter]==0)
+ current_cump+=order1_array[o1_byte][counter]; //update cump
+ }
+
+ counter--; //-1 (see ppmc.txt, searching for a given symbol)
+
+ byte=counter; //update the variable with the found symbol
+
+
+ // Get the cump and prob of byte
+
+ symb_prob=order1_array[o1_byte][byte]; //the probabilty
+ symb_cump=current_cump-order1_array[o1_byte][byte]; //using the cump which was
+ //already made
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=1;
+
+ return;
+ }
+
+}
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order1(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+ symb_prob=exc_defined_bytes;
+ symb_cump=exc_max_cump;
+ total_cump=exc_defined_bytes+exc_max_cump;
+}
+
+
+
+// ORDER-2 functions
+//
+// Order-2 uses a table which holds the contexts data structure of every
+// structure, its accessed with "ppmc_order2_hash_key(o1_byte, o2_byte)"
+// and because it uses direct addressing there's no need to check for hash
+// collisions, which makes it faster. The variable "o2_cntxt" contains o1_byte
+// and o2_byte used directly to index the order-2 array.
+//
+// Though order-2 uses the same routines as lower orders they are rather
+// different, because we can't allocate memory for all the probabilities
+// tables, we have to use linked lists for the bytes and probabilities.
+//
+// Under order-2 we don't let a probability to be lower than 1, that is,
+// there can't be a symbol in a linked list with a 0 probability, I choosed
+// this for three factors: the code is easier to read (it isn't messy)
+// we gain some speed, and the loss of compression seems little.
+//
+// To ensure that the updating is done correctly, "o2_cntxt" and
+// "o2_ll_node" must not be modified by any routine except those who manage
+// order-2.
+//
+// Order-2 uses exclusions. Exclusion values are always prepared in "get_totf"
+// so there's no need to get them again. It does update the exclude table.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-2. cntxt must have o1_byte and o2_byte hashed with the hash key
+// for order-2.
+//
+// Here we have to compute both "exc_defined_bytes" and "exc_max_cump"
+void ppmc_get_totf_order2(void)
+{
+ struct _byte_and_freq *node;
+
+
+ // Read the whole linked list for making the values
+ node=order2_array[o2_cntxt].prob;
+ exc_max_cump=0;
+ exc_defined_bytes=0;
+
+ do{
+ if(excluded[node->byte]==0)
+ {
+ exc_defined_bytes++;
+ exc_max_cump+=node->freq; //add the probability of this byte to the cump
+ }
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // Total cump is current total cump plus the probability for the escape code
+ exc_total_cump=exc_max_cump+exc_defined_bytes;
+
+}
+
+
+// Codes a byte under order-2 and returns 1.
+// Otherwise it returns a 0. It may be that it has coded an escape code, or
+// that current table was empty.
+//
+// Returns: 1 in case a byte was coded. 0 in case of escape code or empty table.
+// In case the byte is coded under this context, coded_in_order=2.
+char ppmc_code_byte_order2(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Initialize o2_cntxt
+
+ o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+
+ // First check if current order-2 context is empty
+ if(order2_array[o2_cntxt].defined_bytes==0) //it's empty
+ {
+ return 0; //byte not coded, nothing done
+ }
+
+
+ // Now try to code this byte under order-2
+
+ ppmc_get_totf_order2(); //get total cump
+
+
+ // It's possible that due to exclusion, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ return 0;
+
+
+ // See if the byte is present and compute its cump at the same time
+
+ node=order2_array[o2_cntxt].prob; //pointer to first element in the linked list
+
+ symb_cump=0; //the first symbol always has a 0 cump
+
+
+ // Now search the byte in the linked list
+
+ do{
+ if(node->byte==byte)
+ goto ppmc_o2_byte_found; //bad thing, I know, anyone has a better idea?
+ if(excluded[node->byte]==0)
+ symb_cump+=node->freq; //add the probability of this byte to the cump
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // If we reach that point, the byte was not found in the linked list
+ // so we don't need the cump, we have to output an escape code, whose
+ // probabilities are know using the context structure in the table.
+
+ // Byte was not present in the linked list, current node is the last one,
+ // and that's the node needed for creating a new node, save it.
+
+ o2_ll_node=node;
+
+ // Now get the probability and cump of the escape code
+
+ symb_cump=exc_max_cump;
+ symb_prob=exc_defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob);
+
+ // Then, update "excluded" table
+
+ node=order2_array[o2_cntxt].prob;
+
+ do{
+ excluded[node->byte]=1;
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+ return 0; //now exit
+
+
+ // That code is executed when the byte is found in the linked list
+
+ ppmc_o2_byte_found:
+
+
+ // Everything has been tested, now we can feel free to code the byte,
+ // the symb_cump is already computed, now get its probability and code
+ // the byte, also save pointer to this element in the linked lists for
+ // updating.
+
+ coded_in_order=2; //successfully coded under order-2
+
+ o2_ll_node=node; //save it for updating
+
+ symb_prob=node->freq; //get the probability of the byte
+
+ // Code it.
+
+ range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob);
+
+ return 1; //byte coded under order-2
+}
+
+
+// This functions update order-2 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+// Of course "o2_ll_node" must point to the element to update or the last one,
+// based on the "coded_in_order" and checking the pointer of the element we
+// can decide what to do.
+//
+// This updating is only for encoding.
+void ppmc_update_order2(void)
+{
+ struct _byte_and_freq *node;
+
+
+ // First of all check if that's the first byte in this context, in that case
+ // we have to initialize some variables in the context structure.
+
+ if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+ order2_array[o2_cntxt].defined_bytes=1;
+ order2_array[o2_cntxt].max_cump=1;
+ order2_array[o2_cntxt].prob=_bytes_pool;
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+ // The byte was coded under order two, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==2) //coded under order-2
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o2_ll_node->freq++; //increment its frequency (rather probability)
+
+ order2_array[o2_cntxt].max_cump++; //total cump
+
+ if(o2_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order2(); //renormalize
+
+ }
+ else
+ {
+
+ // Once every paranoid check has been done we are sure that this byte
+ // did not existed and so we have to create a new node in the linked
+ // list. Also we have to take care of memory issues.
+ //
+ // However due to the use of exclusion, we have to ensure that "o2_ll_node"
+ // points to the last element in the linked lists of this context
+
+ node=order2_array[o2_cntxt].prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+ o2_ll_node=node;
+
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ o2_ll_node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ order2_array[o2_cntxt].max_cump++; //total cump
+ order2_array[o2_cntxt].defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+}
+
+
+// This functions renormalizes the probabilities at order-2 updating context
+// variables.
+void ppmc_renormalize_order2(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+ // Initialize variables. Defined bytes remain the same.
+ order2_array[o2_cntxt].max_cump=0; //clear them
+
+ node=order2_array[o2_cntxt].prob; //get pointer to lined lists
+
+ // Divide all the probabilities by 2 and update context variables
+ while(1)
+ {
+ node->freq>>=1; //divide by a factor of 2
+
+ if(node->freq==0) //don't allow a probability to be 0
+ node->freq=1;
+
+ order2_array[o2_cntxt].max_cump+=node->freq; //sum to the total cump
+
+ if(node->next==0) //last element
+ break;
+ node=node->next;
+ }
+
+
+ //printf("\nRenormalization, context:%c%c",o2_byte,o1_byte);
+
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+//
+// If we decode an escape code, we don't modify "o2_ll_node" and thus its
+// work of the updating routine to read till the end of the linked list
+// (for adding a new element) however if we decode a byte, we save on
+// "o2_ll_node" the pointer to its node. (so updating is faster)
+void ppmc_decode_order2(void)
+{
+ unsigned long current_cump, counter;
+ struct _byte_and_freq *node;
+
+ // Initialize o2_cntxt
+
+ o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+
+ // First check if current order-2 context is empty
+ if(order2_array[o2_cntxt].defined_bytes==0) //it's empty
+ {
+ byte=-1; //byte not coded, nothing done
+ return;
+ }
+
+
+ // Get the total cump needed for decoding symbol
+ ppmc_get_totf_order2(); //total cump needed for decoding
+
+
+ // It's possible that due to exclusion, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ {
+ byte=-1;
+ return; //byte not coded, nothing done
+ }
+
+
+ symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=exc_max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order2();
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ // Then, update "excluded" table
+
+ node=order2_array[o2_cntxt].prob;
+
+ do{
+ excluded[node->byte]=1;
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ node=order2_array[o2_cntxt].prob; //get pointer to linked lists
+
+ while(1)
+ {
+ if(excluded[node->byte]==0) //only if it's not excluded
+ current_cump+=node->freq; //update cump
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+
+ node=node->next; //next element
+ //we have no need to check for the last symbol, we'll never read further
+ //the end of the linked lists, before we'll found the last byte.
+ }
+
+
+ //read byte value and probability
+
+ symb_prob=node->freq; //get the probability for updating the state
+ byte=node->byte; //get byte
+ o2_ll_node=node; //used for updating
+
+
+ // Get the cump of byte
+
+ symb_cump=current_cump-symb_prob;
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=2;
+
+ return;
+ }
+
+}
+
+
+// This is the routine for updating while decoding. We have to search the byte
+// in the linked list, if it's present, update its count, otherwise we have
+// hitted the end of the linked list, and there we have to create a new node.
+//
+// Of course if the byte was matched in order-2 we'll have a pointer to it
+// in "o2_ll_node" so we don't need to read the linked list. (we already did
+// in decoding)
+//
+// Another case which we also have to specially deal with, this is the case
+// when the context has not been initalized yet.
+void ppmc_update_dec_order2(void)
+{
+ struct _byte_and_freq *node;
+
+
+ // Handle the case when the context is not initialized
+ // This code is the same as the one for the encoding.
+
+ if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+ order2_array[o2_cntxt].defined_bytes=1;
+ order2_array[o2_cntxt].max_cump=1;
+ order2_array[o2_cntxt].prob=_bytes_pool;
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+
+ return; //nothing else to do
+ }
+
+
+ // Current context is initalized, proceed
+
+ if(coded_in_order==2) //check if it was decoded under order-2
+ {
+
+ // We can be sure that the pointer "o2_ll_node" points to its entry, and
+ // it has a non 0 probability (otherwise it couldn't be coded) so just
+ // update its probability and max_cump
+
+ o2_ll_node->freq++; //the probability of the byte
+ order2_array[o2_cntxt].max_cump++; //the max_cump
+
+ if(o2_ll_node->freq==255) //check for renormalization
+ ppmc_renormalize_order2();
+
+ }
+ else
+ {
+
+ // An escape code was decoded under order-2, we have to read till the
+ // end of the linked list so we can add a new node for this new byte.
+
+ node=order2_array[o2_cntxt].prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+
+ // We reached the end of the linked list, add a new node if possible,
+ // we are using the same code of "ppmc_update_order2()" with the
+ // difference that the pointer to the linked list is "node"
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ order2_array[o2_cntxt].max_cump++; //total cump
+ order2_array[o2_cntxt].defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //we are finished updating
+
+ }
+
+}
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order2(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+
+ symb_prob=exc_defined_bytes;
+ symb_cump=exc_max_cump;
+}
+
+
+
+// ORDER-3 functions
+//
+// The difference between order-3 and order-3 are just a few, instead of
+// keeping a table with the context structures, we keep a hash table with
+// pointers to linked lists with the context, so it's only a matter of
+// searching current context in the linked list corresponding to its hash
+// entry. This is done in "ppmc_get_totf_order3" because that's the first
+// routine that both encoding and decoding routines call.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-3. Must be called while encoding or decoding before anything else
+// because it initializes the pointers to the context structure in
+// "o3_context" and o3_cntxt.
+//
+// If the hash entry is not initialized it returns "o3_context"=0
+// If the context is not present in the linked list of context, "o3_context"
+// will point to the last element in the linked list.
+// If the context is present "o3_context" will point to the context to use.
+// One can distinguish the last two by checking the context value of the
+// structure, if it's not the same, is the last element.
+//
+// The routine returns 0 if the hash entry is not initialized or if the
+// the context was not present. Otherwise it returns 1, meaning that we
+// have to code under this context.
+char ppmc_get_totf_order3(void)
+{
+ struct context *cntxt_node;
+ struct _byte_and_freq *node;
+
+
+ // First make the hash key for order-3
+
+ o3_cntxt=ppmc_order3_hash_key(o1_byte,o2_byte,o3_byte);
+ full_o3_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16); //order-3
+
+
+ // Now check the hash entry in the table
+
+ if(order3_hasht[o3_cntxt]==0) //if 0, not initialized
+ {
+
+ o3_context=0; //no hash entry
+
+ return 0; //hash entry not initialized
+ }
+
+
+ // Now read trough the linked list of context searching current one
+
+ cntxt_node=order3_hasht[o3_cntxt];
+
+ while(1)
+ {
+
+ if(cntxt_node->order4321==full_o3_cntxt) //compare context
+ goto ppmc_gtf_cntxt_found;
+
+ if(cntxt_node->next==0) //end of context's linked list
+ break;
+
+ cntxt_node=cntxt_node->next; //next element
+
+ }
+
+
+ // Once there the context was not found
+ o3_context=cntxt_node; //pointer to last element in the linked list
+
+ return 0; //it was not present
+
+
+ // The context is found, so return pointer and cump
+
+ ppmc_gtf_cntxt_found:
+
+ o3_context=cntxt_node;
+
+ // Read the whole linked list for making the values
+ node=o3_context->prob;
+ exc_max_cump=0;
+ exc_defined_bytes=0;
+
+ do{
+ if(excluded[node->byte]==0)
+ {
+ exc_defined_bytes++;
+ exc_max_cump+=node->freq; //add the probability of this byte to the cump
+ }
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // Total cump is current total cump plus the probability for the escape code
+ exc_total_cump=exc_max_cump+exc_defined_bytes;
+
+
+ return 1; //context found
+
+}
+
+
+// Codes a byte under order-3 and returns 1.
+// Otherwise it returns a 0.
+//
+// In case the byte is coded under this context, coded_in_order=3.
+char ppmc_code_byte_order3(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order3()==0)
+ return 0;
+
+
+ // It's possible that due to exclusion, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ return 0;
+
+
+ // See if the byte is present and compute its cump at the same time
+
+ node=o3_context->prob; //pointer to first element in the linked list
+
+ symb_cump=0; //the first symbol always has a 0 cump
+
+
+ // Now search the byte in the linked list
+
+ do{
+ if(node->byte==byte)
+ goto ppmc_o3_byte_found; //bad thing, I know, anyone has a better idea?
+ if(excluded[node->byte]==0)
+ symb_cump+=node->freq; //add the probability of this byte to the cump
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // If we reach that point, the byte was not found in the linked list
+ // so we don't need the cump, we have to output an escape code, whose
+ // probabilities are know using the context structure in the table.
+
+ // Byte was not present in the linked list, current node is the last one,
+ // and that's the node needed for creating a new node, save it.
+
+ o3_ll_node=node;
+
+ // Now get the probability and cump of the escape code
+
+ symb_cump=exc_max_cump;
+ symb_prob=exc_defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob);
+
+ // Then, update "excluded" table
+
+ node=o3_context->prob;
+
+ do{
+ excluded[node->byte]=1;
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+ return 0;
+
+
+ // That code is executed when the byte is found in the linked list
+
+ ppmc_o3_byte_found:
+
+
+ // Everything has been tested, now we can feel free to code the byte,
+ // the symb_cump is already computed, now get its probability and code
+ // the byte, also save pointer to this element in the linked lists for
+ // updating.
+
+ coded_in_order=3; //successfully coded under order-3
+
+ o3_ll_node=node; //save it for updating
+
+ symb_prob=node->freq; //get the probability of the byte
+
+ // Code it.
+
+ range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob);
+
+ return 1; //byte coded under order-3
+}
+
+
+// This functions update order-3 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+//
+// "o3_ll_node" must point to the element to update or the last one,
+// based on the "coded_in_order" and checking the pointer of the element we
+// can decide what to do. Also "o3_context" must be initialized.
+//
+// This updating is only for encoding.
+void ppmc_update_order3(void)
+{
+ struct _byte_and_freq *node;
+
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order3_hasht[o3_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+
+
+ // The byte was coded under order three, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==3) //coded under order-3
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o3_ll_node->freq++; //increment its frequency (rather probability)
+
+ o3_context->max_cump++; //total cump
+
+ if(o3_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order3(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o3_context" points to the last element, so we can put the new element.
+
+ if(o3_context->order4321==full_o3_cntxt) //chech if that's the last
+ { //element or a context found
+
+ // However due to the use of exclusion, we have to ensure that "o3_ll_node"
+ // points to the last element in the linked lists of this context
+
+ node=o3_context->prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+ o3_ll_node=node;
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ o3_ll_node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o3_context->max_cump++; //total cump
+ o3_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // Ensure that we are at the end of the linked list of contexts
+ o3_context=order3_hasht[o3_cntxt];
+
+ do{
+ if(o3_context->next==0)
+ break;
+ o3_context=o3_context->next;
+ }while(1);
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o3_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+// This functions renormalizes the probabilities at order-3 updating context
+// variables.
+void ppmc_renormalize_order3(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Initialize variables. Defined bytes remain the same.
+ o3_context->max_cump=0; //clear them
+
+ node=o3_context->prob; //get pointer to lined lists
+
+ // Divide all the probabilities by 2 and update context variables
+ while(1)
+ {
+ node->freq>>=1; //divide by a factor of 2
+
+ if(node->freq==0) //don't allow a probability to be 0
+ node->freq=1;
+
+ o3_context->max_cump+=node->freq; //sum to the total cump
+
+ if(node->next==0) //last element
+ break;
+ node=node->next;
+ }
+
+
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+//
+// If we decode an escape code, we don't modify "o3_ll_node" and thus its
+// work of the updating routine to read till the end of the linked list
+// (for adding a new element) however if we decode a byte, we save on
+// "o3_ll_node" the pointer to its node. (so updating is faster)
+void ppmc_decode_order3(void)
+{
+ unsigned long current_cump, counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order3()==0)
+ {
+ byte=-1;
+ return;
+ }
+
+ // It's possible that due to exclusion, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ {
+ byte=-1;
+ return;
+ }
+
+
+
+ // Decode current cump
+
+ symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=exc_max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order3();
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ // Then, update "excluded" table
+
+ node=o3_context->prob;
+
+ do{
+ excluded[node->byte]=1;
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ node=o3_context->prob; //get pointer to linked lists
+
+ while(1)
+ {
+ if(excluded[node->byte]==0)
+ current_cump+=node->freq; //update cump
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+
+ node=node->next; //next element
+ //we have no need to check for the last symbol, we'll never read further
+ //the end of the linked lists, before we'll found the last byte.
+ }
+
+
+ //read byte value and probability
+
+ symb_prob=node->freq; //get the probability for updating the state
+ byte=node->byte; //get byte
+ o3_ll_node=node; //used for updating
+
+
+ // Get the cump of byte
+
+ symb_cump=current_cump-symb_prob;
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=3;
+
+ return;
+ }
+
+}
+
+
+// This is the routine for updating while decoding. The only difference with
+// the routine for coding is that when an escape code was coded, "o3_ll_node"
+// is not initialized so we have to read till the end of the linked list.
+// Fortunately "o3_context" will be initialized so we don't need to read its
+// linked list.
+void ppmc_update_dec_order3(void)
+{
+ struct _byte_and_freq *node;
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order3_hasht[o3_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+ // The byte was coded under order three, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==3) //coded under order-3
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o3_ll_node->freq++; //increment its frequency (rather probability)
+
+ o3_context->max_cump++; //total cump
+
+ if(o3_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order3(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o3_context" points to the last element, so we can put the new element.
+
+ if(o3_context->order4321==full_o3_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ // Read till the end of the linked list
+
+ node=o3_context->prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+ // Now add element
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o3_context->max_cump++; //total cump
+ o3_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o3_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.");
+ exit(1);
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order3(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+
+ symb_prob=exc_defined_bytes;
+ symb_cump=exc_max_cump;
+}
+
+
+
+// ORDER-4 functions
+//
+// The routines for order-4 are *equal* to those for order-3, there are a few
+// changes like different global variables, and different hash keys.
+//
+// If you want to go to higher orders, you'd use the same code and data
+// structures, with the difference of the context bytes (order4321)
+// stored in every context's linked list.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-4. Must be called while encoding or decoding before anything else
+// because it initializes the pointers to the context structure in
+// "o4_context" and o4_cntxt.
+//
+// If the hash entry is not initialized it returns "o4_context"=0
+// If the context is not present in the linked list of context, "o4_context"
+// will point to the last element in the linked list.
+// If the context is present "o4_context" will point to the context to use.
+// One can distinguish the last two by checking the context value of the
+// structure, if it's not the same, is the last element.
+//
+// The routine returns 0 if the hash entry is not initialized or if the
+// the context was not present. Otherwise it returns 1, meaning that we
+// have to code under this context.
+char ppmc_get_totf_order4(void)
+{
+ struct context *cntxt_node;
+ struct _byte_and_freq *node;
+
+
+ // First make the hash key for order-4
+
+ o4_cntxt=ppmc_order4_hash_key(o1_byte,o2_byte,o3_byte,o4_byte);
+ full_o4_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16)+(o4_byte<<24); //order-4
+
+
+ // Now check the hash entry in the table
+
+ if(order4_hasht[o4_cntxt]==0) //if 0, not initialized
+ {
+
+ o4_context=0; //no hash entry
+
+ return 0; //hash entry not initialized
+ }
+
+
+ // Now read trough the linked list of context searching current one
+
+ cntxt_node=order4_hasht[o4_cntxt];
+
+ while(1)
+ {
+
+ if(cntxt_node->order4321==full_o4_cntxt) //compare context
+ goto ppmc_gtfo4_cntxt_found;
+
+ if(cntxt_node->next==0) //end of context's linked list
+ break;
+
+ cntxt_node=cntxt_node->next; //next element
+
+ }
+
+
+ // Once there the context was not found
+ o4_context=cntxt_node; //pointer to last element in the linked list
+
+ return 0; //it was not present
+
+
+ // The context is found, so return pointer and cump
+
+ ppmc_gtfo4_cntxt_found:
+
+ o4_context=cntxt_node;
+
+ // Read the whole linked list for making the values
+ node=o4_context->prob;
+ exc_max_cump=0;
+ exc_defined_bytes=0;
+
+ do{
+ if(excluded[node->byte]==0)
+ {
+ exc_defined_bytes++;
+ exc_max_cump+=node->freq; //add the probability of this byte to the cump
+ }
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // Total cump is current total cump plus the probability for the escape code
+ exc_total_cump=exc_max_cump+exc_defined_bytes;
+
+
+ return 1; //context found
+
+}
+
+
+// Codes a byte under order-4 and returns 1.
+// Otherwise it returns a 0.
+//
+// In case the byte is coded under this context, coded_in_order=4.
+char ppmc_code_byte_order4(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order4()==0)
+ return 0;
+
+
+ // It's possible that due to exclusion, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ return 0;
+
+
+ // See if the byte is present and compute its cump at the same time
+
+ node=o4_context->prob; //pointer to first element in the linked list
+
+ symb_cump=0; //the first symbol always has a 0 cump
+
+
+ // Now search the byte in the linked list
+
+ do{
+ if(node->byte==byte)
+ goto ppmc_o4_byte_found; //bad thing, I know, anyone has a better idea?
+ if(excluded[node->byte]==0)
+ symb_cump+=node->freq; //add the probability of this byte to the cump
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // If we reach that point, the byte was not found in the linked list
+ // so we don't need the cump, we have to output an escape code, whose
+ // probabilities are know using the context structure in the table.
+
+ // Byte was not present in the linked list, current node is the last one,
+ // and that's the node needed for creating a new node, save it.
+
+ o4_ll_node=node;
+
+ // Now get the probability and cump of the escape code
+
+ symb_cump=exc_max_cump;
+ symb_prob=exc_defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob);
+
+ // Then, update "excluded" table
+
+ node=o4_context->prob;
+
+ do{
+ excluded[node->byte]=1;
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+ return 0;
+
+
+ // That code is executed when the byte is found in the linked list
+
+ ppmc_o4_byte_found:
+
+
+ // Everything has been tested, now we can feel free to code the byte,
+ // the symb_cump is already computed, now get its probability and code
+ // the byte, also save pointer to this element in the linked lists for
+ // updating.
+
+ coded_in_order=4; //successfully coded under order-4
+
+ o4_ll_node=node; //save it for updating
+
+ symb_prob=node->freq; //get the probability of the byte
+
+ // Code it.
+
+ range_coder_encode(&rc_coder,exc_total_cump,symb_cump,symb_prob);
+
+ return 1; //byte coded under order-4
+}
+
+
+// This functions update order-4 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+//
+// "o4_ll_node" must point to the element to update or the last one,
+// based on the "coded_in_order" and checking the pointer of the element we
+// can decide what to do. Also "o4_context" must be initialized.
+//
+// This updating is only for encoding.
+void ppmc_update_order4(void)
+{
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order4_hasht[o4_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.");
+ exit(1);
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+
+
+ // The byte was coded under order three, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==4) //coded under order-4
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o4_ll_node->freq++; //increment its frequency (rather probability)
+
+ o4_context->max_cump++; //total cump
+
+ if(o4_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order4(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o4_context" points to the last element, so we can put the new element.
+
+ if(o4_context->order4321==full_o4_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ o4_ll_node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o4_context->max_cump++; //total cump
+ o4_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o4_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.");
+ exit(1);
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+// This functions renormalizes the probabilities at order-4 updating context
+// variables.
+void ppmc_renormalize_order4(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Initialize variables. Defined bytes remain the same.
+ o4_context->max_cump=0; //clear them
+
+ node=o4_context->prob; //get pointer to lined lists
+
+ // Divide all the probabilities by 2 and update context variables
+ while(1)
+ {
+ node->freq>>=1; //divide by a factor of 2
+
+ if(node->freq==0) //don't allow a probability to be 0
+ node->freq=1;
+
+ o4_context->max_cump+=node->freq; //sum to the total cump
+
+ if(node->next==0) //last element
+ break;
+ node=node->next;
+ }
+
+
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+//
+// If we decode an escape code, we don't modify "o4_ll_node" and thus its
+// work of the updating routine to read till the end of the linked list
+// (for adding a new element) however if we decode a byte, we save on
+// "o4_ll_node" the pointer to its node. (so updating is faster)
+void ppmc_decode_order4(void)
+{
+ unsigned long current_cump, counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order4()==0)
+ {
+ byte=-1;
+ return;
+ }
+
+
+ // It's possible that due to exclusion, there's no byte left, in that case
+ // return.
+ if(exc_defined_bytes==0)
+ {
+ byte=-1;
+ return;
+ }
+
+
+ // Decode current cump
+
+ symb_cump=range_decoder_decode(&rc_decoder,exc_total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=exc_max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order4();
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ // Then, update "excluded" table
+
+ node=o4_context->prob;
+
+ do{
+ excluded[node->byte]=1;
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ node=o4_context->prob; //get pointer to linked lists
+
+ while(1)
+ {
+ if(excluded[node->byte]==0)
+ current_cump+=node->freq; //update cump
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+
+ node=node->next; //next element
+ //we have no need to check for the last symbol, we'll never read further
+ //the end of the linked lists, before we'll found the last byte.
+ }
+
+
+ //read byte value and probability
+
+ symb_prob=node->freq; //get the probability for updating the state
+ byte=node->byte; //get byte
+ o4_ll_node=node; //used for updating
+
+
+ // Get the cump of byte
+
+ symb_cump=current_cump-symb_prob;
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,exc_total_cump,symb_cump,symb_prob);
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=4;
+
+ return;
+ }
+
+}
+
+
+// This is the routine for updating while decoding. The only difference with
+// the routine for coding is that when an escape code was coded, "o4_ll_node"
+// is not initialized so we have to read till the end of the linked list.
+// Fortunately "o4_context" will be initialized so we don't need to read its
+// linked list.
+void ppmc_update_dec_order4(void)
+{
+ struct _byte_and_freq *node;
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order4_hasht[o4_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.");
+ exit(1);
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+ // The byte was coded under order four, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==4) //coded under order-4
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o4_ll_node->freq++; //increment its frequency (rather probability)
+
+ o4_context->max_cump++; //total cump
+
+ if(o4_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order4(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o4_context" points to the last element, so we can put the new element.
+
+ if(o4_context->order4321==full_o4_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ // Read till the end of the linked list
+
+ node=o4_context->prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+ // Now add element
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o4_context->max_cump++; //total cump
+ o4_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o4_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.");
+ exit(1);
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order4(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+
+ symb_prob=exc_defined_bytes;
+ symb_cump=exc_max_cump;
+}
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmc.h" (exclusions)
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+ Part of the ppmc encoder and decoder.
+
+ This module contains the definitions of different functions and all the
+ data structures defined by ppmc. Also contains defines.
+*/
+
+// Definitions
+
+#define ppmc_order4_hash_size 65536
+#define ppmc_order4_hash_key(k,j,i,l) ( (k)+(j<<8)+(i<<1)+(l<<9) )& ppmc_order4_hash_size-1
+#define ppmc_order3_hash_size 65536
+#define ppmc_order3_hash_key(k,j,i) ((k)+(j<<7)+(i<<11)) & ppmc_order3_hash_size-1
+#define ppmc_order2_hash_key(k,j) ((k)+(j<<8))
+#define _bytes_pool_elements 125000 //this is used the first time
+ //that we allocate memory, that's
+ //the number of entries
+#define _bytes_pool_elements_inc 125000 //if we need to alloc again, this
+ //is the number of entries to get
+#define _context_pool_elements 50000
+#define _context_pool_elements_inc 50000
+
+#define _mempool_max_index 1000 //the number of entries in the array with
+ //pointers
+
+
+// Data structures
+
+// This structure contains a single element of a linked lists which contains
+// the probability distribution of a given order. This structure takes 6 bytes.
+struct _byte_and_freq{
+unsigned char byte; //the byte itself
+unsigned char freq; //and the frequency of it
+struct _byte_and_freq *next; //pointer to next element in linked list or 0
+};
+
+
+// This structure is used for both order-3 and order-4. It takes 20 bytes,
+// and it can still hold another byte more. (only 19 being used)
+// Order 2-1-0-(-1) use different structures for a faster accessing.
+struct context{
+struct context *next; //next context in the hash entry
+unsigned long order4321; //order-4-3-2-1 (or order-3-2-1 for order-3)
+struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution
+unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 )
+unsigned int defined_bytes; //the number of bytes in this context
+};
+
+// That's the same but for order-2 where there's no hash collisions.
+struct context_o2{
+struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution
+unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 )
+unsigned int defined_bytes; //the number of bytes in this context
+};
+
+
+// Declaration of functions
+
+
+// Functions for initializing
+void ppmc_alloc_memory(void);
+void ppmc_initialize_contexts(void);
+void ppmc_encoder_initialize(void);
+void ppmc_decoder_initialize(void);
+void ppmc_free_memory(void);
+void ppmc_flush_mem_enc(void);
+void ppmc_flush_mem_dec(void);
+
+// Functions for order-(-1)
+void ppmc_get_prob_ordern1(void);
+unsigned long ppmc_get_symbol_ordern1(void);
+void ppmc_get_totf_ordern1(void);
+void ppmc_renormalize_order1(void);
+
+// Functions for order-0
+void ppmc_get_totf_order0(void);
+char ppmc_code_byte_order0(void);
+void ppmc_update_order0(void);
+void ppmc_renormalize_order0(void);
+void ppmc_decode_order0(void);
+void ppmc_get_escape_prob_order0(void);
+void ppmc_get_prob_order0(void);
+
+// Functions for order-1
+void ppmc_get_totf_order1(void);
+char ppmc_code_byte_order1(void);
+void ppmc_update_order1(void);
+void ppmc_renormalize_order1(void);
+void ppmc_decode_order1(void);
+void ppmc_get_escape_prob_order1(void);
+void ppmc_get_prob_order1(void);
+
+
+// Functions for order-2
+void ppmc_get_totf_order2(void);
+char ppmc_code_byte_order2(void);
+void ppmc_update_order2(void);
+void ppmc_renormalize_order2(void);
+void ppmc_decode_order2(void);
+void ppmc_update_dec_order2(void);
+void ppmc_get_escape_prob_order2(void);
+void ppmc_get_prob_order2(void);
+
+
+// Functions for order-3
+char ppmc_get_totf_order3(void);
+char ppmc_code_byte_order3(void);
+void ppmc_update_order3(void);
+void ppmc_renormalize_order3(void);
+void ppmc_decode_order3(void);
+void ppmc_update_dec_order3(void);
+void ppmc_get_escape_prob_order3(void);
+void ppmc_get_prob_order3(void);
+
+
+// Functions for order-4
+char ppmc_get_totf_order4(void);
+char ppmc_code_byte_order4(void);
+void ppmc_update_order4(void);
+void ppmc_renormalize_order4(void);
+void ppmc_decode_order4(void);
+void ppmc_update_dec_order4(void);
+void ppmc_get_escape_prob_order4(void);
+void ppmc_get_prob_order4(void);
+
+
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmcdata.c" (exclusions)
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains global data.
+*/
+
+#include "ppmc.h" //defines
+#include "range.h"
+
+// Order-4 uses a hash table which points to the start of a linked list with
+// the different context, which has the cump, the number of different symbols
+// and a pointer to the linked list with the bytes and frequencies.
+// Order-3 is almost the same, both take 262144 bytes.
+struct context *order4_hasht[ppmc_order4_hash_size];
+
+struct context *order3_hasht[ppmc_order3_hash_size];
+
+
+// The array for order-2 is different, as we do directly hashing, and thus
+// we have no need to do the stuff of linked lists for the context itself,
+// so it contains the context used. This takes 1310720 bytes.
+struct context_o2 order2_array[65536];
+
+
+// Those are the multiple arrays for order-1. It takes 65536 bytes.
+unsigned char order1_array[256][256];
+unsigned int order1_defined_bytes_array[256]; //the defined bytes in every context
+unsigned int order1_max_cump_array[256]; //max cump of every context
+
+
+// This is the array for order-0. It takes 256 bytes.
+unsigned char order0_array[256];
+unsigned int order0_defined_bytes;
+unsigned int order0_max_cump;
+
+
+// No need of variables for order-(-1), because it's fixed.
+
+
+
+// Those are the pointers and variables used for managing the mem pool for
+// both context, and bytes and frequencies.
+struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked
+ //lists with bytes and frequencies
+ *_bytes_pool_max; //the maximum of this buffer
+struct context *_context_pool; //pointer to pool containing contexts
+struct context *_context_pool_max; //the same as with _bytes_pool
+
+unsigned long _bytes_pool_index; //index in array of pointers
+unsigned long _context_pool_index;
+
+//the following is an array keeping pointers to different buffers. A new
+//buffer is allocated when the current one is full, so we always have a
+//buffer for linked lists. (without allocating a buffer for every element)
+struct _byte_and_freq *_bytes_pool_array[_mempool_max_index];
+struct context *_context_pool_array[_mempool_max_index];
+
+char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any
+ //routine that needs to allocate memory must
+ //quit if that's 1.
+
+
+// Variables which contain current byte to code and order
+unsigned long byte, //current byte to code
+ o1_byte, //order-1 byte
+ o2_byte, //order-2 byte
+ o3_byte, //order-3 byte
+ o4_byte; //order-4 byte
+
+unsigned long o2_cntxt; //used in the hash key of order-2
+unsigned long o3_cntxt; //use as hash key for order-3
+unsigned long o4_cntxt; //use as hash key for order-4
+unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together
+unsigned long full_o4_cntxt; //order-4-3-2-1
+
+unsigned long coded_in_order; //in which order the last byte was coded
+ //it's for update exclusion
+ //also used for decoding
+
+// Variables used for coding
+
+unsigned long
+ total_cump, //the total cumulative probability
+ symb_cump, //the symbol cumulative probability
+ symb_prob; //the symbol frequency
+
+rangecoder rc_coder; //state of range coder
+rangecoder rc_decoder; //state of range decoder
+
+// File handles
+
+ FILE *file_input, //file to code
+ *file_output; //file where the coded data is placed
+
+
+// Pointers to linked lists and context structures used for faster updating
+// or creation of new nodes, because instead of reading again all the linked
+// list till the end (in the case of creation) we have a pointer to the last
+// element. In the case that a byte was present in the linked lists but it
+// had a 0 count, we just have to update its probability. And in the case
+// that it already was present and we coded it under that context or a lower
+// one, we just have to update its probability.
+
+struct _byte_and_freq *o2_ll_node; //pointer to linked lists under order-2
+ //where does it points depends in which
+ //order the byte was coded.
+struct _byte_and_freq *o3_ll_node; //the same but for order-3
+struct _byte_and_freq *o4_ll_node;
+
+struct context *o3_context; //pointer to current order-3 context
+struct context *o4_context; //pointer to current order-3 context
+
+
+// Those are the variables especially used for exclusion.
+
+// the array for doing exclusion. Every routine from every order can
+// use them.
+unsigned char excluded[256]; //if 0 it was not present, if it's 1 then
+ //it appeared in a higher order.
+
+unsigned long exc_total_cump, //total cump of context (after exclusion)
+ exc_defined_bytes,//defined bytes in context (after exclusion)
+ exc_max_cump;
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmcdata.h" (exclusions)
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains externs definition for global data.
+*/
+
+#include "ppmc.h"
+
+// Order-4 uses a hash table which points to the start of a linked list with
+// the different context, which has the cump, the number of different symbols
+// and a pointer to the linked list with the bytes and frequencies.
+// Order-3 is almost the same, both take 262144 bytes.
+extern struct context *order4_hasht[];
+
+extern struct context *order3_hasht[];
+
+
+// The array for order-2 is different, as we do directly hashing, and thus
+// we have no need to do the stuff of linked lists for the context itself,
+// so it contains the context used. This takes 1310720 bytes.
+extern struct context_o2 order2_array[];
+
+
+// Those are the multiple arrays for order-1. It takes 65536 bytes.
+extern unsigned char order1_array[256][256];
+extern unsigned int order1_defined_bytes_array[]; //the defined bytes in every context
+extern unsigned int order1_max_cump_array[]; //max cump of every context
+
+
+// This is the array for order-0. It takes 256 bytes.
+extern unsigned char order0_array[];
+extern unsigned int order0_defined_bytes;
+extern unsigned int order0_max_cump;
+
+
+// Those are the pointers and variables used for managing the mem pool for
+// both context, and bytes and frequencies.
+extern struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked
+ //lists with bytes and frequencies
+ *_bytes_pool_max; //the maximum of this buffer
+extern struct context *_context_pool; //pointer to pool containing contexts
+extern struct context *_context_pool_max; //the same as with _bytes_pool
+
+extern unsigned long _bytes_pool_index; //index in array of pointers
+extern unsigned long _context_pool_index;
+
+//the following is an array keeping pointers to different buffers. A new
+//buffer is allocated when the current one is full, so we always have a
+//buffer for linked lists. (without allocating a buffer for every element)
+extern struct _byte_and_freq *_bytes_pool_array[_mempool_max_index];
+extern struct context *_context_pool_array[_mempool_max_index];
+
+extern char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any
+ //routine that needs to allocate memory must
+ //quit if that's 1.
+
+
+
+// Variables which contain current byte to code and order
+extern unsigned long //they are only bytes
+ byte, //current byte to code
+ o1_byte, //order-1 byte
+ o2_byte, //order-2 byte
+ o3_byte, //order-3 byte
+ o4_byte; //order-4 byte
+
+extern unsigned long o2_cntxt; //used in the hash key of order-2
+extern unsigned long o3_cntxt; //use as hash key for order-3
+unsigned long o4_cntxt; //use as hash key for order-4
+extern unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together
+extern unsigned long full_o4_cntxt; //order-4-3-2-1
+
+extern unsigned long coded_in_order; //in which order the last byte was coded
+ //it's for update exclusion
+ //also used for decoding
+// Variables used for coding
+
+extern unsigned long //no need for negative values
+ total_cump, //the total cumulative probability
+ symb_cump, //the symbol cumulative probability
+ symb_prob; //the symbol frequency
+
+extern rangecoder rc_coder; //state of range coder
+extern rangecoder rc_decoder; //state of range decoder
+
+// File handles
+
+ FILE *file_input, //file to code
+ *file_output; //file where the coded data is placed
+
+
+
+// Pointers to linked lists and context structures used for faster updating
+// or creation of new nodes, because instead of reading again all the linked
+// list till the end (in the case of creation) we have a pointer to the last
+// element. In the case that a byte was present in the linked lists but it
+// had a 0 count, we just have to update its probability. And in the case
+// that it already was present and we coded it under that context or a lower
+// one, we just have to update its probability.
+
+
+extern struct _byte_and_freq *o2_ll_node;//pointer to linked lists under order-2
+ //where does it points depends in which
+ //order the byte was coded.
+extern struct _byte_and_freq *o3_ll_node; //the same but for order-3
+extern struct _byte_and_freq *o4_ll_node;
+
+extern struct context *o3_context; //pointer to current order-3 context
+extern struct context *o4_context; //pointer to current order-3 context
+
+
+// the array for doing exclusion. Every routine from every order can
+// use them.
+extern unsigned char excluded[256]; //if 0 it was not present, if it's 1 then
+ //it appeared in a higher order.
+
+extern unsigned long exc_total_cump, //total cump of context (after exclusion)
+ exc_defined_bytes,//defined bytes in context (after exclusion)
+ exc_max_cump;
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmcmain.c" (exclusions)
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder only. Using exclusion.
+
+ This module is the main module and calls the different modules to do
+ the encoding of a file. When done prints bpb and kbyps.
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "range.h" //the range coder functions and data
+#include "ppmcdata.h"
+
+
+long filesize(FILE *stream);
+
+unsigned long debug=0;
+
+
+//Main
+void main (char argc, char *argv[])
+{
+ unsigned long counter, //temporal counter for loops like for or while
+ counter2, //another temporal counter for sub loops
+ size_file_input; //the size of the input file
+
+
+ // Print title
+ printf("PPMC using range coder. (with exclusion)\n");
+ printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n");
+ printf("Permission is granted to make verbatim copies of this program for private\n");
+ printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n");
+
+
+
+ // Check for correct number of parameters
+ if(argc!=3)
+ {
+ printf("Bad number of arguments.\n");
+ exit(1);
+ }
+
+
+ // Try to open input and output files
+ if((file_input=fopen(argv[1],"r+b"))==NULL)
+ {
+ printf("Couldn't open %s.\n",argv[1]);
+ exit(1);
+ }
+
+ if((file_output=fopen(argv[2],"w+b"))==NULL)
+ {
+ printf("Couldn't create %s.\n",argv[2]);
+ exit(1);
+ }
+
+
+ // Check input file length and not accept 0 length files
+ size_file_input=filesize(file_input);
+
+ if(size_file_input<5)
+ {
+ printf("Can't work with files below than 5 bytes!");
+ exit(1);
+ }
+
+
+ // First output file length
+ fwrite(&size_file_input,1,4,file_output); //input length
+
+
+ // Initialize ppmc encoder
+ ppmc_alloc_memory(); //get memory
+ ppmc_initialize_contexts(); //initialize model
+ ppmc_encoder_initialize();
+
+ // Initialize range coder
+ range_coder_init(&rc_coder,file_output);
+
+
+ // Start main loop which codes the file
+ while((byte=fgetc(file_input))!=EOF)
+ {
+
+ // Clear exclusion table
+ for(counter=0;counter!=256;++counter)
+ excluded[counter]=0;
+
+
+ // Try to code current byte under order-4 if possible then go to lower orders
+ if(ppmc_code_byte_order4()==0)
+ if(ppmc_code_byte_order3()==0)
+ if(ppmc_code_byte_order2()==0)
+ if(ppmc_code_byte_order1()==0)
+ if(ppmc_code_byte_order0()==0) //else try to code under order-0
+ {
+ // Code under order-(-1)
+ ppmc_get_prob_ordern1();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+ coded_in_order=0; //update all the tables (unless order-(-1))
+ }
+
+
+ // Now do update exclusion
+
+ switch(coded_in_order)
+ {
+ case 0: ppmc_update_order0(); //update only order-0
+ case 1: ppmc_update_order1(); //update order-0 and order-1
+ case 2: ppmc_update_order2(); //update order-2 1 and 0...
+ case 3: ppmc_update_order3();
+ case 4: ppmc_update_order4();
+ default: break;
+ };
+
+
+ // Update order variables
+
+ o4_byte=o3_byte;
+ o3_byte=o2_byte;
+ o2_byte=o1_byte;
+ o1_byte=byte; //current one is next time order-1
+
+ debug++;
+
+ // Check if we run out of memory, in that case, flush the encoder
+
+ if(ppmc_out_of_memory==1)
+ {
+ printf("Flushing memory! Output file might be not decodable.\n");
+ ppmc_flush_mem_enc();
+ }
+
+
+ }
+
+
+ // Flush range coder
+ range_coder_flush(&rc_coder);
+
+ // Free memory
+ ppmc_free_memory();
+
+
+ // Print bpb and kbyps
+ printf("%s at %f bpb.\n",argv[1],((float)filesize(file_output)/(float)size_file_input)*(float)8);
+
+
+ // Close file handles
+ fclose(file_input);
+ fclose(file_output);
+
+
+
+ // Nicely exit
+ exit(0);
+}
+
+
+// Routines not used by ppmc but rather by main.
+// Not including the range coder.
+
+
+// Returns the file size of a given file.
+long filesize(FILE *stream)
+{
+ long curpos, length;
+
+ curpos = ftell(stream);
+ fseek(stream, 0L, SEEK_END);
+ length = ftell(stream);
+ fseek(stream, curpos, SEEK_SET);
+ return length;
+}
+
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "range.c" (exclusion)
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ This module contains the routines of both the range coder and decoder.
+
+ The range coder works internally in 32 bits, and uses bytes as symbols.
+ Also the end of message symbol is used. So z=257.
+
+ Both input and output use rc_file as the file stream. Of course we can't
+ code and decode at the same time. All the input or output comes from the
+ same file, no matter what range coder structure are we using. The modules
+ here provided don't manage the io except for reading and writing, they
+ don't open nor close the files. The reading and writing is done via
+ putc and getc.
+*/
+
+#include "range.h"
+
+/*
+ Inits the range coder state. Must be called before encoding any symbol.
+ It uses a magic number 0xB3 as the first byte outputted.
+ -rangecoder *rc, the range coder to be used.
+
+ Shoulde be called like that:
+ range_coder_init(&o0_rc_state,file_output);
+*/
+void range_coder_init(rangecoder *rc, FILE *stream)
+{
+ rc_file=stream;
+ rc->low=0; //define state
+ rc->range=0x80000000;
+ rc->byte_buffer=0xB3; //magic number
+ rc->help=0; //temp value
+}
+
+
+/*
+ Encodes a symbol.
+ -rangecoder *rc, the range coder to be used.
+ -unsigned long tot_f, the maximum cumulative frequency
+ -unsigned long lt_f, the cumulative probabilty of the symbol
+ -unsigned long sy_f, the probability of the symbol
+*/
+void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f)
+{
+ unsigned long temp, r;
+
+ if(lt_f>tot_f)
+ {
+ printf("BUG!!");
+ exit(1);
+ }
+
+// printf("\nc> %d,%d,%d ",tot_f,lt_f,sy_f);
+
+ range_coder_renormalize(rc); //&rc?
+
+ r=rc->range/tot_f;
+ temp=r*lt_f;
+ if(lt_f+sy_f<tot_f)
+ rc->range=r*sy_f;
+ else
+ rc->range-=temp;
+ rc->low+=temp;
+}
+
+/*
+ Renormalizes the state when coding.
+ -rangecoder *rc, the range coder to be used.
+*/
+
+void range_coder_renormalize(rangecoder *rc)
+{
+ while(rc->range<=(unsigned long)0x00800000)
+ {
+ if(rc->low<(unsigned long)0x7F800000)
+ {
+ putc(rc->byte_buffer,rc_file);
+ for(;rc->help;rc->help--)
+ putc(0xFF,rc_file);
+ rc->byte_buffer=(unsigned char)(rc->low>>23);
+ }
+ else
+ {
+ if(rc->low&(unsigned long)0x80000000)
+ {
+ putc(rc->byte_buffer+1,rc_file);
+ for(;rc->help;rc->help--)
+ putc(0x00,rc_file);
+ rc->byte_buffer=(unsigned char)(rc->low>>23);
+ }
+ else
+ rc->help++;
+ }
+ rc->range<<=8;
+ rc->low=(rc->low<<8)&(unsigned long)(0x7FFFFFFF);
+ }
+}
+
+
+/*
+ Flushes the encoder. Must be called when the coding is done.
+ -rangecoder *rc, the range coder to be used.
+
+ Shoulde be called like that:
+ range_coder_flush(&o0_rc_state);
+*/
+void range_coder_flush(rangecoder *rc)
+{
+ unsigned long tmp;
+
+ range_coder_renormalize(rc);
+ tmp = rc->low >> 23;
+ if (tmp > 0xff)
+ {
+ putc(rc->byte_buffer+1,rc_file);
+ for(; rc->help; rc->help--)
+ putc(0,rc_file);
+ }
+ else
+ {
+ putc(rc->byte_buffer,rc_file);
+ for(; rc->help; rc->help--)
+ putc(0xff,rc_file);
+ }
+
+ putc(tmp & 0xff,rc_file);
+ putc((tmp = rc->low >> (23-8)) & 0xff,rc_file);
+}
+
+
+/*
+ Inits the range decoder state. Also checks for the magic number, and
+ quits in case it isn't the first, so be careful.
+ -rangecoder *rc, the range coder to be used.
+*/
+void range_decoder_init(rangecoder *rc, FILE *stream)
+{
+ unsigned int _rd_c;
+
+ rc_file=stream;
+ if((_rd_c=getc(rc_file))!=0xB3)
+ {
+ printf("\nThis is not range coded data. Magic number not found. Exiting.");
+ exit(1);
+ }
+ rc->byte_buffer=getc(rc_file);
+ rc->low=rc->byte_buffer>>1;
+ rc->range=0x80;
+}
+
+
+/*
+ Decode a symbol, get its cumulative probability.
+ Input:
+ -rangecoder *rc, the range coder to be used.
+ -unsigned long tot_f, the maximum cumulative probability
+ Output:
+ -unsigned long, cumulative probability of the current symbol
+ Should be called like that:
+ current_cump=range_decoder_decode(&o0_rc_state,o0_tot_f);
+*/
+unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f)
+{
+ unsigned long temp;
+
+ range_decoder_renormalize(rc);
+ rc->help=rc->range/tot_f;
+ temp=rc->low/rc->help;
+ if(temp>=tot_f)
+ return tot_f-1;
+ else
+ return temp;
+}
+
+
+/*
+ Updates the state so next symbol can be decoded.
+ Input:
+ -rangecoder *rc, the range coder to be used.
+ -unsigned long tot_f, the maximum cumulative probability
+ -unsigned long lt_f, the cumulative probabilty of the symbol
+ -unsigned long sy_f, the probability of the symbol
+
+*/
+void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f)
+{
+ unsigned long temp;
+
+// printf("\nd> %d,%d,%d ",tot_f,lt_f,sy_f);
+
+ temp=rc->help*lt_f;
+ rc->low-=temp;
+ if(lt_f+sy_f<tot_f)
+ rc->range=rc->help*sy_f;
+ else
+ rc->range-=temp;
+}
+
+
+/*
+ Renormalizes the state while decoding.
+ -rangecoder *rc, the range coder to be used.
+*/
+void range_decoder_renormalize(rangecoder *rc)
+{
+ while(rc->range<=0x00800000)
+ {
+ rc->low=(rc->low<<8)|((rc->byte_buffer<<7)&0xFF);
+ rc->byte_buffer=getc(rc_file);
+ rc->low |= rc->byte_buffer >> (1);
+ rc->range<<=8;
+ }
+}
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "range.h" (exclusions)
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Declarations for the coder.
+*/
+
+#include <stdio.h>
+
+typedef struct{
+ unsigned long low, range, help;
+ unsigned char byte_buffer;
+}rangecoder;
+
+FILE *rc_file;
+
+void range_coder_init(rangecoder *rc, FILE *stream); //coding routines
+void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f);
+void range_coder_renormalize(rangecoder *rc);
+void range_coder_flush(rangecoder *rc);
+void range_decoder_init(rangecoder *rc, FILE *stream);//decoding routines
+unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f);
+void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f);
+void range_decoder_renormalize(rangecoder *rc);
+
+
+typedef unsigned long code_value;
+#define CODE_BITS 32
+#define Top_value ((code_value)1 << (CODE_BITS-1))
+#define SHIFT_BITS (CODE_BITS - 9)
+#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
+#define Bottom_value (Top_value >> 8)
+#define outbyte(cod,x) putc(x,stdout)
+#define inbyte(cod) getc(stdin)
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "unppmc.c" (exclusion)
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc decoder.
+
+ This module is the main module and calls the different modules to do
+ the decoding of a file. When done prints kbyps.
+*/
+
+
+// Bibliotecas necesarias
+#include <stdio.h>
+#include <stdlib.h>
+#include "range.h" //the range coder functions and data
+#include "ppmcdata.h"
+
+
+// Declaracion de funciones del ppmcmain.c
+long filesize(FILE *stream);
+
+
+
+
+//Main
+void main (int argc, char *argv[])
+{
+ unsigned long counter, //temporal counter for loops like for or while
+ counter2, //another temporal counter for sub loops
+ size_file_output, //the size of the output file
+ main_counter; //used in main
+ char expected_flush=0; //used for checking flushing which can't be done
+
+
+ // Print title and version.
+ printf("UNPPMC using range coder.\n");
+ printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n");
+ printf("Permission is granted to make verbatim copies of this program for private\n");
+ printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n");
+
+
+ // Check for correct number of parameters
+ if(argc!=3)
+ {
+ printf("Bad number of arguments.\n");
+ exit(1);
+ }
+
+
+ // Try to open input and output files
+ if((file_input=fopen(argv[1],"r+b"))==NULL)
+ {
+ printf("Couldn't open %s.\n",argv[1]);
+ exit(1);
+ }
+
+ if((file_output=fopen(argv[2],"w+b"))==NULL)
+ {
+ printf("Couldn't create %s.\n",argv[2]);
+ exit(1);
+ }
+
+
+ // Get output length
+ fread(&size_file_output,1,4,file_input);
+
+
+ // Initialize ppmc decoder
+ ppmc_alloc_memory();
+ ppmc_initialize_contexts();
+ ppmc_decoder_initialize();
+
+
+
+ // Initialize decoder
+ range_decoder_init(&rc_decoder,file_input);
+
+
+ // Start main loop which decodes the file
+ main_counter=size_file_output-4; //take in account the bytes already written
+ expected_flush=0; //we don't expect a flush yet
+
+ while(main_counter!=0)
+ {
+
+ // Clear exclusion table
+ for(counter=0;counter!=256;++counter)
+ excluded[counter]=0;
+
+// Try to decode current byte in order-4 if possible, else in lower ones
+ppmc_decode_order4();
+if(byte==-1)
+ ppmc_decode_order3();
+ if(byte==-1)
+ {
+ ppmc_decode_order2();
+ if(byte==-1)
+ {
+ ppmc_decode_order1();
+ if(byte==-1)
+ {
+ ppmc_decode_order0();
+ if(byte==-1) //check if it was an escape code
+ {
+ // Decode in order-(-1)
+ ppmc_get_totf_ordern1();
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump);
+ byte=ppmc_get_symbol_ordern1();
+ ppmc_get_prob_ordern1();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+ coded_in_order=0; //update all orders
+
+ // Now see if it's the code of flushing
+
+ if(symb_cump==256)
+ {
+ printf("Flushing.\n");
+ ppmc_flush_mem_dec();
+ expected_flush=0;
+ continue; //do not output byte nor update
+ }
+
+ }
+ }
+ }
+ }
+
+ // Output byte and update model
+
+ fputc(byte,file_output);
+
+ switch(coded_in_order) //update exclusion
+ {
+ case 0: ppmc_update_order0(); //update only order-0
+ case 1: ppmc_update_order1(); //update order-0 and order-1
+ case 2: ppmc_update_dec_order2(); //update order-0 1 and 2
+ case 3: ppmc_update_dec_order3();
+ case 4: ppmc_update_dec_order4();
+ default: break;
+ };
+
+
+ // Check if flushing has to be done and has not been done.
+ // This is optional, in case you limit the memory usage, you don't
+ // need to include this
+
+ if(expected_flush==1) // If flushing didn't happen, we can't decode
+ {
+ printf("Can't decompress file. Not enough memory.\nTry in a machine with more memory.\n");
+ exit(1);
+ }
+ if(ppmc_out_of_memory==1)
+ {
+ expected_flush=1; // Next code must be a flush code, otherwise we don't
+ // have enough memory, and therefore we can't decode
+ }
+
+
+ // Update order variables
+
+ o4_byte=o3_byte;
+ o3_byte=o2_byte;
+ o2_byte=o1_byte;
+ o1_byte=byte; //current one, is next time order-1
+
+ // Byte decoded and model updated, loop
+ main_counter--;
+
+//printf("\n%d",size_file_output-main_counter);
+
+ }
+
+
+ ppmc_free_memory();
+
+ // Close file handles and free memory
+ fclose(file_input);
+ fclose(file_output);
+
+
+
+
+ // Nicely exit
+ exit(0);
+}
+
+
+// Ruotines not used by ppmc but rather by main.
+// Not including the range coder.
+
+
+// Returns the file size of a given file.
+long filesize(FILE *stream)
+{
+ long curpos, length;
+
+ curpos = ftell(stream);
+ fseek(stream, 0L, SEEK_END);
+ length = ftell(stream);
+ fseek(stream, curpos, SEEK_SET);
+ return length;
+}
+
+
--- /dev/null
+
+TARGETS=ppmc unppmc
+COMMON= ppmc.o ppmcdata.o range.o
+
+CFLAGS=-O3 -Wall -DNDEBUG
+
+all: $(TARGETS)
+
+ppmc: $(COMMON) ppmcmain.c
+
+unppmc: $(COMMON) unppmc.c
+
+clean:
+
+ @$(RM) -f *.o $(TARGETS)
+
+.PHONY: all clean
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmc.c"
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains the whole ppmc encoder. It uses hash tables for
+ managing most of the orders. And a maximum order of 4. It codes bytes.
+ Order-1-0-(-1) are all handled in tables. Order-2 has a table with
+ direct hashing with pointers to the linked lists. Order-4 and order-3
+ both have hash tables with pointers to contexts in a linked lists which
+ finally have a pointer to the start of the linked list with the
+ probability distribution. Update exclusion is used, but exclusion is not.
+
+ Please, note that if the machine where the decoder is run doesn't has as
+ much memory as the computer where the encoder was ran, the decoder will
+ not be able to properly decode the file, because it will not be able to
+ keep track of new statistics, in this case it will just exit.
+
+ For applications where the loss of data is not admisible, I suggest you to
+ limit both encoder and decoder's memory requeriments to a given minimum
+ which both machines have.
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "range.h"
+#include "ppmcdata.h"
+
+static unsigned ordern1_table[] = {
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 4303574, 88451153, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 143318, 9172380, 1146547, 100, 143318, 573273, 859910, 10318928,
+ 10318928, 6019374, 573273, 26657231, 1576502, 43138853, 10462246, 3439642,
+ 6162693, 6162693, 6162693, 6162693, 6162693, 6162693, 6162693, 6162693,
+ 6162693, 6162693, 8742425, 4299553, 14188526, 3439642, 9888973, 429955,
+ 143318, 27230505, 2436413, 22357678, 10892202, 40989076, 1576502, 1433184,
+ 286636, 37406115, 573273, 286636, 19204672, 13901889, 19491309, 10462246,
+ 16481621, 1003229, 21211130, 13615252, 17914806, 2866368, 4012916, 429955,
+ 1719821, 1146547, 143318, 286636, 143318, 286636, 143318, 1576502,
+ 143318, 315013952, 32246651, 165962764, 171122228, 418203235, 23790862, 26800550,
+ 17054895, 230742703, 7452559, 1003229, 163669669, 75385504, 225153284, 261412851,
+ 88140846, 24794091, 215837585, 223003507, 162523121, 110355206, 38122707, 5302782,
+ 7595877, 18918035, 8885743, 859910, 100, 859910, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100,
+};
+#define ordern1_total_cump 3651941255u
+
+// Ruotines used by ppmc. Not including the range coder.
+//
+// They are for initializing of both encoder and decoder, and unless there
+// are two version, both encoder and decoder use the same routines. Like
+// "ppmc_initialize_contexts".
+
+
+// This one allocs the memory needed by ppmc, and adjust some pointers used
+// for allocating elements in the linked lists. The mempool arrays must be
+// initialized now.
+void ppmc_alloc_memory(void)
+{
+ unsigned long counter;
+
+
+ // Init mempool buffers
+
+ for(counter=0;counter!=_mempool_max_index;++counter)
+ {
+ _bytes_pool_array[counter]=0;
+ _context_pool_array[counter]=0;
+ }
+
+ _bytes_pool_index=1; //first entry will be used now
+ _context_pool_index=1;
+
+
+ // Allocate memory for ppmc structures and adjust some variables
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+
+ //save pointers in the array for freeing
+ _bytes_pool_array[0]=_bytes_pool;
+ _context_pool_array[0]=_context_pool;
+
+
+ //adjust variables
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements;
+ _context_pool_max=_context_pool+_context_pool_elements;
+
+ ppmc_out_of_memory=0; //we still have memory
+}
+
+
+// This routine initializes all the contexts, and all the tables including
+// those who care about the number of bytes defined in a context.
+void ppmc_initialize_contexts(void)
+{
+ unsigned long counter, counter2;
+
+
+ // Order-0
+ for(counter=0;counter!=256;++counter) //clear table
+ order0_array[counter]=0;
+
+ order0_defined_bytes=0; //adjust variables
+ order0_max_cump=0;
+
+
+ // Order-1
+ for(counter=0;counter!=256;++counter) //erase every table of every context
+ for(counter2=0;counter2!=256;++counter2)
+ order1_array[counter][counter2]=0;
+
+ for(counter=0;counter!=256;++counter) //adjust variables
+ {
+ order1_defined_bytes_array[counter]=0;
+ order1_max_cump_array[counter]=0;
+ }
+
+
+ // Order-2
+ for(counter=0;counter!=65536;++counter)
+ {
+ //order2_array[counter].prob=0; //clear pointer to bytes and frequencies
+ //order2_array[counter].max_cump=0;
+ order2_array[counter].defined_bytes=0;
+ }
+
+
+ // Order-4-3
+ for(counter=0;counter!=65536;++counter) //order-4-3
+ {
+ order4_hasht[counter]=0;
+ order3_hasht[counter]=0;
+ }
+}
+
+
+// This routine initializes the encode model by outputting as many bytes as
+// needed to prepare the models. This should be called before the main loop
+// and after the memory has been allocated and tables initialized.
+//
+// It does not need uses the range coder. It output the first 1 bytes.
+void ppmc_encoder_initialize(void)
+{
+
+ // Initialize order-0 and prepare different bytes for orders
+ fputc((byte=fgetc(file_input)),file_output);
+ o4_byte=byte; //order-4
+
+ fputc((byte=fgetc(file_input)),file_output);
+ o3_byte=byte; //order-3
+
+ fputc((byte=fgetc(file_input)),file_output);
+ o2_byte=byte; //order-2
+ ppmc_update_order0();
+
+ fputc((byte=fgetc(file_input)),file_output);
+ o1_byte=byte;
+
+}
+
+
+// This routine initializes the decoder model, should be called to do the same
+// changes as "ppmc_encoder_initialize()" did.
+void ppmc_decoder_initialize(void)
+{
+
+ // Initialize order-0 and context bytes
+ byte=fgetc(file_input);
+ o4_byte=byte; //order-4
+ fputc(byte,file_output);
+
+ byte=fgetc(file_input);
+ o3_byte=byte; //order-3
+ fputc(byte,file_output);
+
+ byte=fgetc(file_input);
+ o2_byte=byte; //order-2
+
+ fputc(byte,file_output); //output first byte
+ ppmc_update_order0();
+
+ byte=fgetc(file_input);
+ o1_byte=byte; //order-1
+ fputc(byte,file_output);
+}
+
+
+// Once coding or decoding is finished you have to call this routine.
+// It must be called when done.
+void ppmc_free_memory(void)
+{
+ unsigned long counter;
+
+ // Free the memory buffers
+
+ for(counter=0;counter!=_mempool_max_index;++counter)
+ {
+ if(_bytes_pool_array[counter]!=0)
+ free(_bytes_pool_array[counter]);
+
+ if(_context_pool_array[counter]!=0)
+ free(_context_pool_array[counter]);
+ }
+
+}
+
+
+// This routine flushes the memory and restarts all the tables of
+// probabilities, current order bytes are not modified, this function
+// is called when we ran out of memory. We have to output the code
+// number 256 which means memory flushing, for doing this we have to go
+// to order-(-1) so we have to output an escape code in all the orders
+// till we reach order-(-1) where we can code it. Then we free all the
+// memory, alloc it again, and reinitialize all the orders.
+//
+// However we may find the case when the current order is not initialized,
+// in this case we don't need to output an escape code.
+void ppmc_flush_mem_enc(void)
+{
+
+ // Code an escape code in order-4
+ if(ppmc_get_totf_order4()!=0) //if 0 no need of escape code
+ {
+
+ ppmc_get_escape_prob_order4(); //get prob and cump
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ }
+
+
+ // Code an escape code in order-3
+ if(ppmc_get_totf_order3()!=0) //if 0 no need of escape code
+ {
+
+ ppmc_get_escape_prob_order3(); //get prob and cump
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ }
+
+
+ // Code an escape code in order-2
+
+ o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+ // First check if current order-2 context is empty
+ if(order2_array[o2_cntxt].defined_bytes!=0) //it's not empty
+ {
+ ppmc_get_totf_order2();
+ ppmc_get_escape_prob_order2();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+ }
+
+
+ // Code an escape code in order-1
+
+ // First check if current order-1 table is empty
+ if(order1_defined_bytes_array[o1_byte]!=0) //it's not empty
+ {
+ ppmc_get_totf_order1();
+ ppmc_get_escape_prob_order1();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+ }
+
+
+ // Code an escape code in order-0. Order-0 always has at least one symbol
+
+ ppmc_get_totf_order0();
+ ppmc_get_escape_prob_order0();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+
+
+ // Now we can code the code 256
+
+ symb_prob=1;
+ symb_cump=256;
+ total_cump=257;
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+
+ // Now that decoder knows the flushing, free memory and reinit
+
+ ppmc_free_memory();
+ ppmc_alloc_memory();
+ ppmc_initialize_contexts();
+
+
+ // Be sure that order-0 has at least one probability
+
+ order0_array[o1_byte]++;
+ order0_max_cump++;
+ order0_defined_bytes++;
+
+}
+
+
+// When the decoder gets the symbol of flushing, most of the job is done
+// because we already got all the escape codes, so we only have to reinit.
+void ppmc_flush_mem_dec(void)
+{
+
+ // Free memory and reinit
+
+ ppmc_free_memory();
+ ppmc_alloc_memory();
+ ppmc_initialize_contexts();
+
+
+ // Be sure that order-0 has at least one probability
+
+ order0_array[o1_byte]++;
+ order0_max_cump++;
+ order0_defined_bytes++;
+
+
+}
+
+
+
+// ORDER-(-1) functions, also called ordern1 (Negative1) in functions
+//
+// Because order-(-1) does not need to update its probability tables, it
+// has no tables, and relies on the fact that the cump of byte is its own
+// value, and the probability is fixed, 1, and the total cump is 257.
+//
+// The alphabet has the following distribution: 0-255 the bytes. 256 is
+// an special symbol which means that we have flushed the encoder tables,
+// and thus the encoder must flush its tables too.
+//
+// The rest of the tables only have 256 symbols, because we have no need
+// of assign a symbol to the flush code (which already is the order-(-1)
+// table) nor to the escape code.
+
+
+// Gets the probability for a given symbol in the order-(-1) (ordern1)
+void ppmc_get_prob_ordern1(void)
+{
+ int i;
+ unsigned acum = 0;
+ for (i = 0; i < 256 || i == byte; ++i)
+ acum += ordern1_table[i];
+ symb_cump=acum;
+ symb_prob=ordern1_table[i];
+ total_cump=ordern1_total_cump;
+}
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-(-1)
+void ppmc_get_totf_ordern1(void)
+{
+ total_cump=ordern1_total_cump;
+}
+
+
+// Returns the symbol for a given cump under order-(-1)
+unsigned long ppmc_get_symbol_ordern1 (void)
+{
+ return symb_cump;
+}
+
+
+
+// ORDER-0 functions
+//
+// Due to the fact that order-0 has no context, I use an array for all the
+// probabilities under order-0, just as you could do in a basic model for
+// arithmetic coding.
+//
+// The main array is: order0_array. Where order0_array[byte] contains the
+// probability for a given byte. The same applies to order-1.
+//
+// To ensure that the updating and coding is done correctly, "byte" can't
+// be changed till all the coding and updating is done.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-0
+void ppmc_get_totf_order0(void)
+{
+ // Total cump is current total cump plus the escape for the escape code
+ total_cump=order0_defined_bytes+order0_max_cump;
+}
+
+
+// Codes a byte under order-0 and returns 1, otherwise it returns a 0 and
+// has coded an escape code. In this case further coding is needed.
+//
+// Returns: 1 in case a byte was coded. 0 in case of escape code.
+char ppmc_code_byte_order0(void)
+{
+ unsigned long counter;
+
+ ppmc_get_totf_order0(); //get total cump
+
+ // See if the byte is present
+ if(order0_array[byte]==0) //a probability of 0
+ {
+
+ // Because it was not present, output an escape code, prepare variables
+
+ symb_cump=order0_max_cump; //obviously its cump is current max_cump
+ //without escape code's space
+
+ symb_prob=order0_defined_bytes; //the number of defined bytes
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0; //byte not coded
+ }
+ else
+ {
+
+ coded_in_order=0;
+
+ // The symbol is present, code it under order-0
+
+ symb_prob=order0_array[byte]; //get probability directly
+
+ // Make cump for current symbol
+
+ symb_cump=0; //for first symbol is 0
+ for(counter=0; counter!=byte ; ++counter)
+ symb_cump+=order0_array[counter]; //sum probabilities before our symbol
+
+ // Code the symbol
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //symbol coded under order-0
+ }
+}
+
+
+// This functions update order-0 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+void ppmc_update_order0(void)
+{
+ if(order0_array[byte]==0)
+ {
+ // It had a zero probability
+ order0_array[byte]++; //increment symbol probability
+ ++order0_defined_bytes; //new byte defined
+ ++order0_max_cump; //total cump
+ return;
+ }
+ else
+ {
+ // It had a non-zero probability
+
+ // Increment its probability
+ order0_array[byte]++; //increment symbol probability
+ ++order0_max_cump; //total cump
+
+ // Check to see if its the maximum in this case renormalize
+ if(order0_array[byte]==255)
+ ppmc_renormalize_order0();
+
+ return;
+ }
+}
+
+
+// This functions renormalizes the probabilities at order-0 updating variables
+void ppmc_renormalize_order0(void)
+{
+ unsigned long counter;
+
+ // Initialize variables
+ order0_defined_bytes=0; //clear them
+ order0_max_cump=0;
+
+ // Loop over all probabilities, divide them by a factor of 2 and update variables
+ for(counter=0 ; counter!=256 ; ++counter)
+ {
+ order0_array[counter]>>=1; //divide by a factor of 2
+
+ if(order0_array[counter]!=0) //see if it has a non zero probability
+ order0_defined_bytes++;
+
+ order0_max_cump+=order0_array[counter]; //sum to the total cump
+ }
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of a escape code it returns -1
+void ppmc_decode_order0(void)
+{
+ unsigned long current_cump, counter;
+
+
+ // Get the total cump needed for decoding symbol
+ ppmc_get_totf_order0(); //total cump needed for decoding
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=order0_max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order0();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ for(counter=0 ; counter!= 256 ; ++counter)
+ {
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+ current_cump+=order0_array[counter]; //update cump
+ }
+
+ counter--; //-1 (see ac_ppmc.html, searching for a given symbol)
+
+ byte=counter; //update the variable with the found symbol
+
+
+ // Get the cump and prob of byte
+
+ symb_prob=order0_array[byte]; //the probabilty
+ symb_cump=current_cump-order0_array[byte]; //using the cump which was
+ //already made
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ coded_in_order=0; //decoded under order-0
+
+ return;
+ }
+
+}
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order0(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+ symb_prob=order0_defined_bytes;
+ symb_cump=order0_max_cump;
+ total_cump=order0_defined_bytes+order0_max_cump;
+}
+
+
+
+// ORDER-1 functions
+//
+// Order-1 uses 256 arrays one for every context, obviously they are accessed
+// like that: order1_array[o1_byte][byte]
+//
+// Also for computing the value of the current escape code in a given context
+// we need to know how many bytes are defined in a given context, they are
+// stored in an array, and you use them like that:
+// order1_defined_bytes_array[o1_byte]
+//
+// The same goes for the maximum cump of a given context:
+// order1_max_cump_array[o1_byte]
+//
+// Read the order-0 explanation.
+//
+// To ensure that the updating and coding is done correctly, "byte" and
+// "o1_byte" can't be changed till all the coding and updating is done.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-1. o1_byte should contain the order-1
+void ppmc_get_totf_order1(void)
+{
+ // Total cump is current total cump plus the escape for the escape code
+ total_cump=order1_defined_bytes_array[o1_byte]+order1_max_cump_array[o1_byte];
+}
+
+
+// Codes a byte under order-1 and returns 1.
+// Otherwise it returns a 0 it may be that it has coded an escape code, or
+// that current table was empty, in this case nothing is outputted
+// In both cases further coding is needed.
+//
+// Returns: 1 in case a byte was coded. 0 in case of escape code or empty table.
+// In case the byte is coded under this context, coded_in_order=1.
+char ppmc_code_byte_order1(void)
+{
+ unsigned long counter;
+
+
+ // First check if current order-1 table is empty
+ if(order1_defined_bytes_array[o1_byte]==0) //it's empty
+ {
+ return 0; //byte not coded, nothing done
+ }
+
+
+ // Now try to code this byte under order-1
+
+ ppmc_get_totf_order1(); //get total cump
+
+ // See if the byte is present
+ if(order1_array[o1_byte][byte]==0) //a probability of 0
+ {
+
+ // Because it was not present, output an escape code, prepare variables
+
+ symb_cump=order1_max_cump_array[o1_byte];//obviously its cump is current max_cump
+ //without escape code's space
+
+ symb_prob=order1_defined_bytes_array[o1_byte];//the number of defined bytes
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0; //byte not coded, escape coded
+ }
+ else
+ {
+
+ coded_in_order=1; //because we coded it under order-1
+
+ // The symbol is present, code it under order-1
+
+ symb_prob=order1_array[o1_byte][byte]; //get probability directly
+
+ // Make cump for current symbol
+
+ symb_cump=0; //for first symbol is 0
+ for(counter=0; counter!=byte ; ++counter)
+ symb_cump+=order1_array[o1_byte][counter]; //sum probabilities before our symbol
+
+ // Code the symbol
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //symbol coded under order-1
+ }
+}
+
+
+// This functions update order-1 probabilities with current byte, also takes
+// care about updating variables, and renormalizing. o1_byte must be filled.
+void ppmc_update_order1(void)
+{
+ if(order1_array[o1_byte][byte]==0)
+ {
+ // It had a zero probability
+ order1_array[o1_byte][byte]++; //increment symbol probability
+ ++order1_defined_bytes_array[o1_byte]; //new byte defined
+ ++order1_max_cump_array[o1_byte]; //total cump
+ return;
+ }
+ else
+ {
+ // It had a non-zero probability
+
+ // Increment its probability
+ order1_array[o1_byte][byte]++; //increment symbol probability
+ ++order1_max_cump_array[o1_byte]; //total cump
+
+ // Check to see if its the maximum in this case renormalize
+ if(order1_array[o1_byte][byte]==255)
+ ppmc_renormalize_order1();
+
+ return;
+ }
+}
+
+
+// This functions renormalizes the probabilities at order-1 updating variables
+void ppmc_renormalize_order1(void)
+{
+ unsigned long counter;
+
+ // Initialize variables
+ order1_defined_bytes_array[o1_byte]=0; //clear them
+ order1_max_cump_array[o1_byte]=0;
+
+ // Loop over all probabilities, divide them by a factor of 2 and update variables
+ for(counter=0 ; counter!=256 ; ++counter)
+ {
+ order1_array[o1_byte][counter]>>=1; //divide by a factor of 2
+
+ if(order1_array[o1_byte][counter]!=0) //see if it has a non zero probability
+ order1_defined_bytes_array[o1_byte]++;
+
+ order1_max_cump_array[o1_byte]+=order1_array[o1_byte][counter]; //sum to the total cump
+ }
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+void ppmc_decode_order1(void)
+{
+ unsigned long current_cump, counter;
+
+
+ // First check if current order-1 table is empty
+ if(order1_defined_bytes_array[o1_byte]==0) //it's empty
+ {
+ byte=-1; //byte not coded, nothing done
+ return;
+ }
+
+
+ // Get the total cump needed for decoding symbol
+ ppmc_get_totf_order1(); //total cump needed for decoding
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=order1_max_cump_array[o1_byte]) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order1();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ for(counter=0 ; counter!= 256 ; ++counter)
+ {
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+ current_cump+=order1_array[o1_byte][counter]; //update cump
+ }
+
+ counter--; //-1 (see ac_ppmc.html, searching for a given symbol)
+
+ byte=counter; //update the variable with the found symbol
+
+
+ // Get the cump and prob of byte
+
+ symb_prob=order1_array[o1_byte][byte]; //the probabilty
+ symb_cump=current_cump-order1_array[o1_byte][byte]; //using the cump which was
+ //already made
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=1;
+
+ return;
+ }
+
+}
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order1(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+ symb_prob=order1_defined_bytes_array[o1_byte];
+ symb_cump=order1_max_cump_array[o1_byte];
+ total_cump=order1_defined_bytes_array[o1_byte]+order1_max_cump_array[o1_byte];
+}
+
+
+
+// ORDER-2 functions
+//
+// Order-2 uses a table which holds the contexts data structure of every
+// structure, its accessed with "ppmc_order2_hash_key(o1_byte, o2_byte)"
+// and because it uses direct addressing there's no need to check for hash
+// collisions, which makes it faster. The variable "o2_cntxt" contains o1_byte
+// and o2_byte used directly to index the order-2 array.
+//
+// Though order-2 uses the same routines as lower orders they are rather
+// different, because we can't allocate memory for all the probabilities
+// tables, we have to use linked lists for the bytes and probabilities.
+//
+// Under order-2 we don't let a probability to be lower than 1, that is,
+// there can't be a symbol in a linked list with a 0 probability, I choosed
+// this for three factors: the code is easier to read (it isn't messy)
+// we gain some speed, and the loss of compression seems little.
+//
+// To ensure that the updating is done correctly, "o2_cntxt" and
+// "o2_ll_node" must not be modified by any routine except those who manage
+// order-2.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-2. cntxt must have o1_byte and o2_byte hashed with the hash key
+// for order-2.
+void ppmc_get_totf_order2(void)
+{
+ // Total cump is current total cump plus the escape for the escape code
+ total_cump=order2_array[o2_cntxt].defined_bytes+order2_array[o2_cntxt].max_cump;
+}
+
+
+// Codes a byte under order-2 and returns 1.
+// Otherwise it returns a 0. It may be that it has coded an escape code, or
+// that current table was empty.
+//
+// Returns: 1 in case a byte was coded. 0 in case of escape code or empty table.
+// In case the byte is coded under this context, coded_in_order=2.
+char ppmc_code_byte_order2(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Initialize o2_cntxt
+
+ o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+
+ // First check if current order-2 context is empty
+ if(order2_array[o2_cntxt].defined_bytes==0) //it's empty
+ {
+ return 0; //byte not coded, nothing done
+ }
+
+
+ // Now try to code this byte under order-2
+
+ ppmc_get_totf_order2(); //get total cump
+
+
+ // See if the byte is present and compute its cump at the same time
+
+ node=order2_array[o2_cntxt].prob; //pointer to first element in the linked list
+
+ symb_cump=0; //the first symbol always has a 0 cump
+
+
+ // Now search the byte in the linked list
+
+ do{
+ if(node->byte==byte)
+ goto ppmc_o2_byte_found; //bad thing, I know, anyone has a better idea?
+ symb_cump+=node->freq; //add the probability of this byte to the cump
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // If we reach that point, the byte was not found in the linked list
+ // so we don't need the cump, we have to output an escape code, whose
+ // probabilities are know using the context structure in the table.
+
+ // Byte was not present in the linked list, current node is the last one,
+ // and that's the node needed for creating a new node, save it.
+
+ o2_ll_node=node;
+
+ // Now get the probability and cump of the escape code
+
+ symb_cump=order2_array[o2_cntxt].max_cump;
+ symb_prob=order2_array[o2_cntxt].defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0;
+
+
+ // That code is executed when the byte is found in the linked list
+
+ppmc_o2_byte_found:
+
+
+ // Everything has been tested, now we can feel free to code the byte,
+ // the symb_cump is already computed, now get its probability and code
+ // the byte, also save pointer to this element in the linked lists for
+ // updating.
+
+ coded_in_order=2; //successfully coded under order-2
+
+ o2_ll_node=node; //save it for updating
+
+ symb_prob=node->freq; //get the probability of the byte
+
+ // Code it.
+
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //byte coded under order-2
+}
+
+
+// This functions update order-2 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+// Of course "o2_ll_node" must point to the element to update or the last one,
+// based on the "coded_in_order" and checking the pointer of the element we
+// can decide what to do.
+//
+// This updating is only for encoding.
+void ppmc_update_order2(void)
+{
+
+ // First of all check if that's the first byte in this context, in that case
+ // we have to initialize some variables in the context structure.
+
+ if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+ order2_array[o2_cntxt].defined_bytes=1;
+ order2_array[o2_cntxt].max_cump=1;
+ order2_array[o2_cntxt].prob=_bytes_pool;
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+ // The byte was coded under order two, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==2) //coded under order-2
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o2_ll_node->freq++; //increment its frequency (rather probability)
+
+ order2_array[o2_cntxt].max_cump++; //total cump
+
+ if(o2_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order2(); //renormalize
+
+ }
+ else
+ {
+
+ // Once every paranoid check has been done we are sure that this byte
+ // did not existed and so we have to create a new node in the linked
+ // list. Also we have to take care of memory issues.
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ o2_ll_node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ order2_array[o2_cntxt].max_cump++; //total cump
+ order2_array[o2_cntxt].defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+}
+
+
+// This functions renormalizes the probabilities at order-2 updating context
+// variables.
+void ppmc_renormalize_order2(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+ // Initialize variables. Defined bytes remain the same.
+ order2_array[o2_cntxt].max_cump=0; //clear them
+
+ node=order2_array[o2_cntxt].prob; //get pointer to lined lists
+
+ // Divide all the probabilities by 2 and update context variables
+ while(1)
+ {
+ node->freq>>=1; //divide by a factor of 2
+
+ if(node->freq==0) //don't allow a probability to be 0
+ node->freq=1;
+
+ order2_array[o2_cntxt].max_cump+=node->freq; //sum to the total cump
+
+ if(node->next==0) //last element
+ break;
+ node=node->next;
+ }
+
+
+ //printf("\nRenormalization, context:%c%c",o2_byte,o1_byte);
+
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+//
+// If we decode an escape code, we don't modify "o2_ll_node" and thus its
+// work of the updating routine to read till the end of the linked list
+// (for adding a new element) however if we decode a byte, we save on
+// "o2_ll_node" the pointer to its node. (so updating is faster)
+void ppmc_decode_order2(void)
+{
+ unsigned long current_cump, counter;
+ struct _byte_and_freq *node;
+
+ // Initialize o2_cntxt
+
+ o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+
+ // First check if current order-2 context is empty
+ if(order2_array[o2_cntxt].defined_bytes==0) //it's empty
+ {
+ byte=-1; //byte not coded, nothing done
+ return;
+ }
+
+
+ // Get the total cump needed for decoding symbol
+ ppmc_get_totf_order2(); //total cump needed for decoding
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=order2_array[o2_cntxt].max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order2();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ node=order2_array[o2_cntxt].prob; //get pointer to linked lists
+
+ while(1)
+ {
+ current_cump+=node->freq; //update cump
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+
+ node=node->next; //next element
+ //we have no need to check for the last symbol, we'll never read further
+ //the end of the linked lists, before we'll found the last byte.
+ }
+
+
+ //read byte value and probability
+
+ symb_prob=node->freq; //get the probability for updating the state
+ byte=node->byte; //get byte
+ o2_ll_node=node; //used for updating
+
+
+ // Get the cump of byte
+
+ symb_cump=current_cump-symb_prob;
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=2;
+
+ return;
+ }
+
+}
+
+
+// This is the routine for updating while decoding. We have to search the byte
+// in the linked list, if it's present, update its count, otherwise we have
+// hitted the end of the linked list, and there we have to create a new node.
+//
+// Of course if the byte was matched in order-2 we'll have a pointer to it
+// in "o2_ll_node" so we don't need to read the linked list. (we already did
+// in decoding)
+//
+// Another case which we also have to specially deal with, this is the case
+// when the context has not been initalized yet.
+void ppmc_update_dec_order2(void)
+{
+ struct _byte_and_freq *node;
+
+
+ // Handle the case when the context is not initialized
+ // This code is the same as the one for the encoding.
+
+ if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+ order2_array[o2_cntxt].defined_bytes=1;
+ order2_array[o2_cntxt].max_cump=1;
+ order2_array[o2_cntxt].prob=_bytes_pool;
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+
+ return; //nothing else to do
+ }
+
+
+ // Current context is initalized, proceed
+
+ if(coded_in_order==2) //check if it was decoded under order-2
+ {
+
+ // We can be sure that the pointer "o2_ll_node" points to its entry, and
+ // it has a non 0 probability (otherwise it couldn't be coded) so just
+ // update its probability and max_cump
+
+ o2_ll_node->freq++; //the probability of the byte
+ order2_array[o2_cntxt].max_cump++; //the max_cump
+
+ if(o2_ll_node->freq==255) //check for renormalization
+ ppmc_renormalize_order2();
+
+ }
+ else
+ {
+
+ // An escape code was decoded under order-2, we have to read till the
+ // end of the linked list so we can add a new node for this new byte.
+
+ node=order2_array[o2_cntxt].prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+
+ // We reached the end of the linked list, add a new node if possible,
+ // we are using the same code of "ppmc_update_order2()" with the
+ // difference that the pointer to the linked list is "node"
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ order2_array[o2_cntxt].max_cump++; //total cump
+ order2_array[o2_cntxt].defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //we are finished updating
+
+ }
+
+}
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order2(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+
+ symb_prob=order2_array[o2_cntxt].defined_bytes;
+ symb_cump=order2_array[o2_cntxt].max_cump;
+}
+
+
+
+// ORDER-3 functions
+//
+// The difference between order-3 and order-3 are just a few, instead of
+// keeping a table with the context structures, we keep a hash table with
+// pointers to linked lists with the context, so it's only a matter of
+// searching current context in the linked list corresponding to its hash
+// entry. This is done in "ppmc_get_totf_order3" because that's the first
+// routine that both encoding and decoding routines call.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-3. Must be called while encoding or decoding before anything else
+// because it initializes the pointers to the context structure in
+// "o3_context" and o3_cntxt.
+//
+// If the hash entry is not initialized it returns "o3_context"=0
+// If the context is not present in the linked list of context, "o3_context"
+// will point to the last element in the linked list.
+// If the context is present "o3_context" will point to the context to use.
+// One can distinguish the last two by checking the context value of the
+// structure, if it's not the same, is the last element.
+//
+// The routine returns 0 if the hash entry is not initialized or if the
+// the context was not present. Otherwise it returns 1, meaning that we
+// have to code under this context.
+char ppmc_get_totf_order3(void)
+{
+ struct context *cntxt_node;
+
+
+ // First make the hash key for order-3
+
+ o3_cntxt=ppmc_order3_hash_key(o1_byte,o2_byte,o3_byte);
+ full_o3_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16); //order-3
+
+
+ // Now check the hash entry in the table
+
+ if(order3_hasht[o3_cntxt]==0) //if 0, not initialized
+ {
+
+ o3_context=0; //no hash entry
+
+ return 0; //hash entry not initialized
+ }
+
+
+ // Now read trough the linked list of context searching current one
+
+ cntxt_node=order3_hasht[o3_cntxt];
+
+ while(1)
+ {
+
+ if(cntxt_node->order4321==full_o3_cntxt) //compare context
+ goto ppmc_gtf_cntxt_found;
+
+ if(cntxt_node->next==0) //end of context's linked list
+ break;
+
+ cntxt_node=cntxt_node->next; //next element
+
+ }
+
+
+ // Once there the context was not found
+ o3_context=cntxt_node; //pointer to last element in the linked list
+
+ return 0; //it was not present
+
+
+ // The context is found, so return pointer and cump
+
+ppmc_gtf_cntxt_found:
+
+ o3_context=cntxt_node;
+
+ // Total cump is current total cump plus the escape for the escape code
+
+ total_cump=o3_context->defined_bytes+o3_context->max_cump;
+
+ return 1; //context found
+
+}
+
+
+// Codes a byte under order-3 and returns 1.
+// Otherwise it returns a 0.
+//
+// In case the byte is coded under this context, coded_in_order=3.
+char ppmc_code_byte_order3(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order3()==0)
+ return 0;
+
+
+ // See if the byte is present and compute its cump at the same time
+
+ node=o3_context->prob; //pointer to first element in the linked list
+
+ symb_cump=0; //the first symbol always has a 0 cump
+
+
+ // Now search the byte in the linked list
+
+ do{
+ if(node->byte==byte)
+ goto ppmc_o3_byte_found; //bad thing, I know, anyone has a better idea?
+ symb_cump+=node->freq; //add the probability of this byte to the cump
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // If we reach that point, the byte was not found in the linked list
+ // so we don't need the cump, we have to output an escape code, whose
+ // probabilities are know using the context structure in the table.
+
+ // Byte was not present in the linked list, current node is the last one,
+ // and that's the node needed for creating a new node, save it.
+
+ o3_ll_node=node;
+
+ // Now get the probability and cump of the escape code
+
+ symb_cump=o3_context->max_cump;
+ symb_prob=o3_context->defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0;
+
+
+ // That code is executed when the byte is found in the linked list
+
+ppmc_o3_byte_found:
+
+
+ // Everything has been tested, now we can feel free to code the byte,
+ // the symb_cump is already computed, now get its probability and code
+ // the byte, also save pointer to this element in the linked lists for
+ // updating.
+
+ coded_in_order=3; //successfully coded under order-3
+
+ o3_ll_node=node; //save it for updating
+
+ symb_prob=node->freq; //get the probability of the byte
+
+ // Code it.
+
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //byte coded under order-3
+}
+
+
+// This functions update order-3 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+//
+// "o3_ll_node" must point to the element to update or the last one,
+// based on the "coded_in_order" and checking the pointer of the element we
+// can decide what to do. Also "o3_context" must be initialized.
+//
+// This updating is only for encoding.
+void ppmc_update_order3(void)
+{
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order3_hasht[o3_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+
+
+ // The byte was coded under order three, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==3) //coded under order-3
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o3_ll_node->freq++; //increment its frequency (rather probability)
+
+ o3_context->max_cump++; //total cump
+
+ if(o3_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order3(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o3_context" points to the last element, so we can put the new element.
+
+ if(o3_context->order4321==full_o3_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ o3_ll_node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o3_context->max_cump++; //total cump
+ o3_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o3_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+// This functions renormalizes the probabilities at order-3 updating context
+// variables.
+void ppmc_renormalize_order3(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Initialize variables. Defined bytes remain the same.
+ o3_context->max_cump=0; //clear them
+
+ node=o3_context->prob; //get pointer to lined lists
+
+ // Divide all the probabilities by 2 and update context variables
+ while(1)
+ {
+ node->freq>>=1; //divide by a factor of 2
+
+ if(node->freq==0) //don't allow a probability to be 0
+ node->freq=1;
+
+ o3_context->max_cump+=node->freq; //sum to the total cump
+
+ if(node->next==0) //last element
+ break;
+ node=node->next;
+ }
+
+
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+//
+// If we decode an escape code, we don't modify "o3_ll_node" and thus its
+// work of the updating routine to read till the end of the linked list
+// (for adding a new element) however if we decode a byte, we save on
+// "o3_ll_node" the pointer to its node. (so updating is faster)
+void ppmc_decode_order3(void)
+{
+ unsigned long current_cump, counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order3()==0)
+ {
+ byte=-1;
+ return;
+ }
+
+
+ // Decode current cump
+
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=o3_context->max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order3();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ node=o3_context->prob; //get pointer to linked lists
+
+ while(1)
+ {
+ current_cump+=node->freq; //update cump
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+
+ node=node->next; //next element
+ //we have no need to check for the last symbol, we'll never read further
+ //the end of the linked lists, before we'll found the last byte.
+ }
+
+
+ //read byte value and probability
+
+ symb_prob=node->freq; //get the probability for updating the state
+ byte=node->byte; //get byte
+ o3_ll_node=node; //used for updating
+
+
+ // Get the cump of byte
+
+ symb_cump=current_cump-symb_prob;
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=3;
+
+ return;
+ }
+
+}
+
+
+// This is the routine for updating while decoding. The only difference with
+// the routine for coding is that when an escape code was coded, "o3_ll_node"
+// is not initialized so we have to read till the end of the linked list.
+// Fortunately "o3_context" will be initialized so we don't need to read its
+// linked list.
+void ppmc_update_dec_order3(void)
+{
+ struct _byte_and_freq *node;
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order3_hasht[o3_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+ // The byte was coded under order three, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==3) //coded under order-3
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o3_ll_node->freq++; //increment its frequency (rather probability)
+
+ o3_context->max_cump++; //total cump
+
+ if(o3_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order3(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o3_context" points to the last element, so we can put the new element.
+
+ if(o3_context->order4321==full_o3_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ // Read till the end of the linked list
+
+ node=o3_context->prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+ // Now add element
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o3_context->max_cump++; //total cump
+ o3_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o3_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order3(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+
+ symb_prob=o3_context->defined_bytes;
+ symb_cump=o3_context->max_cump;
+}
+
+
+
+// ORDER-4 functions
+//
+// The routines for order-4 are *equal* to those for order-3, there are a few
+// changes like different global variables, and different hash keys.
+//
+// If you want to go to higher orders, you'd use the same code and data
+// structures, with the difference of the context bytes (order4321)
+// stored in every context's linked list.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-4. Must be called while encoding or decoding before anything else
+// because it initializes the pointers to the context structure in
+// "o4_context" and o4_cntxt.
+//
+// If the hash entry is not initialized it returns "o4_context"=0
+// If the context is not present in the linked list of context, "o4_context"
+// will point to the last element in the linked list.
+// If the context is present "o4_context" will point to the context to use.
+// One can distinguish the last two by checking the context value of the
+// structure, if it's not the same, is the last element.
+//
+// The routine returns 0 if the hash entry is not initialized or if the
+// the context was not present. Otherwise it returns 1, meaning that we
+// have to code under this context.
+char ppmc_get_totf_order4(void)
+{
+ struct context *cntxt_node;
+
+
+ // First make the hash key for order-4
+
+ o4_cntxt=ppmc_order4_hash_key(o1_byte,o2_byte,o3_byte,o4_byte);
+ full_o4_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16)+(o4_byte<<24); //order-4
+
+
+ // Now check the hash entry in the table
+
+ if(order4_hasht[o4_cntxt]==0) //if 0, not initialized
+ {
+
+ o4_context=0; //no hash entry
+
+ return 0; //hash entry not initialized
+ }
+
+
+ // Now read trough the linked list of context searching current one
+
+ cntxt_node=order4_hasht[o4_cntxt];
+
+ while(1)
+ {
+
+ if(cntxt_node->order4321==full_o4_cntxt) //compare context
+ goto ppmc_gtfo4_cntxt_found;
+
+ if(cntxt_node->next==0) //end of context's linked list
+ break;
+
+ cntxt_node=cntxt_node->next; //next element
+
+ }
+
+
+ // Once there the context was not found
+ o4_context=cntxt_node; //pointer to last element in the linked list
+
+ return 0; //it was not present
+
+
+ // The context is found, so return pointer and cump
+
+ppmc_gtfo4_cntxt_found:
+
+ o4_context=cntxt_node;
+
+ // Total cump is current total cump plus the escape for the escape code
+
+ total_cump=o4_context->defined_bytes+o4_context->max_cump;
+
+ return 1; //context found
+
+}
+
+
+// Codes a byte under order-4 and returns 1.
+// Otherwise it returns a 0.
+//
+// In case the byte is coded under this context, coded_in_order=4.
+char ppmc_code_byte_order4(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order4()==0)
+ return 0;
+
+
+ // See if the byte is present and compute its cump at the same time
+
+ node=o4_context->prob; //pointer to first element in the linked list
+
+ symb_cump=0; //the first symbol always has a 0 cump
+
+
+ // Now search the byte in the linked list
+
+ do{
+ if(node->byte==byte)
+ goto ppmc_o4_byte_found; //bad thing, I know, anyone has a better idea?
+ symb_cump+=node->freq; //add the probability of this byte to the cump
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // If we reach that point, the byte was not found in the linked list
+ // so we don't need the cump, we have to output an escape code, whose
+ // probabilities are know using the context structure in the table.
+
+ // Byte was not present in the linked list, current node is the last one,
+ // and that's the node needed for creating a new node, save it.
+
+ o4_ll_node=node;
+
+ // Now get the probability and cump of the escape code
+
+ symb_cump=o4_context->max_cump;
+ symb_prob=o4_context->defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0;
+
+
+ // That code is executed when the byte is found in the linked list
+
+ppmc_o4_byte_found:
+
+
+ // Everything has been tested, now we can feel free to code the byte,
+ // the symb_cump is already computed, now get its probability and code
+ // the byte, also save pointer to this element in the linked lists for
+ // updating.
+
+ coded_in_order=4; //successfully coded under order-4
+
+ o4_ll_node=node; //save it for updating
+
+ symb_prob=node->freq; //get the probability of the byte
+
+ // Code it.
+
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //byte coded under order-4
+}
+
+
+// This functions update order-4 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+//
+// "o4_ll_node" must point to the element to update or the last one,
+// based on the "coded_in_order" and checking the pointer of the element we
+// can decide what to do. Also "o4_context" must be initialized.
+//
+// This updating is only for encoding.
+void ppmc_update_order4(void)
+{
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order4_hasht[o4_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+
+
+ // The byte was coded under order three, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==4) //coded under order-4
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o4_ll_node->freq++; //increment its frequency (rather probability)
+
+ o4_context->max_cump++; //total cump
+
+ if(o4_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order4(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o4_context" points to the last element, so we can put the new element.
+
+ if(o4_context->order4321==full_o4_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ o4_ll_node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o4_context->max_cump++; //total cump
+ o4_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o4_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+// This functions renormalizes the probabilities at order-4 updating context
+// variables.
+void ppmc_renormalize_order4(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Initialize variables. Defined bytes remain the same.
+ o4_context->max_cump=0; //clear them
+
+ node=o4_context->prob; //get pointer to lined lists
+
+ // Divide all the probabilities by 2 and update context variables
+ while(1)
+ {
+ node->freq>>=1; //divide by a factor of 2
+
+ if(node->freq==0) //don't allow a probability to be 0
+ node->freq=1;
+
+ o4_context->max_cump+=node->freq; //sum to the total cump
+
+ if(node->next==0) //last element
+ break;
+ node=node->next;
+ }
+
+
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+//
+// If we decode an escape code, we don't modify "o4_ll_node" and thus its
+// work of the updating routine to read till the end of the linked list
+// (for adding a new element) however if we decode a byte, we save on
+// "o4_ll_node" the pointer to its node. (so updating is faster)
+void ppmc_decode_order4(void)
+{
+ unsigned long current_cump, counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order4()==0)
+ {
+ byte=-1;
+ return;
+ }
+
+
+ // Decode current cump
+
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=o4_context->max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order4();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ node=o4_context->prob; //get pointer to linked lists
+
+ while(1)
+ {
+ current_cump+=node->freq; //update cump
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+
+ node=node->next; //next element
+ //we have no need to check for the last symbol, we'll never read further
+ //the end of the linked lists, before we'll found the last byte.
+ }
+
+
+ //read byte value and probability
+
+ symb_prob=node->freq; //get the probability for updating the state
+ byte=node->byte; //get byte
+ o4_ll_node=node; //used for updating
+
+
+ // Get the cump of byte
+
+ symb_cump=current_cump-symb_prob;
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=4;
+
+ return;
+ }
+
+}
+
+
+// This is the routine for updating while decoding. The only difference with
+// the routine for coding is that when an escape code was coded, "o4_ll_node"
+// is not initialized so we have to read till the end of the linked list.
+// Fortunately "o4_context" will be initialized so we don't need to read its
+// linked list.
+void ppmc_update_dec_order4(void)
+{
+ struct _byte_and_freq *node;
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order4_hasht[o4_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+ // The byte was coded under order four, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==4) //coded under order-4
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o4_ll_node->freq++; //increment its frequency (rather probability)
+
+ o4_context->max_cump++; //total cump
+
+ if(o4_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order4(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o4_context" points to the last element, so we can put the new element.
+
+ if(o4_context->order4321==full_o4_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ // Read till the end of the linked list
+
+ node=o4_context->prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+ // Now add element
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o4_context->max_cump++; //total cump
+ o4_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o4_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order4(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+
+ symb_prob=o4_context->defined_bytes;
+ symb_cump=o4_context->max_cump;
+}
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmc.h"
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains the definitions of different functions and all the
+ data structures defined by ppmc. Also contains defines.
+*/
+
+// Definitions
+
+#define ppmc_order4_hash_size 65536
+#define ppmc_order4_hash_key(k,j,i,l) ( (k)+(j<<8)+(i<<1)+(l<<9) )& ppmc_order4_hash_size-1
+#define ppmc_order3_hash_size 65536
+#define ppmc_order3_hash_key(k,j,i) ((k)+(j<<7)+(i<<11)) & ppmc_order3_hash_size-1
+#define ppmc_order2_hash_key(k,j) ((k)+(j<<8))
+#define _bytes_pool_elements 125000 //this is used the first time
+ //that we allocate memory, that's
+ //the number of entries
+#define _bytes_pool_elements_inc 125000 //if we need to alloc again, this
+ //is the number of entries to get
+#define _context_pool_elements 50000
+#define _context_pool_elements_inc 50000
+
+#define _mempool_max_index 1000 //the number of entries in the array with
+ //pointers
+
+
+// Data structures
+
+// This structure contains a single element of a linked lists which contains
+// the probability distribution of a given order. This structure takes 6 bytes.
+struct _byte_and_freq{
+unsigned char byte; //the byte itself
+unsigned char freq; //and the frequency of it
+struct _byte_and_freq *next; //pointer to next element in linked list or 0
+};
+
+
+// This structure is used for both order-3 and order-4. It takes 20 bytes,
+// and it can still hold another byte more. (only 19 being used)
+// Order 2-1-0-(-1) use different structures for a faster accessing.
+struct context{
+struct context *next; //next context in the hash entry
+unsigned long order4321; //order-4-3-2-1 (or order-3-2-1 for order-3)
+struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution
+unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 )
+unsigned int defined_bytes; //the number of bytes in this context
+};
+
+// That's the same but for order-2 where there's no hash collisions.
+struct context_o2{
+struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution
+unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 )
+unsigned int defined_bytes; //the number of bytes in this context
+};
+
+
+// Declaration of functions
+
+
+// Functions for initializing
+void ppmc_alloc_memory(void);
+void ppmc_initialize_contexts(void);
+void ppmc_encoder_initialize(void);
+void ppmc_decoder_initialize(void);
+void ppmc_free_memory(void);
+void ppmc_flush_mem_enc(void);
+void ppmc_flush_mem_dec(void);
+
+// Functions for order-(-1)
+void ppmc_get_prob_ordern1(void);
+unsigned long ppmc_get_symbol_ordern1(void);
+void ppmc_get_totf_ordern1(void);
+void ppmc_renormalize_order1(void);
+
+// Functions for order-0
+void ppmc_get_totf_order0(void);
+char ppmc_code_byte_order0(void);
+void ppmc_update_order0(void);
+void ppmc_renormalize_order0(void);
+void ppmc_decode_order0(void);
+void ppmc_get_escape_prob_order0(void);
+void ppmc_get_prob_order0(void);
+
+// Functions for order-1
+void ppmc_get_totf_order1(void);
+char ppmc_code_byte_order1(void);
+void ppmc_update_order1(void);
+void ppmc_renormalize_order1(void);
+void ppmc_decode_order1(void);
+void ppmc_get_escape_prob_order1(void);
+void ppmc_get_prob_order1(void);
+
+
+// Functions for order-2
+void ppmc_get_totf_order2(void);
+char ppmc_code_byte_order2(void);
+void ppmc_update_order2(void);
+void ppmc_renormalize_order2(void);
+void ppmc_decode_order2(void);
+void ppmc_update_dec_order2(void);
+void ppmc_get_escape_prob_order2(void);
+void ppmc_get_prob_order2(void);
+
+
+// Functions for order-3
+char ppmc_get_totf_order3(void);
+char ppmc_code_byte_order3(void);
+void ppmc_update_order3(void);
+void ppmc_renormalize_order3(void);
+void ppmc_decode_order3(void);
+void ppmc_update_dec_order3(void);
+void ppmc_get_escape_prob_order3(void);
+void ppmc_get_prob_order3(void);
+
+
+// Functions for order-4
+char ppmc_get_totf_order4(void);
+char ppmc_code_byte_order4(void);
+void ppmc_update_order4(void);
+void ppmc_renormalize_order4(void);
+void ppmc_decode_order4(void);
+void ppmc_update_dec_order4(void);
+void ppmc_get_escape_prob_order4(void);
+void ppmc_get_prob_order4(void);
+
+
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmcdata.c"
+Email: arturo@arturocampos.com
+Web: http://www.arturocampos.com
+
+Part of the ppmc encoder and decoder.
+
+This module contains global data.
+*/
+
+#include "ppmc.h" //defines
+#include "range.h"
+
+// Order-4 uses a hash table which points to the start of a linked list with
+// the different context, which has the cump, the number of different symbols
+// and a pointer to the linked list with the bytes and frequencies.
+// Order-3 is almost the same, both take 262144 bytes.
+struct context *order4_hasht[ppmc_order4_hash_size];
+
+struct context *order3_hasht[ppmc_order3_hash_size];
+
+
+// The array for order-2 is different, as we do directly hashing, and thus
+// we have no need to do the stuff of linked lists for the context itself,
+// so it contains the context used. This takes 1310720 bytes.
+struct context_o2 order2_array[65536];
+
+
+// Those are the multiple arrays for order-1. It takes 65536 bytes.
+unsigned char order1_array[256][256];
+unsigned int order1_defined_bytes_array[256]; //the defined bytes in every context
+unsigned int order1_max_cump_array[256]; //max cump of every context
+
+
+// This is the array for order-0. It takes 256 bytes.
+unsigned char order0_array[256];
+unsigned int order0_defined_bytes;
+unsigned int order0_max_cump;
+
+
+// No need of variables for order-(-1), because it's fixed.
+
+
+// Those are the pointers and variables used for managing the mem pool for
+// both context, and bytes and frequencies.
+struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked
+ //lists with bytes and frequencies
+ *_bytes_pool_max; //the maximum of this buffer
+ struct context *_context_pool; //pointer to pool containing contexts
+ struct context *_context_pool_max; //the same as with _bytes_pool
+
+ unsigned long _bytes_pool_index; //index in array of pointers
+ unsigned long _context_pool_index;
+
+ //the following is an array keeping pointers to different buffers. A new
+ //buffer is allocated when the current one is full, so we always have a
+ //buffer for linked lists. (without allocating a buffer for every element)
+ struct _byte_and_freq *_bytes_pool_array[_mempool_max_index];
+ struct context *_context_pool_array[_mempool_max_index];
+
+ char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any
+ //routine that needs to allocate memory must
+ //quit if that's 1.
+
+
+ // Variables which contain current byte to code and order
+ unsigned long byte, //current byte to code
+ o1_byte, //order-1 byte
+ o2_byte, //order-2 byte
+ o3_byte, //order-3 byte
+ o4_byte; //order-4 byte
+
+unsigned long o2_cntxt; //used in the hash key of order-2
+unsigned long o3_cntxt; //use as hash key for order-3
+unsigned long o4_cntxt; //use as hash key for order-4
+unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together
+unsigned long full_o4_cntxt; //order-4-3-2-1
+
+unsigned long coded_in_order; //in which order the last byte was coded
+//it's for update exclusion
+//also used for decoding
+
+// Variables used for coding
+
+unsigned long
+total_cump, //the total cumulative probability
+ symb_cump, //the symbol cumulative probability
+ symb_prob; //the symbol frequency
+
+rangecoder rc_coder; //state of range coder
+rangecoder rc_decoder; //state of range decoder
+
+// File handles
+
+FILE *file_input, //file to code
+ *file_output; //file where the coded data is placed
+
+
+// Pointers to linked lists and context structures used for faster updating
+// or creation of new nodes, because instead of reading again all the linked
+// list till the end (in the case of creation) we have a pointer to the last
+// element. In the case that a byte was present in the linked lists but it
+// had a 0 count, we just have to update its probability. And in the case
+// that it already was present and we coded it under that context or a lower
+// one, we just have to update its probability.
+
+
+struct _byte_and_freq *o2_ll_node; //pointer to linked lists under order-2
+//where does it points depends in which
+//order the byte was coded.
+struct _byte_and_freq *o3_ll_node; //the same but for order-3
+struct _byte_and_freq *o4_ll_node;
+
+struct context *o3_context; //pointer to current order-3 context
+struct context *o4_context; //pointer to current order-3 context
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmcdata.h"
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains externs definition for global data.
+*/
+
+#include "ppmc.h"
+
+// Order-4 uses a hash table which points to the start of a linked list with
+// the different context, which has the cump, the number of different symbols
+// and a pointer to the linked list with the bytes and frequencies.
+// Order-3 is almost the same, both take 262144 bytes.
+extern struct context *order4_hasht[];
+
+extern struct context *order3_hasht[];
+
+
+// The array for order-2 is different, as we do directly hashing, and thus
+// we have no need to do the stuff of linked lists for the context itself,
+// so it contains the context used. This takes 1310720 bytes.
+extern struct context_o2 order2_array[];
+
+
+// Those are the multiple arrays for order-1. It takes 65536 bytes.
+extern unsigned char order1_array[256][256];
+extern unsigned int order1_defined_bytes_array[]; //the defined bytes in every context
+extern unsigned int order1_max_cump_array[]; //max cump of every context
+
+
+// This is the array for order-0. It takes 256 bytes.
+extern unsigned char order0_array[];
+extern unsigned int order0_defined_bytes;
+extern unsigned int order0_max_cump;
+
+
+// Those are the pointers and variables used for managing the mem pool for
+// both context, and bytes and frequencies.
+extern struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked
+ //lists with bytes and frequencies
+ *_bytes_pool_max; //the maximum of this buffer
+extern struct context *_context_pool; //pointer to pool containing contexts
+extern struct context *_context_pool_max; //the same as with _bytes_pool
+
+extern unsigned long _bytes_pool_index; //index in array of pointers
+extern unsigned long _context_pool_index;
+
+//the following is an array keeping pointers to different buffers. A new
+//buffer is allocated when the current one is full, so we always have a
+//buffer for linked lists. (without allocating a buffer for every element)
+extern struct _byte_and_freq *_bytes_pool_array[_mempool_max_index];
+extern struct context *_context_pool_array[_mempool_max_index];
+
+extern char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any
+ //routine that needs to allocate memory must
+ //quit if that's 1.
+
+
+
+// Variables which contain current byte to code and order
+extern unsigned long //they are only bytes
+ byte, //current byte to code
+ o1_byte, //order-1 byte
+ o2_byte, //order-2 byte
+ o3_byte, //order-3 byte
+ o4_byte; //order-4 byte
+
+extern unsigned long o2_cntxt; //used in the hash key of order-2
+extern unsigned long o3_cntxt; //use as hash key for order-3
+unsigned long o4_cntxt; //use as hash key for order-4
+extern unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together
+extern unsigned long full_o4_cntxt; //order-4-3-2-1
+
+extern unsigned long coded_in_order; //in which order the last byte was coded
+ //it's for update exclusion
+ //also used for decoding
+// Variables used for coding
+
+extern unsigned long //no need for negative values
+ total_cump, //the total cumulative probability
+ symb_cump, //the symbol cumulative probability
+ symb_prob; //the symbol frequency
+
+extern rangecoder rc_coder; //state of range coder
+extern rangecoder rc_decoder; //state of range decoder
+
+// File handles
+
+ FILE *file_input, //file to code
+ *file_output; //file where the coded data is placed
+
+
+
+// Pointers to linked lists and context structures used for faster updating
+// or creation of new nodes, because instead of reading again all the linked
+// list till the end (in the case of creation) we have a pointer to the last
+// element. In the case that a byte was present in the linked lists but it
+// had a 0 count, we just have to update its probability. And in the case
+// that it already was present and we coded it under that context or a lower
+// one, we just have to update its probability.
+
+
+extern struct _byte_and_freq *o2_ll_node;//pointer to linked lists under order-2
+ //where does it points depends in which
+ //order the byte was coded.
+extern struct _byte_and_freq *o3_ll_node; //the same but for order-3
+extern struct _byte_and_freq *o4_ll_node;
+
+extern struct context *o3_context; //pointer to current order-3 context
+extern struct context *o4_context; //pointer to current order-3 context
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmcmain.c"
+Email: arturo@arturocampos.com
+Web: http://www.arturocampos.com
+
+Part of the ppmc encoder only.
+
+This module is the main module and calls the different modules to do
+the encoding of a file. When done prints bpb and kbyps.
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "range.h" //the range coder functions and data
+#include "ppmcdata.h"
+
+
+long filesize(FILE *stream);
+
+
+
+//Main
+int main (char argc, char *argv[])
+{
+ unsigned long counter, //temporal counter for loops like for or while
+ counter2, //another temporal counter for sub loops
+ size_file_input; //the size of the input file
+
+
+ // Print title, version and copyright
+ printf("PPMC using range coder. (without exclusion)\n");
+ printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n");
+ printf("Permission is granted to make verbatim copies of this program for private\n");
+ printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n");
+
+ // Check for correct number of parameters
+ if(argc!=3)
+ {
+ printf("Bad number of arguments.\n");
+ exit(1);
+ }
+
+
+ // Try to open input and output files
+ if((file_input=fopen(argv[1],"r+b"))==NULL)
+ {
+ printf("Couldn't open %s.\n",argv[1]);
+ exit(1);
+ }
+
+ if((file_output=fopen(argv[2],"w+b"))==NULL)
+ {
+ printf("Couldn't create %s.\n",argv[2]);
+ exit(1);
+ }
+
+
+ // Check input file length and not accept 0 length files
+ size_file_input=filesize(file_input);
+
+ if(size_file_input<5)
+ {
+ printf("Can't work with files below than 5 bytes!");
+ exit(1);
+ }
+
+
+ // First output file length
+ fwrite(&size_file_input,1,4,file_output); //input length
+
+
+ // Initialize ppmc encoder
+ ppmc_alloc_memory(); //get memory
+ ppmc_initialize_contexts(); //initialize model
+ ppmc_encoder_initialize();
+
+ // Initialize range coder
+ range_coder_init(&rc_coder,file_output);
+
+
+ // Start main loop which codes the file
+ while((byte=fgetc(file_input))!=EOF)
+ {
+
+ // Try to code current byte under order-4 if possible then go to lower orders
+ if(ppmc_code_byte_order4()==0)
+ if(ppmc_code_byte_order3()==0)
+ if(ppmc_code_byte_order2()==0)
+ if(ppmc_code_byte_order1()==0)
+ if(ppmc_code_byte_order0()==0) //else try to code under order-0
+ {
+ // Code under order-(-1)
+ ppmc_get_prob_ordern1();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+ coded_in_order=0; //update all the tables (unless order-(-1))
+ }
+
+
+ // Now do update exclusion
+
+ switch(coded_in_order)
+ {
+ case 0: ppmc_update_order0(); //update only order-0
+ case 1: ppmc_update_order1(); //update order-0 and order-1
+ case 2: ppmc_update_order2(); //update order-2 1 and 0...
+ case 3: ppmc_update_order3();
+ case 4: ppmc_update_order4();
+ default: break;
+ };
+
+
+
+ // Update order variables
+
+ o4_byte=o3_byte;
+ o3_byte=o2_byte;
+ o2_byte=o1_byte;
+ o1_byte=byte; //current one is next time order-1
+
+
+ // Check if we run out of memory, in that case, flush the encoder
+
+ if(ppmc_out_of_memory==1)
+ {
+ printf("Flushing memory! Output file might be not decodable.\n");
+ ppmc_flush_mem_enc();
+ }
+
+
+ }
+
+
+ // Flush range coder
+ range_coder_flush(&rc_coder);
+
+ // Free memory
+ ppmc_free_memory();
+
+
+ // Print bpb and kbyps
+ printf("%s at %f bpb.\n",argv[1],((float)filesize(file_output)/(float)size_file_input)*(float)8);
+
+
+ // Close file handles
+ fclose(file_input);
+ fclose(file_output);
+
+
+
+ // Nicely exit
+ return 0;
+}
+
+
+// Routines not used by ppmc but rather by main.
+// Not including the range coder.
+
+
+// Returns the file size of a given file.
+long filesize(FILE *stream)
+{
+ long curpos, length;
+
+ curpos = ftell(stream);
+ fseek(stream, 0L, SEEK_END);
+ length = ftell(stream);
+ fseek(stream, curpos, SEEK_SET);
+ return length;
+}
+
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "range.c"
+Email: arturo@arturocampos.com
+Web: http://www.arturocampos.com
+
+This module contains the routines of both the range coder and decoder.
+
+The range coder works internally in 32 bits, and uses bytes as symbols.
+Also the end of message symbol is used. So z=257.
+
+Both input and output use rc_file as the file stream. Of course we can't
+code and decode at the same time. All the input or output comes from the
+same file, no matter what range coder structure are we using. The modules
+here provided don't manage the io except for reading and writing, they
+don't open nor close the files. The reading and writing is done via
+putc and getc.
+*/
+
+#include "range.h"
+#include <stdlib.h>
+
+/*
+ Inits the range coder state. Must be called before encoding any symbol.
+ It uses a magic number 0xB3 as the first byte outputted.
+ -rangecoder *rc, the range coder to be used.
+
+ Shoulde be called like that:
+ range_coder_init(&o0_rc_state,file_output);
+ */
+void range_coder_init(rangecoder *rc, FILE *stream)
+{
+ rc_file=stream;
+ rc->low=0; //define state
+ rc->range=0x80000000;
+ rc->byte_buffer=0xB3; //magic number
+ rc->help=0; //temp value
+}
+
+
+/*
+ Encodes a symbol.
+ -rangecoder *rc, the range coder to be used.
+ -unsigned long tot_f, the maximum cumulative frequency
+ -unsigned long lt_f, the cumulative probabilty of the symbol
+ -unsigned long sy_f, the probability of the symbol
+ */
+void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f)
+{
+ unsigned long temp, r;
+
+ range_coder_renormalize(rc); //&rc?
+
+ r=rc->range/tot_f;
+ temp=r*lt_f;
+ if(lt_f+sy_f<tot_f)
+ rc->range=r*sy_f;
+ else
+ rc->range-=temp;
+ rc->low+=temp;
+}
+
+/*
+ Renormalizes the state when coding.
+ -rangecoder *rc, the range coder to be used.
+ */
+
+void range_coder_renormalize(rangecoder *rc)
+{
+ while(rc->range<=(unsigned long)0x00800000)
+ {
+ if(rc->low<(unsigned long)0x7F800000)
+ {
+ putc(rc->byte_buffer,rc_file);
+ for(;rc->help;rc->help--)
+ putc(0xFF,rc_file);
+ rc->byte_buffer=(unsigned char)(rc->low>>23);
+ }
+ else
+ {
+ if(rc->low&(unsigned long)0x80000000)
+ {
+ putc(rc->byte_buffer+1,rc_file);
+ for(;rc->help;rc->help--)
+ putc(0x00,rc_file);
+ rc->byte_buffer=(unsigned char)(rc->low>>23);
+ }
+ else
+ rc->help++;
+ }
+ rc->range<<=8;
+ rc->low=(rc->low<<8)&(unsigned long)(0x7FFFFFFF);
+ }
+}
+
+
+/*
+ Flushes the encoder. Must be called when the coding is done.
+ -rangecoder *rc, the range coder to be used.
+
+ Shoulde be called like that:
+ range_coder_flush(&o0_rc_state);
+ */
+void range_coder_flush(rangecoder *rc)
+{
+ unsigned long tmp;
+
+ range_coder_renormalize(rc);
+ tmp = rc->low >> 23;
+ if (tmp > 0xff)
+ {
+ putc(rc->byte_buffer+1,rc_file);
+ for(; rc->help; rc->help--)
+ putc(0,rc_file);
+ }
+ else
+ {
+ putc(rc->byte_buffer,rc_file);
+ for(; rc->help; rc->help--)
+ putc(0xff,rc_file);
+ }
+
+ putc(tmp & 0xff,rc_file);
+ putc((tmp = rc->low >> (23-8)) & 0xff,rc_file);
+}
+
+
+/*
+ Inits the range decoder state. Also checks for the magic number, and
+ quits in case it isn't the first, so be careful.
+ -rangecoder *rc, the range coder to be used.
+ */
+void range_decoder_init(rangecoder *rc, FILE *stream)
+{
+ unsigned int _rd_c;
+
+ rc_file=stream;
+ if((_rd_c=getc(rc_file))!=0xB3)
+ {
+ printf("\nThis is not range coded data. Magic number not found. Exiting.");
+ exit(1);
+ }
+ rc->byte_buffer=getc(rc_file);
+ rc->low=rc->byte_buffer>>1;
+ rc->range=0x80;
+}
+
+
+/*
+ Decode a symbol, get its cumulative probability.
+Input:
+-rangecoder *rc, the range coder to be used.
+-unsigned long tot_f, the maximum cumulative probability
+Output:
+-unsigned long, cumulative probability of the current symbol
+Should be called like that:
+current_cump=range_decoder_decode(&o0_rc_state,o0_tot_f);
+*/
+unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f)
+{
+ unsigned long temp;
+
+ range_decoder_renormalize(rc);
+ rc->help=rc->range/tot_f;
+ temp=rc->low/rc->help;
+ if(temp>=tot_f)
+ return tot_f-1;
+ else
+ return temp;
+}
+
+
+/*
+ Updates the state so next symbol can be decoded.
+Input:
+-rangecoder *rc, the range coder to be used.
+-unsigned long tot_f, the maximum cumulative probability
+-unsigned long lt_f, the cumulative probabilty of the symbol
+-unsigned long sy_f, the probability of the symbol
+
+*/
+void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f)
+{
+ unsigned long temp;
+
+ temp=rc->help*lt_f;
+ rc->low-=temp;
+ if(lt_f+sy_f<tot_f)
+ rc->range=rc->help*sy_f;
+ else
+ rc->range-=temp;
+}
+
+
+/*
+ Renormalizes the state while decoding.
+ -rangecoder *rc, the range coder to be used.
+ */
+void range_decoder_renormalize(rangecoder *rc)
+{
+ while(rc->range<=0x00800000)
+ {
+ rc->low=(rc->low<<8)|((rc->byte_buffer<<7)&0xFF);
+ rc->byte_buffer=getc(rc_file);
+ rc->low |= rc->byte_buffer >> (1);
+ rc->range<<=8;
+ }
+}
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "range.h"
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Declarations for the coder.
+*/
+
+#include <stdio.h>
+
+typedef struct{
+ unsigned long low, range, help;
+ unsigned char byte_buffer;
+}rangecoder;
+
+FILE *rc_file;
+
+void range_coder_init(rangecoder *rc, FILE *stream); //coding routines
+void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f);
+void range_coder_renormalize(rangecoder *rc);
+void range_coder_flush(rangecoder *rc);
+void range_decoder_init(rangecoder *rc, FILE *stream);//decoding routines
+unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f);
+void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f);
+void range_decoder_renormalize(rangecoder *rc);
+
+
+typedef unsigned long code_value;
+#define CODE_BITS 32
+#define Top_value ((code_value)1 << (CODE_BITS-1))
+#define SHIFT_BITS (CODE_BITS - 9)
+#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
+#define Bottom_value (Top_value >> 8)
+#define outbyte(cod,x) putc(x,stdout)
+#define inbyte(cod) getc(stdin)
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this program for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "unppmc.c"
+Email: arturo@arturocampos.com
+Web: http://www.arturocampos.com
+
+
+Part of the ppmc decoder.
+
+This module is the main module and calls the different modules to do
+the decoding of a file. When done prints kbyps.
+*/
+
+
+// Bibliotecas necesarias
+#include <stdio.h>
+#include <stdlib.h>
+#include "range.h" //the range coder functions and data
+#include "ppmcdata.h"
+
+
+// Declaracion de funciones del ppmcmain.c
+long filesize(FILE *stream);
+
+
+
+
+//Main
+void main (int argc, char *argv[])
+{
+ unsigned long counter, //temporal counter for loops like for or while
+ counter2, //another temporal counter for sub loops
+ size_file_output, //the size of the output file
+ main_counter; //used in main
+ char expected_flush=0; //used for checking flushing which can't be done
+
+
+ // Print title, version and copyright
+ printf("UNPPMC using range coder.\n");
+ printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n");
+ printf("Permission is granted to make verbatim copies of this program for private\n");
+ printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n");
+
+
+
+ // Check for correct number of parameters
+ if(argc!=3)
+ {
+ printf("Bad number of arguments.\n");
+ exit(1);
+ }
+
+
+ // Try to open input and output files
+ if((file_input=fopen(argv[1],"r+b"))==NULL)
+ {
+ printf("Couldn't open %s.\n",argv[1]);
+ exit(1);
+ }
+
+ if((file_output=fopen(argv[2],"w+b"))==NULL)
+ {
+ printf("Couldn't create %s.\n",argv[2]);
+ exit(1);
+ }
+
+
+ // Get output length
+ fread(&size_file_output,1,4,file_input);
+
+
+ // Initialize ppmc decoder
+ ppmc_alloc_memory();
+ ppmc_initialize_contexts();
+ ppmc_decoder_initialize();
+
+
+
+ // Initialize decoder
+ range_decoder_init(&rc_decoder,file_input);
+
+
+ // Start main loop which decodes the file
+ main_counter=size_file_output-4; //take in account the bytes already written
+ expected_flush=0; //we don't expect a flush yet
+
+ while(main_counter!=0)
+ {
+
+ // Try to decode current byte in order-4 if possible, else in lower ones
+ ppmc_decode_order4();
+ if(byte==-1)
+ ppmc_decode_order3();
+ if(byte==-1)
+ {
+ ppmc_decode_order2();
+ if(byte==-1)
+ {
+ ppmc_decode_order1();
+ if(byte==-1)
+ {
+ ppmc_decode_order0();
+ if(byte==-1) //check if it was an escape code
+ {
+ // Decode in order-(-1)
+ ppmc_get_totf_ordern1();
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump);
+ byte=ppmc_get_symbol_ordern1();
+ ppmc_get_prob_ordern1();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+ coded_in_order=0; //update all orders
+
+ // Now see if it's the code of flushing
+
+ if(symb_cump==256)
+ {
+ printf("Flushing.\n");
+ ppmc_flush_mem_dec();
+ expected_flush=0;
+ continue; //do not output byte nor update
+ }
+
+ }
+ }
+ }
+ }
+
+ // Output byte and update model
+
+ fputc(byte,file_output);
+
+ switch(coded_in_order) //update exclusion
+ {
+ case 0: ppmc_update_order0(); //update only order-0
+ case 1: ppmc_update_order1(); //update order-0 and order-1
+ case 2: ppmc_update_dec_order2(); //update order-0 1 and 2
+ case 3: ppmc_update_dec_order3();
+ case 4: ppmc_update_dec_order4();
+ default: break;
+ };
+
+
+ // Check if flushing has to be done and has not been done.
+ // This is optional, in case you limit the memory usage, you don't
+ // need to include this
+
+ if(expected_flush==1) // If flushing didn't happen, we can't decode
+ {
+ printf("Can't decompress file. Not enough memory.\nTry in a machine with more memory.\n");
+ exit(1);
+ }
+ if(ppmc_out_of_memory==1)
+ {
+ expected_flush=1; // Next code must be a flush code, otherwise we don't
+ // have enough memory, and therefore we can't decode
+ }
+
+
+ // Update order variables
+
+ o4_byte=o3_byte;
+ o3_byte=o2_byte;
+ o2_byte=o1_byte;
+ o1_byte=byte; //current one, is next time order-1
+
+ // Byte decoded and model updated, loop
+ main_counter--;
+
+
+ }
+
+
+ ppmc_free_memory();
+
+ // Close file handles and free memory
+ fclose(file_input);
+ fclose(file_output);
+
+
+ // Nicely exit
+ exit(0);
+}
+
+
+// Ruotines not used by ppmc but rather by main.
+// Not including the range coder.
+
+
+// Returns the file size of a given file.
+long filesize(FILE *stream)
+{
+ long curpos, length;
+
+ curpos = ftell(stream);
+ fseek(stream, 0L, SEEK_END);
+ length = ftell(stream);
+ fseek(stream, curpos, SEEK_SET);
+ return length;
+}
+
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmc.c"
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains the whole ppmc encoder. It uses hash tables for
+ managing most of the orders. And a maximum order of 4. It codes bytes.
+ Order-1-0-(-1) are all handled in tables. Order-2 has a table with
+ direct hashing with pointers to the linked lists. Order-4 and order-3
+ both have hash tables with pointers to contexts in a linked lists which
+ finally have a pointer to the start of the linked list with the
+ probability distribution. Update exclusion is used, but exclusion is not.
+
+ Please, note that if the machine where the decoder is run doesn't has as
+ much memory as the computer where the encoder was ran, the decoder will
+ not be able to properly decode the file, because it will not be able to
+ keep track of new statistics, in this case it will just exit.
+
+ For applications where the loss of data is not admisible, I suggest you to
+ limit both encoder and decoder's memory requeriments to a given minimum
+ which both machines have.
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "range.h"
+#include "ppmcdata.h"
+
+
+
+// Ruotines used by ppmc. Not including the range coder.
+//
+// They are for initializing of both encoder and decoder, and unless there
+// are two version, both encoder and decoder use the same routines. Like
+// "ppmc_initialize_contexts".
+
+
+// This one allocs the memory needed by ppmc, and adjust some pointers used
+// for allocating elements in the linked lists. The mempool arrays must be
+// initialized now.
+void ppmc_alloc_memory(void)
+{
+ unsigned long counter;
+
+
+ // Init mempool buffers
+
+ for(counter=0;counter!=_mempool_max_index;++counter)
+ {
+ _bytes_pool_array[counter]=0;
+ _context_pool_array[counter]=0;
+ }
+
+ _bytes_pool_index=1; //first entry will be used now
+ _context_pool_index=1;
+
+
+ // Allocate memory for ppmc structures and adjust some variables
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+
+ //save pointers in the array for freeing
+ _bytes_pool_array[0]=_bytes_pool;
+ _context_pool_array[0]=_context_pool;
+
+
+ //adjust variables
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements;
+ _context_pool_max=_context_pool+_context_pool_elements;
+
+ ppmc_out_of_memory=0; //we still have memory
+}
+
+
+// This routine initializes all the contexts, and all the tables including
+// those who care about the number of bytes defined in a context.
+void ppmc_initialize_contexts(void)
+{
+ unsigned long counter, counter2;
+
+
+ // Order-0
+ for(counter=0;counter!=256;++counter) //clear table
+ order0_array[counter]=0;
+
+ order0_defined_bytes=0; //adjust variables
+ order0_max_cump=0;
+
+
+ // Order-1
+ for(counter=0;counter!=256;++counter) //erase every table of every context
+ for(counter2=0;counter2!=256;++counter2)
+ order1_array[counter][counter2]=0;
+
+ for(counter=0;counter!=256;++counter) //adjust variables
+ {
+ order1_defined_bytes_array[counter]=0;
+ order1_max_cump_array[counter]=0;
+ }
+
+
+ // Order-2
+ for(counter=0;counter!=65536;++counter)
+ {
+ //order2_array[counter].prob=0; //clear pointer to bytes and frequencies
+ //order2_array[counter].max_cump=0;
+ order2_array[counter].defined_bytes=0;
+ }
+
+
+ // Order-4-3
+ for(counter=0;counter!=65536;++counter) //order-4-3
+ {
+ order4_hasht[counter]=0;
+ order3_hasht[counter]=0;
+ }
+}
+
+
+// This routine initializes the encode model by outputting as many bytes as
+// needed to prepare the models. This should be called before the main loop
+// and after the memory has been allocated and tables initialized.
+//
+// It does not need uses the range coder. It output the first 1 bytes.
+void ppmc_encoder_initialize(void)
+{
+
+ // Initialize order-0 and prepare different bytes for orders
+ fputc((byte=fgetc(file_input)),file_output);
+ o4_byte=byte; //order-4
+
+ fputc((byte=fgetc(file_input)),file_output);
+ o3_byte=byte; //order-3
+
+ fputc((byte=fgetc(file_input)),file_output);
+ o2_byte=byte; //order-2
+ ppmc_update_order0();
+
+ fputc((byte=fgetc(file_input)),file_output);
+ o1_byte=byte;
+
+}
+
+
+// This routine initializes the decoder model, should be called to do the same
+// changes as "ppmc_encoder_initialize()" did.
+void ppmc_decoder_initialize(void)
+{
+
+ // Initialize order-0 and context bytes
+ byte=fgetc(file_input);
+ o4_byte=byte; //order-4
+ fputc(byte,file_output);
+
+ byte=fgetc(file_input);
+ o3_byte=byte; //order-3
+ fputc(byte,file_output);
+
+ byte=fgetc(file_input);
+ o2_byte=byte; //order-2
+
+ fputc(byte,file_output); //output first byte
+ ppmc_update_order0();
+
+ byte=fgetc(file_input);
+ o1_byte=byte; //order-1
+ fputc(byte,file_output);
+}
+
+
+// Once coding or decoding is finished you have to call this routine.
+// It must be called when done.
+void ppmc_free_memory(void)
+{
+ unsigned long counter;
+
+ // Free the memory buffers
+
+ for(counter=0;counter!=_mempool_max_index;++counter)
+ {
+ if(_bytes_pool_array[counter]!=0)
+ free(_bytes_pool_array[counter]);
+
+ if(_context_pool_array[counter]!=0)
+ free(_context_pool_array[counter]);
+ }
+
+}
+
+
+// This routine flushes the memory and restarts all the tables of
+// probabilities, current order bytes are not modified, this function
+// is called when we ran out of memory. We have to output the code
+// number 256 which means memory flushing, for doing this we have to go
+// to order-(-1) so we have to output an escape code in all the orders
+// till we reach order-(-1) where we can code it. Then we free all the
+// memory, alloc it again, and reinitialize all the orders.
+//
+// However we may find the case when the current order is not initialized,
+// in this case we don't need to output an escape code.
+void ppmc_flush_mem_enc(void)
+{
+
+ // Code an escape code in order-4
+ if(ppmc_get_totf_order4()!=0) //if 0 no need of escape code
+ {
+
+ ppmc_get_escape_prob_order4(); //get prob and cump
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ }
+
+
+ // Code an escape code in order-3
+ if(ppmc_get_totf_order3()!=0) //if 0 no need of escape code
+ {
+
+ ppmc_get_escape_prob_order3(); //get prob and cump
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ }
+
+
+ // Code an escape code in order-2
+
+ o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+ // First check if current order-2 context is empty
+ if(order2_array[o2_cntxt].defined_bytes!=0) //it's not empty
+ {
+ ppmc_get_totf_order2();
+ ppmc_get_escape_prob_order2();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+ }
+
+
+ // Code an escape code in order-1
+
+ // First check if current order-1 table is empty
+ if(order1_defined_bytes_array[o1_byte]!=0) //it's not empty
+ {
+ ppmc_get_totf_order1();
+ ppmc_get_escape_prob_order1();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+ }
+
+
+ // Code an escape code in order-0. Order-0 always has at least one symbol
+
+ ppmc_get_totf_order0();
+ ppmc_get_escape_prob_order0();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+
+
+ // Now we can code the code 256
+
+ symb_prob=1;
+ symb_cump=256;
+ total_cump=257;
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+
+ // Now that decoder knows the flushing, free memory and reinit
+
+ ppmc_free_memory();
+ ppmc_alloc_memory();
+ ppmc_initialize_contexts();
+
+
+ // Be sure that order-0 has at least one probability
+
+ order0_array[o1_byte]++;
+ order0_max_cump++;
+ order0_defined_bytes++;
+
+}
+
+
+// When the decoder gets the symbol of flushing, most of the job is done
+// because we already got all the escape codes, so we only have to reinit.
+void ppmc_flush_mem_dec(void)
+{
+
+ // Free memory and reinit
+
+ ppmc_free_memory();
+ ppmc_alloc_memory();
+ ppmc_initialize_contexts();
+
+
+ // Be sure that order-0 has at least one probability
+
+ order0_array[o1_byte]++;
+ order0_max_cump++;
+ order0_defined_bytes++;
+
+
+}
+
+
+
+// ORDER-(-1) functions, also called ordern1 (Negative1) in functions
+//
+// Because order-(-1) does not need to update its probability tables, it
+// has no tables, and relies on the fact that the cump of byte is its own
+// value, and the probability is fixed, 1, and the total cump is 257.
+//
+// The alphabet has the following distribution: 0-255 the bytes. 256 is
+// an special symbol which means that we have flushed the encoder tables,
+// and thus the encoder must flush its tables too.
+//
+// The rest of the tables only have 256 symbols, because we have no need
+// of assign a symbol to the flush code (which already is the order-(-1)
+// table) nor to the escape code.
+
+
+// Gets the probability for a given symbol in the order-(-1) (ordern1)
+void ppmc_get_prob_ordern1(void)
+{
+ symb_cump=byte; //its value
+ symb_prob=1; //flat probability
+ total_cump=257; //total cump
+}
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-(-1)
+void ppmc_get_totf_ordern1(void)
+{
+ total_cump=257; //this is fixed
+}
+
+
+// Returns the symbol for a given cump under order-(-1)
+unsigned long ppmc_get_symbol_ordern1 (void)
+{
+ return symb_cump;
+}
+
+
+
+// ORDER-0 functions
+//
+// Due to the fact that order-0 has no context, I use an array for all the
+// probabilities under order-0, just as you could do in a basic model for
+// arithmetic coding.
+//
+// The main array is: order0_array. Where order0_array[byte] contains the
+// probability for a given byte. The same applies to order-1.
+//
+// To ensure that the updating and coding is done correctly, "byte" can't
+// be changed till all the coding and updating is done.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-0
+void ppmc_get_totf_order0(void)
+{
+ // Total cump is current total cump plus the escape for the escape code
+ total_cump=order0_defined_bytes+order0_max_cump;
+}
+
+
+// Codes a byte under order-0 and returns 1, otherwise it returns a 0 and
+// has coded an escape code. In this case further coding is needed.
+//
+// Returns: 1 in case a byte was coded. 0 in case of escape code.
+char ppmc_code_byte_order0(void)
+{
+ unsigned long counter;
+
+ ppmc_get_totf_order0(); //get total cump
+
+ // See if the byte is present
+ if(order0_array[byte]==0) //a probability of 0
+ {
+
+ // Because it was not present, output an escape code, prepare variables
+
+ symb_cump=order0_max_cump; //obviously its cump is current max_cump
+ //without escape code's space
+
+ symb_prob=order0_defined_bytes; //the number of defined bytes
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0; //byte not coded
+ }
+ else
+ {
+
+ coded_in_order=0;
+
+ // The symbol is present, code it under order-0
+
+ symb_prob=order0_array[byte]; //get probability directly
+
+ // Make cump for current symbol
+
+ symb_cump=0; //for first symbol is 0
+ for(counter=0; counter!=byte ; ++counter)
+ symb_cump+=order0_array[counter]; //sum probabilities before our symbol
+
+ // Code the symbol
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //symbol coded under order-0
+ }
+}
+
+
+// This functions update order-0 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+void ppmc_update_order0(void)
+{
+ if(order0_array[byte]==0)
+ {
+ // It had a zero probability
+ order0_array[byte]++; //increment symbol probability
+ ++order0_defined_bytes; //new byte defined
+ ++order0_max_cump; //total cump
+ return;
+ }
+ else
+ {
+ // It had a non-zero probability
+
+ // Increment its probability
+ order0_array[byte]++; //increment symbol probability
+ ++order0_max_cump; //total cump
+
+ // Check to see if its the maximum in this case renormalize
+ if(order0_array[byte]==255)
+ ppmc_renormalize_order0();
+
+ return;
+ }
+}
+
+
+// This functions renormalizes the probabilities at order-0 updating variables
+void ppmc_renormalize_order0(void)
+{
+ unsigned long counter;
+
+ // Initialize variables
+ order0_defined_bytes=0; //clear them
+ order0_max_cump=0;
+
+ // Loop over all probabilities, divide them by a factor of 2 and update variables
+ for(counter=0 ; counter!=256 ; ++counter)
+ {
+ order0_array[counter]>>=1; //divide by a factor of 2
+
+ if(order0_array[counter]!=0) //see if it has a non zero probability
+ order0_defined_bytes++;
+
+ order0_max_cump+=order0_array[counter]; //sum to the total cump
+ }
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of a escape code it returns -1
+void ppmc_decode_order0(void)
+{
+ unsigned long current_cump, counter;
+
+
+ // Get the total cump needed for decoding symbol
+ ppmc_get_totf_order0(); //total cump needed for decoding
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=order0_max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order0();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ for(counter=0 ; counter!= 256 ; ++counter)
+ {
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+ current_cump+=order0_array[counter]; //update cump
+ }
+
+ counter--; //-1 (see ac_ppmc.html, searching for a given symbol)
+
+ byte=counter; //update the variable with the found symbol
+
+
+ // Get the cump and prob of byte
+
+ symb_prob=order0_array[byte]; //the probabilty
+ symb_cump=current_cump-order0_array[byte]; //using the cump which was
+ //already made
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ coded_in_order=0; //decoded under order-0
+
+ return;
+ }
+
+}
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order0(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+ symb_prob=order0_defined_bytes;
+ symb_cump=order0_max_cump;
+ total_cump=order0_defined_bytes+order0_max_cump;
+}
+
+
+
+// ORDER-1 functions
+//
+// Order-1 uses 256 arrays one for every context, obviously they are accessed
+// like that: order1_array[o1_byte][byte]
+//
+// Also for computing the value of the current escape code in a given context
+// we need to know how many bytes are defined in a given context, they are
+// stored in an array, and you use them like that:
+// order1_defined_bytes_array[o1_byte]
+//
+// The same goes for the maximum cump of a given context:
+// order1_max_cump_array[o1_byte]
+//
+// Read the order-0 explanation.
+//
+// To ensure that the updating and coding is done correctly, "byte" and
+// "o1_byte" can't be changed till all the coding and updating is done.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-1. o1_byte should contain the order-1
+void ppmc_get_totf_order1(void)
+{
+ // Total cump is current total cump plus the escape for the escape code
+ total_cump=order1_defined_bytes_array[o1_byte]+order1_max_cump_array[o1_byte];
+}
+
+
+// Codes a byte under order-1 and returns 1.
+// Otherwise it returns a 0 it may be that it has coded an escape code, or
+// that current table was empty, in this case nothing is outputted
+// In both cases further coding is needed.
+//
+// Returns: 1 in case a byte was coded. 0 in case of escape code or empty table.
+// In case the byte is coded under this context, coded_in_order=1.
+char ppmc_code_byte_order1(void)
+{
+ unsigned long counter;
+
+
+ // First check if current order-1 table is empty
+ if(order1_defined_bytes_array[o1_byte]==0) //it's empty
+ {
+ return 0; //byte not coded, nothing done
+ }
+
+
+ // Now try to code this byte under order-1
+
+ ppmc_get_totf_order1(); //get total cump
+
+ // See if the byte is present
+ if(order1_array[o1_byte][byte]==0) //a probability of 0
+ {
+
+ // Because it was not present, output an escape code, prepare variables
+
+ symb_cump=order1_max_cump_array[o1_byte];//obviously its cump is current max_cump
+ //without escape code's space
+
+ symb_prob=order1_defined_bytes_array[o1_byte];//the number of defined bytes
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0; //byte not coded, escape coded
+ }
+ else
+ {
+
+ coded_in_order=1; //because we coded it under order-1
+
+ // The symbol is present, code it under order-1
+
+ symb_prob=order1_array[o1_byte][byte]; //get probability directly
+
+ // Make cump for current symbol
+
+ symb_cump=0; //for first symbol is 0
+ for(counter=0; counter!=byte ; ++counter)
+ symb_cump+=order1_array[o1_byte][counter]; //sum probabilities before our symbol
+
+ // Code the symbol
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //symbol coded under order-1
+ }
+}
+
+
+// This functions update order-1 probabilities with current byte, also takes
+// care about updating variables, and renormalizing. o1_byte must be filled.
+void ppmc_update_order1(void)
+{
+ if(order1_array[o1_byte][byte]==0)
+ {
+ // It had a zero probability
+ order1_array[o1_byte][byte]++; //increment symbol probability
+ ++order1_defined_bytes_array[o1_byte]; //new byte defined
+ ++order1_max_cump_array[o1_byte]; //total cump
+ return;
+ }
+ else
+ {
+ // It had a non-zero probability
+
+ // Increment its probability
+ order1_array[o1_byte][byte]++; //increment symbol probability
+ ++order1_max_cump_array[o1_byte]; //total cump
+
+ // Check to see if its the maximum in this case renormalize
+ if(order1_array[o1_byte][byte]==255)
+ ppmc_renormalize_order1();
+
+ return;
+ }
+}
+
+
+// This functions renormalizes the probabilities at order-1 updating variables
+void ppmc_renormalize_order1(void)
+{
+ unsigned long counter;
+
+ // Initialize variables
+ order1_defined_bytes_array[o1_byte]=0; //clear them
+ order1_max_cump_array[o1_byte]=0;
+
+ // Loop over all probabilities, divide them by a factor of 2 and update variables
+ for(counter=0 ; counter!=256 ; ++counter)
+ {
+ order1_array[o1_byte][counter]>>=1; //divide by a factor of 2
+
+ if(order1_array[o1_byte][counter]!=0) //see if it has a non zero probability
+ order1_defined_bytes_array[o1_byte]++;
+
+ order1_max_cump_array[o1_byte]+=order1_array[o1_byte][counter]; //sum to the total cump
+ }
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+void ppmc_decode_order1(void)
+{
+ unsigned long current_cump, counter;
+
+
+ // First check if current order-1 table is empty
+ if(order1_defined_bytes_array[o1_byte]==0) //it's empty
+ {
+ byte=-1; //byte not coded, nothing done
+ return;
+ }
+
+
+ // Get the total cump needed for decoding symbol
+ ppmc_get_totf_order1(); //total cump needed for decoding
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=order1_max_cump_array[o1_byte]) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order1();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ for(counter=0 ; counter!= 256 ; ++counter)
+ {
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+ current_cump+=order1_array[o1_byte][counter]; //update cump
+ }
+
+ counter--; //-1 (see ac_ppmc.html, searching for a given symbol)
+
+ byte=counter; //update the variable with the found symbol
+
+
+ // Get the cump and prob of byte
+
+ symb_prob=order1_array[o1_byte][byte]; //the probabilty
+ symb_cump=current_cump-order1_array[o1_byte][byte]; //using the cump which was
+ //already made
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=1;
+
+ return;
+ }
+
+}
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order1(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+ symb_prob=order1_defined_bytes_array[o1_byte];
+ symb_cump=order1_max_cump_array[o1_byte];
+ total_cump=order1_defined_bytes_array[o1_byte]+order1_max_cump_array[o1_byte];
+}
+
+
+
+// ORDER-2 functions
+//
+// Order-2 uses a table which holds the contexts data structure of every
+// structure, its accessed with "ppmc_order2_hash_key(o1_byte, o2_byte)"
+// and because it uses direct addressing there's no need to check for hash
+// collisions, which makes it faster. The variable "o2_cntxt" contains o1_byte
+// and o2_byte used directly to index the order-2 array.
+//
+// Though order-2 uses the same routines as lower orders they are rather
+// different, because we can't allocate memory for all the probabilities
+// tables, we have to use linked lists for the bytes and probabilities.
+//
+// Under order-2 we don't let a probability to be lower than 1, that is,
+// there can't be a symbol in a linked list with a 0 probability, I choosed
+// this for three factors: the code is easier to read (it isn't messy)
+// we gain some speed, and the loss of compression seems little.
+//
+// To ensure that the updating is done correctly, "o2_cntxt" and
+// "o2_ll_node" must not be modified by any routine except those who manage
+// order-2.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-2. cntxt must have o1_byte and o2_byte hashed with the hash key
+// for order-2.
+void ppmc_get_totf_order2(void)
+{
+ // Total cump is current total cump plus the escape for the escape code
+ total_cump=order2_array[o2_cntxt].defined_bytes+order2_array[o2_cntxt].max_cump;
+}
+
+
+// Codes a byte under order-2 and returns 1.
+// Otherwise it returns a 0. It may be that it has coded an escape code, or
+// that current table was empty.
+//
+// Returns: 1 in case a byte was coded. 0 in case of escape code or empty table.
+// In case the byte is coded under this context, coded_in_order=2.
+char ppmc_code_byte_order2(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Initialize o2_cntxt
+
+ o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+
+ // First check if current order-2 context is empty
+ if(order2_array[o2_cntxt].defined_bytes==0) //it's empty
+ {
+ return 0; //byte not coded, nothing done
+ }
+
+
+ // Now try to code this byte under order-2
+
+ ppmc_get_totf_order2(); //get total cump
+
+
+ // See if the byte is present and compute its cump at the same time
+
+ node=order2_array[o2_cntxt].prob; //pointer to first element in the linked list
+
+ symb_cump=0; //the first symbol always has a 0 cump
+
+
+ // Now search the byte in the linked list
+
+ do{
+ if(node->byte==byte)
+ goto ppmc_o2_byte_found; //bad thing, I know, anyone has a better idea?
+ symb_cump+=node->freq; //add the probability of this byte to the cump
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // If we reach that point, the byte was not found in the linked list
+ // so we don't need the cump, we have to output an escape code, whose
+ // probabilities are know using the context structure in the table.
+
+ // Byte was not present in the linked list, current node is the last one,
+ // and that's the node needed for creating a new node, save it.
+
+ o2_ll_node=node;
+
+ // Now get the probability and cump of the escape code
+
+ symb_cump=order2_array[o2_cntxt].max_cump;
+ symb_prob=order2_array[o2_cntxt].defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0;
+
+
+ // That code is executed when the byte is found in the linked list
+
+ppmc_o2_byte_found:
+
+
+ // Everything has been tested, now we can feel free to code the byte,
+ // the symb_cump is already computed, now get its probability and code
+ // the byte, also save pointer to this element in the linked lists for
+ // updating.
+
+ coded_in_order=2; //successfully coded under order-2
+
+ o2_ll_node=node; //save it for updating
+
+ symb_prob=node->freq; //get the probability of the byte
+
+ // Code it.
+
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //byte coded under order-2
+}
+
+
+// This functions update order-2 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+// Of course "o2_ll_node" must point to the element to update or the last one,
+// based on the "coded_in_order" and checking the pointer of the element we
+// can decide what to do.
+//
+// This updating is only for encoding.
+void ppmc_update_order2(void)
+{
+
+ // First of all check if that's the first byte in this context, in that case
+ // we have to initialize some variables in the context structure.
+
+ if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+ order2_array[o2_cntxt].defined_bytes=1;
+ order2_array[o2_cntxt].max_cump=1;
+ order2_array[o2_cntxt].prob=_bytes_pool;
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+ // The byte was coded under order two, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==2) //coded under order-2
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o2_ll_node->freq++; //increment its frequency (rather probability)
+
+ order2_array[o2_cntxt].max_cump++; //total cump
+
+ if(o2_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order2(); //renormalize
+
+ }
+ else
+ {
+
+ // Once every paranoid check has been done we are sure that this byte
+ // did not existed and so we have to create a new node in the linked
+ // list. Also we have to take care of memory issues.
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ o2_ll_node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ order2_array[o2_cntxt].max_cump++; //total cump
+ order2_array[o2_cntxt].defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+}
+
+
+// This functions renormalizes the probabilities at order-2 updating context
+// variables.
+void ppmc_renormalize_order2(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+ // Initialize variables. Defined bytes remain the same.
+ order2_array[o2_cntxt].max_cump=0; //clear them
+
+ node=order2_array[o2_cntxt].prob; //get pointer to lined lists
+
+ // Divide all the probabilities by 2 and update context variables
+ while(1)
+ {
+ node->freq>>=1; //divide by a factor of 2
+
+ if(node->freq==0) //don't allow a probability to be 0
+ node->freq=1;
+
+ order2_array[o2_cntxt].max_cump+=node->freq; //sum to the total cump
+
+ if(node->next==0) //last element
+ break;
+ node=node->next;
+ }
+
+
+ //printf("\nRenormalization, context:%c%c",o2_byte,o1_byte);
+
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+//
+// If we decode an escape code, we don't modify "o2_ll_node" and thus its
+// work of the updating routine to read till the end of the linked list
+// (for adding a new element) however if we decode a byte, we save on
+// "o2_ll_node" the pointer to its node. (so updating is faster)
+void ppmc_decode_order2(void)
+{
+ unsigned long current_cump, counter;
+ struct _byte_and_freq *node;
+
+ // Initialize o2_cntxt
+
+ o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte);
+
+
+ // First check if current order-2 context is empty
+ if(order2_array[o2_cntxt].defined_bytes==0) //it's empty
+ {
+ byte=-1; //byte not coded, nothing done
+ return;
+ }
+
+
+ // Get the total cump needed for decoding symbol
+ ppmc_get_totf_order2(); //total cump needed for decoding
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=order2_array[o2_cntxt].max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order2();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ node=order2_array[o2_cntxt].prob; //get pointer to linked lists
+
+ while(1)
+ {
+ current_cump+=node->freq; //update cump
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+
+ node=node->next; //next element
+ //we have no need to check for the last symbol, we'll never read further
+ //the end of the linked lists, before we'll found the last byte.
+ }
+
+
+ //read byte value and probability
+
+ symb_prob=node->freq; //get the probability for updating the state
+ byte=node->byte; //get byte
+ o2_ll_node=node; //used for updating
+
+
+ // Get the cump of byte
+
+ symb_cump=current_cump-symb_prob;
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=2;
+
+ return;
+ }
+
+}
+
+
+// This is the routine for updating while decoding. We have to search the byte
+// in the linked list, if it's present, update its count, otherwise we have
+// hitted the end of the linked list, and there we have to create a new node.
+//
+// Of course if the byte was matched in order-2 we'll have a pointer to it
+// in "o2_ll_node" so we don't need to read the linked list. (we already did
+// in decoding)
+//
+// Another case which we also have to specially deal with, this is the case
+// when the context has not been initalized yet.
+void ppmc_update_dec_order2(void)
+{
+ struct _byte_and_freq *node;
+
+
+ // Handle the case when the context is not initialized
+ // This code is the same as the one for the encoding.
+
+ if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+ order2_array[o2_cntxt].defined_bytes=1;
+ order2_array[o2_cntxt].max_cump=1;
+ order2_array[o2_cntxt].prob=_bytes_pool;
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+
+ return; //nothing else to do
+ }
+
+
+ // Current context is initalized, proceed
+
+ if(coded_in_order==2) //check if it was decoded under order-2
+ {
+
+ // We can be sure that the pointer "o2_ll_node" points to its entry, and
+ // it has a non 0 probability (otherwise it couldn't be coded) so just
+ // update its probability and max_cump
+
+ o2_ll_node->freq++; //the probability of the byte
+ order2_array[o2_cntxt].max_cump++; //the max_cump
+
+ if(o2_ll_node->freq==255) //check for renormalization
+ ppmc_renormalize_order2();
+
+ }
+ else
+ {
+
+ // An escape code was decoded under order-2, we have to read till the
+ // end of the linked list so we can add a new node for this new byte.
+
+ node=order2_array[o2_cntxt].prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+
+ // We reached the end of the linked list, add a new node if possible,
+ // we are using the same code of "ppmc_update_order2()" with the
+ // difference that the pointer to the linked list is "node"
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ order2_array[o2_cntxt].max_cump++; //total cump
+ order2_array[o2_cntxt].defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //we are finished updating
+
+ }
+
+}
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order2(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+
+ symb_prob=order2_array[o2_cntxt].defined_bytes;
+ symb_cump=order2_array[o2_cntxt].max_cump;
+}
+
+
+
+// ORDER-3 functions
+//
+// The difference between order-3 and order-3 are just a few, instead of
+// keeping a table with the context structures, we keep a hash table with
+// pointers to linked lists with the context, so it's only a matter of
+// searching current context in the linked list corresponding to its hash
+// entry. This is done in "ppmc_get_totf_order3" because that's the first
+// routine that both encoding and decoding routines call.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-3. Must be called while encoding or decoding before anything else
+// because it initializes the pointers to the context structure in
+// "o3_context" and o3_cntxt.
+//
+// If the hash entry is not initialized it returns "o3_context"=0
+// If the context is not present in the linked list of context, "o3_context"
+// will point to the last element in the linked list.
+// If the context is present "o3_context" will point to the context to use.
+// One can distinguish the last two by checking the context value of the
+// structure, if it's not the same, is the last element.
+//
+// The routine returns 0 if the hash entry is not initialized or if the
+// the context was not present. Otherwise it returns 1, meaning that we
+// have to code under this context.
+char ppmc_get_totf_order3(void)
+{
+ struct context *cntxt_node;
+
+
+ // First make the hash key for order-3
+
+ o3_cntxt=ppmc_order3_hash_key(o1_byte,o2_byte,o3_byte);
+ full_o3_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16); //order-3
+
+
+ // Now check the hash entry in the table
+
+ if(order3_hasht[o3_cntxt]==0) //if 0, not initialized
+ {
+
+ o3_context=0; //no hash entry
+
+ return 0; //hash entry not initialized
+ }
+
+
+ // Now read trough the linked list of context searching current one
+
+ cntxt_node=order3_hasht[o3_cntxt];
+
+ while(1)
+ {
+
+ if(cntxt_node->order4321==full_o3_cntxt) //compare context
+ goto ppmc_gtf_cntxt_found;
+
+ if(cntxt_node->next==0) //end of context's linked list
+ break;
+
+ cntxt_node=cntxt_node->next; //next element
+
+ }
+
+
+ // Once there the context was not found
+ o3_context=cntxt_node; //pointer to last element in the linked list
+
+ return 0; //it was not present
+
+
+ // The context is found, so return pointer and cump
+
+ppmc_gtf_cntxt_found:
+
+ o3_context=cntxt_node;
+
+ // Total cump is current total cump plus the escape for the escape code
+
+ total_cump=o3_context->defined_bytes+o3_context->max_cump;
+
+ return 1; //context found
+
+}
+
+
+// Codes a byte under order-3 and returns 1.
+// Otherwise it returns a 0.
+//
+// In case the byte is coded under this context, coded_in_order=3.
+char ppmc_code_byte_order3(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order3()==0)
+ return 0;
+
+
+ // See if the byte is present and compute its cump at the same time
+
+ node=o3_context->prob; //pointer to first element in the linked list
+
+ symb_cump=0; //the first symbol always has a 0 cump
+
+
+ // Now search the byte in the linked list
+
+ do{
+ if(node->byte==byte)
+ goto ppmc_o3_byte_found; //bad thing, I know, anyone has a better idea?
+ symb_cump+=node->freq; //add the probability of this byte to the cump
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // If we reach that point, the byte was not found in the linked list
+ // so we don't need the cump, we have to output an escape code, whose
+ // probabilities are know using the context structure in the table.
+
+ // Byte was not present in the linked list, current node is the last one,
+ // and that's the node needed for creating a new node, save it.
+
+ o3_ll_node=node;
+
+ // Now get the probability and cump of the escape code
+
+ symb_cump=o3_context->max_cump;
+ symb_prob=o3_context->defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0;
+
+
+ // That code is executed when the byte is found in the linked list
+
+ppmc_o3_byte_found:
+
+
+ // Everything has been tested, now we can feel free to code the byte,
+ // the symb_cump is already computed, now get its probability and code
+ // the byte, also save pointer to this element in the linked lists for
+ // updating.
+
+ coded_in_order=3; //successfully coded under order-3
+
+ o3_ll_node=node; //save it for updating
+
+ symb_prob=node->freq; //get the probability of the byte
+
+ // Code it.
+
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //byte coded under order-3
+}
+
+
+// This functions update order-3 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+//
+// "o3_ll_node" must point to the element to update or the last one,
+// based on the "coded_in_order" and checking the pointer of the element we
+// can decide what to do. Also "o3_context" must be initialized.
+//
+// This updating is only for encoding.
+void ppmc_update_order3(void)
+{
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order3_hasht[o3_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+
+
+ // The byte was coded under order three, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==3) //coded under order-3
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o3_ll_node->freq++; //increment its frequency (rather probability)
+
+ o3_context->max_cump++; //total cump
+
+ if(o3_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order3(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o3_context" points to the last element, so we can put the new element.
+
+ if(o3_context->order4321==full_o3_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ o3_ll_node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o3_context->max_cump++; //total cump
+ o3_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o3_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+// This functions renormalizes the probabilities at order-3 updating context
+// variables.
+void ppmc_renormalize_order3(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Initialize variables. Defined bytes remain the same.
+ o3_context->max_cump=0; //clear them
+
+ node=o3_context->prob; //get pointer to lined lists
+
+ // Divide all the probabilities by 2 and update context variables
+ while(1)
+ {
+ node->freq>>=1; //divide by a factor of 2
+
+ if(node->freq==0) //don't allow a probability to be 0
+ node->freq=1;
+
+ o3_context->max_cump+=node->freq; //sum to the total cump
+
+ if(node->next==0) //last element
+ break;
+ node=node->next;
+ }
+
+
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+//
+// If we decode an escape code, we don't modify "o3_ll_node" and thus its
+// work of the updating routine to read till the end of the linked list
+// (for adding a new element) however if we decode a byte, we save on
+// "o3_ll_node" the pointer to its node. (so updating is faster)
+void ppmc_decode_order3(void)
+{
+ unsigned long current_cump, counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order3()==0)
+ {
+ byte=-1;
+ return;
+ }
+
+
+ // Decode current cump
+
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=o3_context->max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order3();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ node=o3_context->prob; //get pointer to linked lists
+
+ while(1)
+ {
+ current_cump+=node->freq; //update cump
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+
+ node=node->next; //next element
+ //we have no need to check for the last symbol, we'll never read further
+ //the end of the linked lists, before we'll found the last byte.
+ }
+
+
+ //read byte value and probability
+
+ symb_prob=node->freq; //get the probability for updating the state
+ byte=node->byte; //get byte
+ o3_ll_node=node; //used for updating
+
+
+ // Get the cump of byte
+
+ symb_cump=current_cump-symb_prob;
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=3;
+
+ return;
+ }
+
+}
+
+
+// This is the routine for updating while decoding. The only difference with
+// the routine for coding is that when an escape code was coded, "o3_ll_node"
+// is not initialized so we have to read till the end of the linked list.
+// Fortunately "o3_context" will be initialized so we don't need to read its
+// linked list.
+void ppmc_update_dec_order3(void)
+{
+ struct _byte_and_freq *node;
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order3_hasht[o3_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+ // The byte was coded under order three, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==3) //coded under order-3
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o3_ll_node->freq++; //increment its frequency (rather probability)
+
+ o3_context->max_cump++; //total cump
+
+ if(o3_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order3(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o3_context" points to the last element, so we can put the new element.
+
+ if(o3_context->order4321==full_o3_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ // Read till the end of the linked list
+
+ node=o3_context->prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+ // Now add element
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o3_context->max_cump++; //total cump
+ o3_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o3_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o3_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order3(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+
+ symb_prob=o3_context->defined_bytes;
+ symb_cump=o3_context->max_cump;
+}
+
+
+
+// ORDER-4 functions
+//
+// The routines for order-4 are *equal* to those for order-3, there are a few
+// changes like different global variables, and different hash keys.
+//
+// If you want to go to higher orders, you'd use the same code and data
+// structures, with the difference of the context bytes (order4321)
+// stored in every context's linked list.
+
+
+// Returns in the variable "total_cump" the current total cump of
+// order-4. Must be called while encoding or decoding before anything else
+// because it initializes the pointers to the context structure in
+// "o4_context" and o4_cntxt.
+//
+// If the hash entry is not initialized it returns "o4_context"=0
+// If the context is not present in the linked list of context, "o4_context"
+// will point to the last element in the linked list.
+// If the context is present "o4_context" will point to the context to use.
+// One can distinguish the last two by checking the context value of the
+// structure, if it's not the same, is the last element.
+//
+// The routine returns 0 if the hash entry is not initialized or if the
+// the context was not present. Otherwise it returns 1, meaning that we
+// have to code under this context.
+char ppmc_get_totf_order4(void)
+{
+ struct context *cntxt_node;
+
+
+ // First make the hash key for order-4
+
+ o4_cntxt=ppmc_order4_hash_key(o1_byte,o2_byte,o3_byte,o4_byte);
+ full_o4_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16)+(o4_byte<<24); //order-4
+
+
+ // Now check the hash entry in the table
+
+ if(order4_hasht[o4_cntxt]==0) //if 0, not initialized
+ {
+
+ o4_context=0; //no hash entry
+
+ return 0; //hash entry not initialized
+ }
+
+
+ // Now read trough the linked list of context searching current one
+
+ cntxt_node=order4_hasht[o4_cntxt];
+
+ while(1)
+ {
+
+ if(cntxt_node->order4321==full_o4_cntxt) //compare context
+ goto ppmc_gtfo4_cntxt_found;
+
+ if(cntxt_node->next==0) //end of context's linked list
+ break;
+
+ cntxt_node=cntxt_node->next; //next element
+
+ }
+
+
+ // Once there the context was not found
+ o4_context=cntxt_node; //pointer to last element in the linked list
+
+ return 0; //it was not present
+
+
+ // The context is found, so return pointer and cump
+
+ppmc_gtfo4_cntxt_found:
+
+ o4_context=cntxt_node;
+
+ // Total cump is current total cump plus the escape for the escape code
+
+ total_cump=o4_context->defined_bytes+o4_context->max_cump;
+
+ return 1; //context found
+
+}
+
+
+// Codes a byte under order-4 and returns 1.
+// Otherwise it returns a 0.
+//
+// In case the byte is coded under this context, coded_in_order=4.
+char ppmc_code_byte_order4(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order4()==0)
+ return 0;
+
+
+ // See if the byte is present and compute its cump at the same time
+
+ node=o4_context->prob; //pointer to first element in the linked list
+
+ symb_cump=0; //the first symbol always has a 0 cump
+
+
+ // Now search the byte in the linked list
+
+ do{
+ if(node->byte==byte)
+ goto ppmc_o4_byte_found; //bad thing, I know, anyone has a better idea?
+ symb_cump+=node->freq; //add the probability of this byte to the cump
+ if(node->next==0)
+ break;
+ node=node->next; //next element in the linked list
+ }while(1);
+
+
+ // If we reach that point, the byte was not found in the linked list
+ // so we don't need the cump, we have to output an escape code, whose
+ // probabilities are know using the context structure in the table.
+
+ // Byte was not present in the linked list, current node is the last one,
+ // and that's the node needed for creating a new node, save it.
+
+ o4_ll_node=node;
+
+ // Now get the probability and cump of the escape code
+
+ symb_cump=o4_context->max_cump;
+ symb_prob=o4_context->defined_bytes;
+
+ // Code the escape code
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 0;
+
+
+ // That code is executed when the byte is found in the linked list
+
+ppmc_o4_byte_found:
+
+
+ // Everything has been tested, now we can feel free to code the byte,
+ // the symb_cump is already computed, now get its probability and code
+ // the byte, also save pointer to this element in the linked lists for
+ // updating.
+
+ coded_in_order=4; //successfully coded under order-4
+
+ o4_ll_node=node; //save it for updating
+
+ symb_prob=node->freq; //get the probability of the byte
+
+ // Code it.
+
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+
+ return 1; //byte coded under order-4
+}
+
+
+// This functions update order-4 probabilities with current byte, also takes
+// care about updating variables, and renormalizing.
+//
+// "o4_ll_node" must point to the element to update or the last one,
+// based on the "coded_in_order" and checking the pointer of the element we
+// can decide what to do. Also "o4_context" must be initialized.
+//
+// This updating is only for encoding.
+void ppmc_update_order4(void)
+{
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order4_hasht[o4_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+
+
+ // The byte was coded under order three, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==4) //coded under order-4
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o4_ll_node->freq++; //increment its frequency (rather probability)
+
+ o4_context->max_cump++; //total cump
+
+ if(o4_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order4(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o4_context" points to the last element, so we can put the new element.
+
+ if(o4_context->order4321==full_o4_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ o4_ll_node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o4_context->max_cump++; //total cump
+ o4_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o4_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+// This functions renormalizes the probabilities at order-4 updating context
+// variables.
+void ppmc_renormalize_order4(void)
+{
+ unsigned long counter;
+ struct _byte_and_freq *node;
+
+
+ // Initialize variables. Defined bytes remain the same.
+ o4_context->max_cump=0; //clear them
+
+ node=o4_context->prob; //get pointer to lined lists
+
+ // Divide all the probabilities by 2 and update context variables
+ while(1)
+ {
+ node->freq>>=1; //divide by a factor of 2
+
+ if(node->freq==0) //don't allow a probability to be 0
+ node->freq=1;
+
+ o4_context->max_cump+=node->freq; //sum to the total cump
+
+ if(node->next==0) //last element
+ break;
+ node=node->next;
+ }
+
+
+}
+
+
+// Decodes the symbol correspoding to the current order, it returns the byte
+// or in case of an escape code or empty table it returns -1.
+// It updates "coded_in_order".
+//
+// If we decode an escape code, we don't modify "o4_ll_node" and thus its
+// work of the updating routine to read till the end of the linked list
+// (for adding a new element) however if we decode a byte, we save on
+// "o4_ll_node" the pointer to its node. (so updating is faster)
+void ppmc_decode_order4(void)
+{
+ unsigned long current_cump, counter;
+ struct _byte_and_freq *node;
+
+
+ // Get current context (if present) and total cump.
+
+ if(ppmc_get_totf_order4()==0)
+ {
+ byte=-1;
+ return;
+ }
+
+
+ // Decode current cump
+
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it
+
+ // Now check if it's an escape code
+ if(symb_cump>=o4_context->max_cump) //the defined code space for the escape code
+ {
+
+ // Update coding values
+
+ ppmc_get_escape_prob_order4();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Mark as escape code
+
+ byte=-1;
+
+ return; //an escape code
+ }
+ else
+ {
+ // Now we have to check what symbol it is
+
+ current_cump=0; //cump of the current symbol
+
+ node=o4_context->prob; //get pointer to linked lists
+
+ while(1)
+ {
+ current_cump+=node->freq; //update cump
+ if(symb_cump<current_cump)
+ break; //symbol found, break search loop
+
+ node=node->next; //next element
+ //we have no need to check for the last symbol, we'll never read further
+ //the end of the linked lists, before we'll found the last byte.
+ }
+
+
+ //read byte value and probability
+
+ symb_prob=node->freq; //get the probability for updating the state
+ byte=node->byte; //get byte
+ o4_ll_node=node; //used for updating
+
+
+ // Get the cump of byte
+
+ symb_cump=current_cump-symb_prob;
+
+ // Update coding state
+
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+
+ // Update coded_in_order used for update exclusion
+
+ coded_in_order=4;
+
+ return;
+ }
+
+}
+
+
+// This is the routine for updating while decoding. The only difference with
+// the routine for coding is that when an escape code was coded, "o4_ll_node"
+// is not initialized so we have to read till the end of the linked list.
+// Fortunately "o4_context" will be initialized so we don't need to read its
+// linked list.
+void ppmc_update_dec_order4(void)
+{
+ struct _byte_and_freq *node;
+
+ // First thing first, check if the hash entry is initialized
+
+ if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet
+ {
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ order4_hasht[o4_cntxt]=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ return; //nothing else to do
+ }
+
+
+ // The byte was coded under order four, otherwise it was coded under a
+ // lower order (never higher ones, remember that we are using update
+ // exclusion) in this case we have to create a new node in the list.
+
+ if(coded_in_order==4) //coded under order-4
+ {
+
+ // Update its count and variables of this context and check for renormalization
+
+ o4_ll_node->freq++; //increment its frequency (rather probability)
+
+ o4_context->max_cump++; //total cump
+
+ if(o4_ll_node->freq==255) //do we have to renormalize?
+ ppmc_renormalize_order4(); //renormalize
+
+ }
+ else
+ {
+
+ // Now we have two cases, under a given context (which we actually found)
+ // we coded an escape coded, in that case just create a new node in the
+ // linked list of bytes and probabilities. Otherwise we didn't find the
+ // same node so we have to create it in the linked list for context.
+ // And we can be sure that it at least has one element and that
+ // "o4_context" points to the last element, so we can put the new element.
+
+ if(o4_context->order4321==full_o4_cntxt) //chech if that's the last
+ { //element or the a context found
+
+ // Read till the end of the linked list
+
+ node=o4_context->prob; //get pointer to linked list
+
+ while(1)
+ {
+ if(node->next==0) //check for the end of the linked list
+ break;
+ node=node->next; //next node
+ }
+
+ // Now add element
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate mem
+
+ node->next=_bytes_pool; //put it in the next free entry
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ o4_context->max_cump++; //total cump
+ o4_context->defined_bytes++; //total cump
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+ }
+ else
+ {
+
+ // We have to create a new context node
+
+ if(ppmc_out_of_memory==1)
+ return; //exit this function, we can't allocate memory
+
+
+ // First create the context
+
+ o4_context->next=_context_pool;
+
+ _context_pool->next=0; //this is the last element
+ _context_pool->order4321=full_o4_cntxt; //put context
+ _context_pool->prob=_bytes_pool; //pointer to linked list
+ _context_pool->max_cump=1;
+ _context_pool->defined_bytes=1;
+
+
+ // Do update of linked list variables and memory use of contexts
+
+ ++_context_pool; //next time use next entry (this is a pointer)
+
+ if(_context_pool==_context_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_context_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_context_pool=(struct context *)malloc
+ (sizeof(struct context)*_context_pool_elements_inc))==NULL)
+ {
+ printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _context_pool_array[_context_pool_index]=_context_pool;
+
+ _context_pool_max=_context_pool+_context_pool_elements_inc;
+
+ _context_pool_index++;
+
+ }
+
+ // Now care about the first (and last) linked list element
+
+ _bytes_pool->byte=byte; //initialize byte to current one
+ _bytes_pool->freq=1; //it appeared once
+ _bytes_pool->next=0; //now this is last element in ll
+
+ // Do update of linked list variables and memory use
+
+ ++_bytes_pool; //next time use next entry (this is a pointer)
+
+ if(_bytes_pool==_bytes_pool_max) //maximum reached
+ {
+
+ // First check to see that we still have entries in the array
+
+ if(_bytes_pool_index==_mempool_max_index)
+ {
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ // Allocate memory for new buffer
+
+ if((_bytes_pool=(struct _byte_and_freq *)malloc
+ (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL)
+ {
+ printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index);
+ ppmc_out_of_memory=1; //flush
+ return;
+ }
+
+ _bytes_pool_array[_bytes_pool_index]=_bytes_pool;
+
+ _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc;
+
+ _bytes_pool_index++;
+
+ }
+
+ }
+
+ }
+
+}
+
+
+
+// This function returns the probability for the escape codes in the variables
+void ppmc_get_escape_prob_order4(void)
+{
+ // To understand that remember that the escape allocated for the escape code
+ // is above the current maximum cump and that it has a probability determined
+ // by the scheme C.
+
+ symb_prob=o4_context->defined_bytes;
+ symb_cump=o4_context->max_cump;
+}
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmc.h"
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains the definitions of different functions and all the
+ data structures defined by ppmc. Also contains defines.
+*/
+
+// Definitions
+
+#define ppmc_order4_hash_size 65536
+#define ppmc_order4_hash_key(k,j,i,l) ( (k)+(j<<8)+(i<<1)+(l<<9) )& ppmc_order4_hash_size-1
+#define ppmc_order3_hash_size 65536
+#define ppmc_order3_hash_key(k,j,i) ((k)+(j<<7)+(i<<11)) & ppmc_order3_hash_size-1
+#define ppmc_order2_hash_key(k,j) ((k)+(j<<8))
+#define _bytes_pool_elements 125000 //this is used the first time
+ //that we allocate memory, that's
+ //the number of entries
+#define _bytes_pool_elements_inc 125000 //if we need to alloc again, this
+ //is the number of entries to get
+#define _context_pool_elements 50000
+#define _context_pool_elements_inc 50000
+
+#define _mempool_max_index 1000 //the number of entries in the array with
+ //pointers
+
+
+// Data structures
+
+// This structure contains a single element of a linked lists which contains
+// the probability distribution of a given order. This structure takes 6 bytes.
+struct _byte_and_freq{
+unsigned char byte; //the byte itself
+unsigned char freq; //and the frequency of it
+struct _byte_and_freq *next; //pointer to next element in linked list or 0
+};
+
+
+// This structure is used for both order-3 and order-4. It takes 20 bytes,
+// and it can still hold another byte more. (only 19 being used)
+// Order 2-1-0-(-1) use different structures for a faster accessing.
+struct context{
+struct context *next; //next context in the hash entry
+unsigned long order4321; //order-4-3-2-1 (or order-3-2-1 for order-3)
+struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution
+unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 )
+unsigned int defined_bytes; //the number of bytes in this context
+};
+
+// That's the same but for order-2 where there's no hash collisions.
+struct context_o2{
+struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution
+unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 )
+unsigned int defined_bytes; //the number of bytes in this context
+};
+
+
+// Declaration of functions
+
+
+// Functions for initializing
+void ppmc_alloc_memory(void);
+void ppmc_initialize_contexts(void);
+void ppmc_encoder_initialize(void);
+void ppmc_decoder_initialize(void);
+void ppmc_free_memory(void);
+void ppmc_flush_mem_enc(void);
+void ppmc_flush_mem_dec(void);
+
+// Functions for order-(-1)
+void ppmc_get_prob_ordern1(void);
+unsigned long ppmc_get_symbol_ordern1(void);
+void ppmc_get_totf_ordern1(void);
+void ppmc_renormalize_order1(void);
+
+// Functions for order-0
+void ppmc_get_totf_order0(void);
+char ppmc_code_byte_order0(void);
+void ppmc_update_order0(void);
+void ppmc_renormalize_order0(void);
+void ppmc_decode_order0(void);
+void ppmc_get_escape_prob_order0(void);
+void ppmc_get_prob_order0(void);
+
+// Functions for order-1
+void ppmc_get_totf_order1(void);
+char ppmc_code_byte_order1(void);
+void ppmc_update_order1(void);
+void ppmc_renormalize_order1(void);
+void ppmc_decode_order1(void);
+void ppmc_get_escape_prob_order1(void);
+void ppmc_get_prob_order1(void);
+
+
+// Functions for order-2
+void ppmc_get_totf_order2(void);
+char ppmc_code_byte_order2(void);
+void ppmc_update_order2(void);
+void ppmc_renormalize_order2(void);
+void ppmc_decode_order2(void);
+void ppmc_update_dec_order2(void);
+void ppmc_get_escape_prob_order2(void);
+void ppmc_get_prob_order2(void);
+
+
+// Functions for order-3
+char ppmc_get_totf_order3(void);
+char ppmc_code_byte_order3(void);
+void ppmc_update_order3(void);
+void ppmc_renormalize_order3(void);
+void ppmc_decode_order3(void);
+void ppmc_update_dec_order3(void);
+void ppmc_get_escape_prob_order3(void);
+void ppmc_get_prob_order3(void);
+
+
+// Functions for order-4
+char ppmc_get_totf_order4(void);
+char ppmc_code_byte_order4(void);
+void ppmc_update_order4(void);
+void ppmc_renormalize_order4(void);
+void ppmc_decode_order4(void);
+void ppmc_update_dec_order4(void);
+void ppmc_get_escape_prob_order4(void);
+void ppmc_get_prob_order4(void);
+
+
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmcdata.c"
+Email: arturo@arturocampos.com
+Web: http://www.arturocampos.com
+
+Part of the ppmc encoder and decoder.
+
+This module contains global data.
+*/
+
+#include "ppmc.h" //defines
+#include "range.h"
+
+// Order-4 uses a hash table which points to the start of a linked list with
+// the different context, which has the cump, the number of different symbols
+// and a pointer to the linked list with the bytes and frequencies.
+// Order-3 is almost the same, both take 262144 bytes.
+struct context *order4_hasht[ppmc_order4_hash_size];
+
+struct context *order3_hasht[ppmc_order3_hash_size];
+
+
+// The array for order-2 is different, as we do directly hashing, and thus
+// we have no need to do the stuff of linked lists for the context itself,
+// so it contains the context used. This takes 1310720 bytes.
+struct context_o2 order2_array[65536];
+
+
+// Those are the multiple arrays for order-1. It takes 65536 bytes.
+unsigned char order1_array[256][256];
+unsigned int order1_defined_bytes_array[256]; //the defined bytes in every context
+unsigned int order1_max_cump_array[256]; //max cump of every context
+
+
+// This is the array for order-0. It takes 256 bytes.
+unsigned char order0_array[256];
+unsigned int order0_defined_bytes;
+unsigned int order0_max_cump;
+
+
+// No need of variables for order-(-1), because it's fixed.
+
+
+// Those are the pointers and variables used for managing the mem pool for
+// both context, and bytes and frequencies.
+struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked
+ //lists with bytes and frequencies
+ *_bytes_pool_max; //the maximum of this buffer
+ struct context *_context_pool; //pointer to pool containing contexts
+ struct context *_context_pool_max; //the same as with _bytes_pool
+
+ unsigned long _bytes_pool_index; //index in array of pointers
+ unsigned long _context_pool_index;
+
+ //the following is an array keeping pointers to different buffers. A new
+ //buffer is allocated when the current one is full, so we always have a
+ //buffer for linked lists. (without allocating a buffer for every element)
+ struct _byte_and_freq *_bytes_pool_array[_mempool_max_index];
+ struct context *_context_pool_array[_mempool_max_index];
+
+ char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any
+ //routine that needs to allocate memory must
+ //quit if that's 1.
+
+
+ // Variables which contain current byte to code and order
+ unsigned long byte, //current byte to code
+ o1_byte, //order-1 byte
+ o2_byte, //order-2 byte
+ o3_byte, //order-3 byte
+ o4_byte; //order-4 byte
+
+unsigned long o2_cntxt; //used in the hash key of order-2
+unsigned long o3_cntxt; //use as hash key for order-3
+unsigned long o4_cntxt; //use as hash key for order-4
+unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together
+unsigned long full_o4_cntxt; //order-4-3-2-1
+
+unsigned long coded_in_order; //in which order the last byte was coded
+//it's for update exclusion
+//also used for decoding
+
+// Variables used for coding
+
+unsigned long
+total_cump, //the total cumulative probability
+ symb_cump, //the symbol cumulative probability
+ symb_prob; //the symbol frequency
+
+rangecoder rc_coder; //state of range coder
+rangecoder rc_decoder; //state of range decoder
+
+// File handles
+
+FILE *file_input, //file to code
+ *file_output; //file where the coded data is placed
+
+
+// Pointers to linked lists and context structures used for faster updating
+// or creation of new nodes, because instead of reading again all the linked
+// list till the end (in the case of creation) we have a pointer to the last
+// element. In the case that a byte was present in the linked lists but it
+// had a 0 count, we just have to update its probability. And in the case
+// that it already was present and we coded it under that context or a lower
+// one, we just have to update its probability.
+
+
+struct _byte_and_freq *o2_ll_node; //pointer to linked lists under order-2
+//where does it points depends in which
+//order the byte was coded.
+struct _byte_and_freq *o3_ll_node; //the same but for order-3
+struct _byte_and_freq *o4_ll_node;
+
+struct context *o3_context; //pointer to current order-3 context
+struct context *o4_context; //pointer to current order-3 context
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmcdata.h"
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Part of the ppmc encoder and decoder.
+
+ This module contains externs definition for global data.
+*/
+
+#include "ppmc.h"
+
+// Order-4 uses a hash table which points to the start of a linked list with
+// the different context, which has the cump, the number of different symbols
+// and a pointer to the linked list with the bytes and frequencies.
+// Order-3 is almost the same, both take 262144 bytes.
+extern struct context *order4_hasht[];
+
+extern struct context *order3_hasht[];
+
+
+// The array for order-2 is different, as we do directly hashing, and thus
+// we have no need to do the stuff of linked lists for the context itself,
+// so it contains the context used. This takes 1310720 bytes.
+extern struct context_o2 order2_array[];
+
+
+// Those are the multiple arrays for order-1. It takes 65536 bytes.
+extern unsigned char order1_array[256][256];
+extern unsigned int order1_defined_bytes_array[]; //the defined bytes in every context
+extern unsigned int order1_max_cump_array[]; //max cump of every context
+
+
+// This is the array for order-0. It takes 256 bytes.
+extern unsigned char order0_array[];
+extern unsigned int order0_defined_bytes;
+extern unsigned int order0_max_cump;
+
+
+// Those are the pointers and variables used for managing the mem pool for
+// both context, and bytes and frequencies.
+extern struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked
+ //lists with bytes and frequencies
+ *_bytes_pool_max; //the maximum of this buffer
+extern struct context *_context_pool; //pointer to pool containing contexts
+extern struct context *_context_pool_max; //the same as with _bytes_pool
+
+extern unsigned long _bytes_pool_index; //index in array of pointers
+extern unsigned long _context_pool_index;
+
+//the following is an array keeping pointers to different buffers. A new
+//buffer is allocated when the current one is full, so we always have a
+//buffer for linked lists. (without allocating a buffer for every element)
+extern struct _byte_and_freq *_bytes_pool_array[_mempool_max_index];
+extern struct context *_context_pool_array[_mempool_max_index];
+
+extern char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any
+ //routine that needs to allocate memory must
+ //quit if that's 1.
+
+
+
+// Variables which contain current byte to code and order
+extern unsigned long //they are only bytes
+ byte, //current byte to code
+ o1_byte, //order-1 byte
+ o2_byte, //order-2 byte
+ o3_byte, //order-3 byte
+ o4_byte; //order-4 byte
+
+extern unsigned long o2_cntxt; //used in the hash key of order-2
+extern unsigned long o3_cntxt; //use as hash key for order-3
+unsigned long o4_cntxt; //use as hash key for order-4
+extern unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together
+extern unsigned long full_o4_cntxt; //order-4-3-2-1
+
+extern unsigned long coded_in_order; //in which order the last byte was coded
+ //it's for update exclusion
+ //also used for decoding
+// Variables used for coding
+
+extern unsigned long //no need for negative values
+ total_cump, //the total cumulative probability
+ symb_cump, //the symbol cumulative probability
+ symb_prob; //the symbol frequency
+
+extern rangecoder rc_coder; //state of range coder
+extern rangecoder rc_decoder; //state of range decoder
+
+// File handles
+
+ FILE *file_input, //file to code
+ *file_output; //file where the coded data is placed
+
+
+
+// Pointers to linked lists and context structures used for faster updating
+// or creation of new nodes, because instead of reading again all the linked
+// list till the end (in the case of creation) we have a pointer to the last
+// element. In the case that a byte was present in the linked lists but it
+// had a 0 count, we just have to update its probability. And in the case
+// that it already was present and we coded it under that context or a lower
+// one, we just have to update its probability.
+
+
+extern struct _byte_and_freq *o2_ll_node;//pointer to linked lists under order-2
+ //where does it points depends in which
+ //order the byte was coded.
+extern struct _byte_and_freq *o3_ll_node; //the same but for order-3
+extern struct _byte_and_freq *o4_ll_node;
+
+extern struct context *o3_context; //pointer to current order-3 context
+extern struct context *o4_context; //pointer to current order-3 context
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "ppmcmain.c"
+Email: arturo@arturocampos.com
+Web: http://www.arturocampos.com
+
+Part of the ppmc encoder only.
+
+This module is the main module and calls the different modules to do
+the encoding of a file. When done prints bpb and kbyps.
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "range.h" //the range coder functions and data
+#include "ppmcdata.h"
+
+
+long filesize(FILE *stream);
+
+
+
+//Main
+int main (char argc, char *argv[])
+{
+ unsigned long counter, //temporal counter for loops like for or while
+ counter2, //another temporal counter for sub loops
+ size_file_input; //the size of the input file
+
+
+ // Print title, version and copyright
+ printf("PPMC using range coder. (without exclusion)\n");
+ printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n");
+ printf("Permission is granted to make verbatim copies of this program for private\n");
+ printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n");
+
+ // Check for correct number of parameters
+ if(argc!=3)
+ {
+ printf("Bad number of arguments.\n");
+ exit(1);
+ }
+
+
+ // Try to open input and output files
+ if((file_input=fopen(argv[1],"r+b"))==NULL)
+ {
+ printf("Couldn't open %s.\n",argv[1]);
+ exit(1);
+ }
+
+ if((file_output=fopen(argv[2],"w+b"))==NULL)
+ {
+ printf("Couldn't create %s.\n",argv[2]);
+ exit(1);
+ }
+
+
+ // Check input file length and not accept 0 length files
+ size_file_input=filesize(file_input);
+
+ if(size_file_input<5)
+ {
+ printf("Can't work with files below than 5 bytes!");
+ exit(1);
+ }
+
+
+ // First output file length
+ fwrite(&size_file_input,1,4,file_output); //input length
+
+
+ // Initialize ppmc encoder
+ ppmc_alloc_memory(); //get memory
+ ppmc_initialize_contexts(); //initialize model
+ ppmc_encoder_initialize();
+
+ // Initialize range coder
+ range_coder_init(&rc_coder,file_output);
+
+
+ // Start main loop which codes the file
+ while((byte=fgetc(file_input))!=EOF)
+ {
+
+ // Try to code current byte under order-4 if possible then go to lower orders
+ if(ppmc_code_byte_order4()==0)
+ if(ppmc_code_byte_order3()==0)
+ if(ppmc_code_byte_order2()==0)
+ if(ppmc_code_byte_order1()==0)
+ if(ppmc_code_byte_order0()==0) //else try to code under order-0
+ {
+ // Code under order-(-1)
+ ppmc_get_prob_ordern1();
+ range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob);
+ coded_in_order=0; //update all the tables (unless order-(-1))
+ }
+
+
+ // Now do update exclusion
+
+ switch(coded_in_order)
+ {
+ case 0: ppmc_update_order0(); //update only order-0
+ case 1: ppmc_update_order1(); //update order-0 and order-1
+ case 2: ppmc_update_order2(); //update order-2 1 and 0...
+ case 3: ppmc_update_order3();
+ case 4: ppmc_update_order4();
+ default: break;
+ };
+
+
+
+ // Update order variables
+
+ o4_byte=o3_byte;
+ o3_byte=o2_byte;
+ o2_byte=o1_byte;
+ o1_byte=byte; //current one is next time order-1
+
+
+ // Check if we run out of memory, in that case, flush the encoder
+
+ if(ppmc_out_of_memory==1)
+ {
+ printf("Flushing memory! Output file might be not decodable.\n");
+ ppmc_flush_mem_enc();
+ }
+
+
+ }
+
+
+ // Flush range coder
+ range_coder_flush(&rc_coder);
+
+ // Free memory
+ ppmc_free_memory();
+
+
+ // Print bpb and kbyps
+ printf("%s at %f bpb.\n",argv[1],((float)filesize(file_output)/(float)size_file_input)*(float)8);
+
+
+ // Close file handles
+ fclose(file_input);
+ fclose(file_output);
+
+
+
+ // Nicely exit
+ return 0;
+}
+
+
+// Routines not used by ppmc but rather by main.
+// Not including the range coder.
+
+
+// Returns the file size of a given file.
+long filesize(FILE *stream)
+{
+ long curpos, length;
+
+ curpos = ftell(stream);
+ fseek(stream, 0L, SEEK_END);
+ length = ftell(stream);
+ fseek(stream, curpos, SEEK_SET);
+ return length;
+}
+
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "range.c"
+Email: arturo@arturocampos.com
+Web: http://www.arturocampos.com
+
+This module contains the routines of both the range coder and decoder.
+
+The range coder works internally in 32 bits, and uses bytes as symbols.
+Also the end of message symbol is used. So z=257.
+
+Both input and output use rc_file as the file stream. Of course we can't
+code and decode at the same time. All the input or output comes from the
+same file, no matter what range coder structure are we using. The modules
+here provided don't manage the io except for reading and writing, they
+don't open nor close the files. The reading and writing is done via
+putc and getc.
+*/
+
+#include "range.h"
+#include <stdlib.h>
+
+/*
+ Inits the range coder state. Must be called before encoding any symbol.
+ It uses a magic number 0xB3 as the first byte outputted.
+ -rangecoder *rc, the range coder to be used.
+
+ Shoulde be called like that:
+ range_coder_init(&o0_rc_state,file_output);
+ */
+void range_coder_init(rangecoder *rc, FILE *stream)
+{
+ rc_file=stream;
+ rc->low=0; //define state
+ rc->range=0x80000000;
+ rc->byte_buffer=0xB3; //magic number
+ rc->help=0; //temp value
+}
+
+
+/*
+ Encodes a symbol.
+ -rangecoder *rc, the range coder to be used.
+ -unsigned long tot_f, the maximum cumulative frequency
+ -unsigned long lt_f, the cumulative probabilty of the symbol
+ -unsigned long sy_f, the probability of the symbol
+ */
+void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f)
+{
+ unsigned long temp, r;
+
+ range_coder_renormalize(rc); //&rc?
+
+ r=rc->range/tot_f;
+ temp=r*lt_f;
+ if(lt_f+sy_f<tot_f)
+ rc->range=r*sy_f;
+ else
+ rc->range-=temp;
+ rc->low+=temp;
+}
+
+/*
+ Renormalizes the state when coding.
+ -rangecoder *rc, the range coder to be used.
+ */
+
+void range_coder_renormalize(rangecoder *rc)
+{
+ while(rc->range<=(unsigned long)0x00800000)
+ {
+ if(rc->low<(unsigned long)0x7F800000)
+ {
+ putc(rc->byte_buffer,rc_file);
+ for(;rc->help;rc->help--)
+ putc(0xFF,rc_file);
+ rc->byte_buffer=(unsigned char)(rc->low>>23);
+ }
+ else
+ {
+ if(rc->low&(unsigned long)0x80000000)
+ {
+ putc(rc->byte_buffer+1,rc_file);
+ for(;rc->help;rc->help--)
+ putc(0x00,rc_file);
+ rc->byte_buffer=(unsigned char)(rc->low>>23);
+ }
+ else
+ rc->help++;
+ }
+ rc->range<<=8;
+ rc->low=(rc->low<<8)&(unsigned long)(0x7FFFFFFF);
+ }
+}
+
+
+/*
+ Flushes the encoder. Must be called when the coding is done.
+ -rangecoder *rc, the range coder to be used.
+
+ Shoulde be called like that:
+ range_coder_flush(&o0_rc_state);
+ */
+void range_coder_flush(rangecoder *rc)
+{
+ unsigned long tmp;
+
+ range_coder_renormalize(rc);
+ tmp = rc->low >> 23;
+ if (tmp > 0xff)
+ {
+ putc(rc->byte_buffer+1,rc_file);
+ for(; rc->help; rc->help--)
+ putc(0,rc_file);
+ }
+ else
+ {
+ putc(rc->byte_buffer,rc_file);
+ for(; rc->help; rc->help--)
+ putc(0xff,rc_file);
+ }
+
+ putc(tmp & 0xff,rc_file);
+ putc((tmp = rc->low >> (23-8)) & 0xff,rc_file);
+}
+
+
+/*
+ Inits the range decoder state. Also checks for the magic number, and
+ quits in case it isn't the first, so be careful.
+ -rangecoder *rc, the range coder to be used.
+ */
+void range_decoder_init(rangecoder *rc, FILE *stream)
+{
+ unsigned int _rd_c;
+
+ rc_file=stream;
+ if((_rd_c=getc(rc_file))!=0xB3)
+ {
+ printf("\nThis is not range coded data. Magic number not found. Exiting.");
+ exit(1);
+ }
+ rc->byte_buffer=getc(rc_file);
+ rc->low=rc->byte_buffer>>1;
+ rc->range=0x80;
+}
+
+
+/*
+ Decode a symbol, get its cumulative probability.
+Input:
+-rangecoder *rc, the range coder to be used.
+-unsigned long tot_f, the maximum cumulative probability
+Output:
+-unsigned long, cumulative probability of the current symbol
+Should be called like that:
+current_cump=range_decoder_decode(&o0_rc_state,o0_tot_f);
+*/
+unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f)
+{
+ unsigned long temp;
+
+ range_decoder_renormalize(rc);
+ rc->help=rc->range/tot_f;
+ temp=rc->low/rc->help;
+ if(temp>=tot_f)
+ return tot_f-1;
+ else
+ return temp;
+}
+
+
+/*
+ Updates the state so next symbol can be decoded.
+Input:
+-rangecoder *rc, the range coder to be used.
+-unsigned long tot_f, the maximum cumulative probability
+-unsigned long lt_f, the cumulative probabilty of the symbol
+-unsigned long sy_f, the probability of the symbol
+
+*/
+void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f)
+{
+ unsigned long temp;
+
+ temp=rc->help*lt_f;
+ rc->low-=temp;
+ if(lt_f+sy_f<tot_f)
+ rc->range=rc->help*sy_f;
+ else
+ rc->range-=temp;
+}
+
+
+/*
+ Renormalizes the state while decoding.
+ -rangecoder *rc, the range coder to be used.
+ */
+void range_decoder_renormalize(rangecoder *rc)
+{
+ while(rc->range<=0x00800000)
+ {
+ rc->low=(rc->low<<8)|((rc->byte_buffer<<7)&0xFF);
+ rc->byte_buffer=getc(rc_file);
+ rc->low |= rc->byte_buffer >> (1);
+ rc->range<<=8;
+ }
+}
+
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this file for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "range.h"
+ Email: arturo@arturocampos.com
+ Web: http://www.arturocampos.com
+
+ Declarations for the coder.
+*/
+
+#include <stdio.h>
+
+typedef struct{
+ unsigned long low, range, help;
+ unsigned char byte_buffer;
+}rangecoder;
+
+FILE *rc_file;
+
+void range_coder_init(rangecoder *rc, FILE *stream); //coding routines
+void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f);
+void range_coder_renormalize(rangecoder *rc);
+void range_coder_flush(rangecoder *rc);
+void range_decoder_init(rangecoder *rc, FILE *stream);//decoding routines
+unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f);
+void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f);
+void range_decoder_renormalize(rangecoder *rc);
+
+
+typedef unsigned long code_value;
+#define CODE_BITS 32
+#define Top_value ((code_value)1 << (CODE_BITS-1))
+#define SHIFT_BITS (CODE_BITS - 9)
+#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
+#define Bottom_value (Top_value >> 8)
+#define outbyte(cod,x) putc(x,stdout)
+#define inbyte(cod) getc(stdin)
--- /dev/null
+#!/bin/bash
+time ./ppmc "$1" "$1".ppmc
+time ./exclusion/ppmc "$1" "$1".ppmc-e
+time gzip -c "$1" "$2" > "$1".gz
+time bzip2 -c "$1" "$2" > "$1".bz2
+ls -l "$1".*
--- /dev/null
+/*
+ Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.
+ Permission is granted to make verbatim copies of this program for private
+ use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.
+
+ This file is: "unppmc.c"
+Email: arturo@arturocampos.com
+Web: http://www.arturocampos.com
+
+
+Part of the ppmc decoder.
+
+This module is the main module and calls the different modules to do
+the decoding of a file. When done prints kbyps.
+*/
+
+
+// Bibliotecas necesarias
+#include <stdio.h>
+#include <stdlib.h>
+#include "range.h" //the range coder functions and data
+#include "ppmcdata.h"
+
+
+// Declaracion de funciones del ppmcmain.c
+long filesize(FILE *stream);
+
+
+
+
+//Main
+void main (int argc, char *argv[])
+{
+ unsigned long counter, //temporal counter for loops like for or while
+ counter2, //another temporal counter for sub loops
+ size_file_output, //the size of the output file
+ main_counter; //used in main
+ char expected_flush=0; //used for checking flushing which can't be done
+
+
+ // Print title, version and copyright
+ printf("UNPPMC using range coder.\n");
+ printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n");
+ printf("Permission is granted to make verbatim copies of this program for private\n");
+ printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n");
+
+
+
+ // Check for correct number of parameters
+ if(argc!=3)
+ {
+ printf("Bad number of arguments.\n");
+ exit(1);
+ }
+
+
+ // Try to open input and output files
+ if((file_input=fopen(argv[1],"r+b"))==NULL)
+ {
+ printf("Couldn't open %s.\n",argv[1]);
+ exit(1);
+ }
+
+ if((file_output=fopen(argv[2],"w+b"))==NULL)
+ {
+ printf("Couldn't create %s.\n",argv[2]);
+ exit(1);
+ }
+
+
+ // Get output length
+ fread(&size_file_output,1,4,file_input);
+
+
+ // Initialize ppmc decoder
+ ppmc_alloc_memory();
+ ppmc_initialize_contexts();
+ ppmc_decoder_initialize();
+
+
+
+ // Initialize decoder
+ range_decoder_init(&rc_decoder,file_input);
+
+
+ // Start main loop which decodes the file
+ main_counter=size_file_output-4; //take in account the bytes already written
+ expected_flush=0; //we don't expect a flush yet
+
+ while(main_counter!=0)
+ {
+
+ // Try to decode current byte in order-4 if possible, else in lower ones
+ ppmc_decode_order4();
+ if(byte==-1)
+ ppmc_decode_order3();
+ if(byte==-1)
+ {
+ ppmc_decode_order2();
+ if(byte==-1)
+ {
+ ppmc_decode_order1();
+ if(byte==-1)
+ {
+ ppmc_decode_order0();
+ if(byte==-1) //check if it was an escape code
+ {
+ // Decode in order-(-1)
+ ppmc_get_totf_ordern1();
+ symb_cump=range_decoder_decode(&rc_decoder,total_cump);
+ byte=ppmc_get_symbol_ordern1();
+ ppmc_get_prob_ordern1();
+ range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob);
+ coded_in_order=0; //update all orders
+
+ // Now see if it's the code of flushing
+
+ if(symb_cump==256)
+ {
+ printf("Flushing.\n");
+ ppmc_flush_mem_dec();
+ expected_flush=0;
+ continue; //do not output byte nor update
+ }
+
+ }
+ }
+ }
+ }
+
+ // Output byte and update model
+
+ fputc(byte,file_output);
+
+ switch(coded_in_order) //update exclusion
+ {
+ case 0: ppmc_update_order0(); //update only order-0
+ case 1: ppmc_update_order1(); //update order-0 and order-1
+ case 2: ppmc_update_dec_order2(); //update order-0 1 and 2
+ case 3: ppmc_update_dec_order3();
+ case 4: ppmc_update_dec_order4();
+ default: break;
+ };
+
+
+ // Check if flushing has to be done and has not been done.
+ // This is optional, in case you limit the memory usage, you don't
+ // need to include this
+
+ if(expected_flush==1) // If flushing didn't happen, we can't decode
+ {
+ printf("Can't decompress file. Not enough memory.\nTry in a machine with more memory.\n");
+ exit(1);
+ }
+ if(ppmc_out_of_memory==1)
+ {
+ expected_flush=1; // Next code must be a flush code, otherwise we don't
+ // have enough memory, and therefore we can't decode
+ }
+
+
+ // Update order variables
+
+ o4_byte=o3_byte;
+ o3_byte=o2_byte;
+ o2_byte=o1_byte;
+ o1_byte=byte; //current one, is next time order-1
+
+ // Byte decoded and model updated, loop
+ main_counter--;
+
+
+ }
+
+
+ ppmc_free_memory();
+
+ // Close file handles and free memory
+ fclose(file_input);
+ fclose(file_output);
+
+
+ // Nicely exit
+ exit(0);
+}
+
+
+// Ruotines not used by ppmc but rather by main.
+// Not including the range coder.
+
+
+// Returns the file size of a given file.
+long filesize(FILE *stream)
+{
+ long curpos, length;
+
+ curpos = ftell(stream);
+ fseek(stream, 0L, SEEK_END);
+ length = ftell(stream);
+ fseek(stream, curpos, SEEK_SET);
+ return length;
+}
+
+
--- /dev/null
+# Makefile de ejemplo para C++
+#
+# Creado: jue abr 15 15:34:19 ART 2004
+#
+# Copyleft 2004 - Leandro Lucarella, Bajo licencia GPL [http://www.gnu.org/]
+#
+
+# CONFIGURACION
+################
+
+# Nombre del ejecutable.
+target = rle
+
+# Extensión de los archivos a compilar (c para C, cpp o cc o cxx para C++).
+extension = c
+
+# Archivos con el código fuente que componen el ejecutable. Si no se especifica,
+# toma todos los archivos con la extensión mencionada. Para especificar hay que
+# descomentar la línea (quitarle el '#' del principio).
+# NOTA: No poner cabeceras (.h).
+#fuentes = entrada.cpp
+
+# Si es un programa GTK+, descomentá (quitale el '#' a) la siguiente línea.
+#gtk = si
+
+
+# CONFIGURACION "AVANZADA"
+###########################
+
+# Opciones para el compilador C.
+#CFLAGS = -Wall -ggdb -ansi -pedantic -DDEBUG
+CFLAGS = -Wall -O3 -ansi -pedantic -DNDEBUG -g
+
+# Opciones para el compilador C++.
+#CXXFLAGS = $(CFLAGS) -fno-inline
+CXXFLAGS = $(CFLAGS)
+
+
+# VARIABLES CALCULADAS A PARTIR DE LA CONFIGURACION
+####################################################
+
+# Agrego flags y libs de GTK+ de ser necesario.
+ifdef gtk
+CFLAGS += $(shell pkg-config --cflags gtk+-2.0)
+CXXFLAGS += $(shell pkg-config --cflags gtk+-2.0)
+LDFLAGS += $(shell pkg-config --libs gtk+-2.0)
+endif
+
+# Uso enlazador de c++ si es código no C.
+ifeq ($(extension), c)
+enlazador = $(CC)
+else
+enlazador = $(CXX)
+endif
+
+# Si no especifica archivos, tomo todos.
+fuentes ?= $(wildcard *.$(extension))
+
+
+# REGLAS
+#########
+
+.PHONY: all clean
+
+all: $(target)
+
+o_files = $(patsubst %.$(extension),%.o,$(fuentes))
+
+$(target): $(o_files)
+ $(enlazador) $(LDFLAGS) $(o_files) $(LOADLIBES) $(LDLIBS) -o $(target)
+
+clean:
+ @$(RM) -fv *.o $(target)
+
--- /dev/null
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define SIZE 10
+#define BIT_SIZE 4
+#define MIN_LEN 2
+
+/* Insertar un char al final */
+void push_end(char *s, char c);
+int esta_en_memoria(char *memoria, char *pal, int *len);
+
+void out_ini(char *f);
+void out_bit(char b);
+void out_char(char c);
+void out_data(char len, char pos);
+void out_end();
+
+unsigned int out_buffer;
+FILE *out;
+int bit_count;
+
+int main(int argc, char *argv[])
+{
+ int mc, ic; /* Contadores */
+ char memoria[SIZE];
+ char inspeccion[SIZE];
+ char c, i; /* caracter leido */
+ FILE *fp;
+
+ /* inicio todo lindo */
+ memset(memoria, 1, SIZE);
+ memset(inspeccion, 1, SIZE);
+ mc = ic = 0;
+
+ fp = fopen(argv[1], "rt");
+
+ /* llego INSPECCION */
+ while (((c = fgetc(fp)) != EOF) && (ic < SIZE)) {
+ push_end(inspeccion, c);
+ ic++;
+ }
+
+ ungetc(c, fp);
+
+ out_ini(argv[2]);
+ /* Comienza el juevo */
+ while (c != EOF) {
+ int pos, len;
+ /* Busco */
+ pos = esta_en_memoria(memoria, inspeccion, &len);
+ if (pos != -1) {
+ /* La cadena se repite! */
+ printf("[0,%d,%d]\n", pos, len);
+ out_bit(0);
+ out_data(len, pos);
+
+ /* Tengo que meter caracteres */
+ for(i=0; i<len; i++) {
+ push_end(memoria, inspeccion[0]);
+ c=fgetc(fp);
+ if (c != EOF)
+ push_end(inspeccion, c);
+ }
+ } else {
+ /* la cadena no se repite, saco un bit 1 y el codigo ascii*/
+ printf("[1,%c]\n", inspeccion[0]);
+ out_bit(0);
+ out_char(inspeccion[0]);
+ /* Desplazo */
+ push_end(memoria, inspeccion[0]);
+ push_end(inspeccion, c=fgetc(fp));
+ }
+ }
+
+ /* llegue al eof, sigo hasta vaciar inspeccion */
+ while (ic > 0) {
+ int pos, len;
+ /* busco */
+ pos = esta_en_memoria(memoria, inspeccion, &len);
+ if (pos != -1) {
+ /* La cadena se repite! */
+ printf("[0,%d,%d]\n", pos, len);
+ out_bit(0);
+ out_data(len, pos);
+
+ /* Tengo que meter caracteres */
+ for(i=0; i<len; i++) {
+ push_end(memoria, inspeccion[0]);
+ push_end(inspeccion, '\0');
+ }
+ ic -= len;
+ } else {
+ /* la cadena no se repite, saco un bit 1 y el codigo ascii*/
+ printf("[1,%c]\n", inspeccion[0]);
+ out_bit(0);
+ out_char(inspeccion[0]);
+ /* Desplazo */
+ push_end(memoria, inspeccion[0]);
+ push_end(inspeccion, '\0');
+ ic--;
+ }
+ }
+
+ out_end();
+ fclose(fp);
+ printf("\n");
+ return 0;
+}
+
+void push_end(char *s, char c)
+{
+ int i;
+ for(i=0; i<SIZE-1; i++)
+ s[i] = s[i+1];
+ s[SIZE-1] = c;
+}
+
+int esta_en_memoria(char *memoria, char *pal, int *len)
+{
+ int i,j,k;
+ int len_max, pos_max=-1;
+
+ i=j=0;
+ k=0;
+ len_max = -1;
+ while (i<SIZE) {
+ if (memoria[i] == pal[j]) {
+ k++;
+ if (k>len_max) {
+ pos_max = i-k+1;
+ len_max = k;
+ }
+
+ j++;
+ } else {
+ j = 0;
+ k = 0;
+ }
+ i++;
+ }
+
+ if (len_max >= MIN_LEN) {
+ (*len) = len_max;
+ return pos_max;
+ }
+ return -1;
+}
+
+void out_bit(char b)
+{
+ char c;
+ if (bit_count+1 >= 32) {
+ /* Tengo que hacer lugar! , saco 1 byte*/
+ c = (out_buffer >> (bit_count-8));
+ bit_count -= 8;
+ /* lo grabo en el archivo */
+ fwrite(&c, 1, 1, out);
+ }
+
+ bit_count++;
+ out_buffer <<= 1;
+ out_buffer |= (0x1&b);
+}
+
+void out_char(char c)
+{
+ char cc;
+ if (bit_count+8 >= 32) {
+ /* Tengo que hacer lugar! , saco 1 byte */
+ cc = (out_buffer >> (bit_count-8));
+ bit_count -= 8;
+ /* lo grabo en el archivo */
+ fwrite(&cc, 1, 1, out);
+ }
+
+ bit_count+=8;
+ out_buffer <<= 8;
+ out_buffer |= c;
+}
+
+void out_data(char len, char pos)
+{
+ char cc;
+ char b;
+ while (bit_count+BIT_SIZE*2 >= 32) {
+ /* Tengo que hacer lugar! , saco 1 byte */
+ cc = (out_buffer >> (bit_count-8));
+ bit_count -= 8;
+ /* lo grabo en el archivo */
+ fwrite(&cc, 1, 1, out);
+ }
+
+ b = 0x0;
+ b = ((0x3|pos)<<3) & ((0x3|len));
+ bit_count+=BIT_SIZE*2;
+ out_buffer <<= BIT_SIZE*2;
+ out_buffer |= b;
+}
+
+void out_ini(char *f)
+{
+ out = fopen(f, "wb");
+ bit_count = 0;
+ out_buffer = 0;
+}
+
+void out_end()
+{
+ char cc;
+ /* Saco lo que falte */
+ while (bit_count > 8) {
+ cc = (out_buffer >> (bit_count-8));
+ bit_count -= 8;
+ fwrite(&cc, 1, 1, out);
+ }
+
+ if (bit_count > 0) {
+ cc = (out_buffer >> bit_count);
+ fwrite(&cc, 1, 1, out);
+ }
+ fclose(out);
+}
+