From 720b7c10f4f1804a9c058df886078e2e51be1fc3 Mon Sep 17 00:00:00 2001 From: Leandro Lucarella Date: Thu, 17 Jun 2004 05:06:42 +0000 Subject: [PATCH] Minioptimizacion para que la tabla de orden-(-1) se ajuste mejor a archivos de texto. Sirve de poco y nada. --- src/ppmc/luca/Makefile | 18 + src/ppmc/luca/ppmc.c | 3122 ++++++++++++++++++++++++++++++++++++++ src/ppmc/luca/ppmc.h | 135 ++ src/ppmc/luca/ppmcdata.c | 119 ++ src/ppmc/luca/ppmcdata.h | 117 ++ src/ppmc/luca/ppmcmain.c | 176 +++ src/ppmc/luca/range.c | 212 +++ src/ppmc/luca/range.h | 39 + src/ppmc/luca/unppmc.c | 204 +++ 9 files changed, 4142 insertions(+) create mode 100644 src/ppmc/luca/Makefile create mode 100644 src/ppmc/luca/ppmc.c create mode 100644 src/ppmc/luca/ppmc.h create mode 100644 src/ppmc/luca/ppmcdata.c create mode 100644 src/ppmc/luca/ppmcdata.h create mode 100644 src/ppmc/luca/ppmcmain.c create mode 100644 src/ppmc/luca/range.c create mode 100644 src/ppmc/luca/range.h create mode 100644 src/ppmc/luca/unppmc.c diff --git a/src/ppmc/luca/Makefile b/src/ppmc/luca/Makefile new file mode 100644 index 0000000..887aa40 --- /dev/null +++ b/src/ppmc/luca/Makefile @@ -0,0 +1,18 @@ + +TARGETS=ppmc unppmc +COMMON= ppmc.o ppmcdata.o range.o + +CFLAGS=-O3 -Wall -DNDEBUG + +all: $(TARGETS) + +ppmc: $(COMMON) ppmcmain.c + +unppmc: $(COMMON) unppmc.c + +clean: + + @$(RM) -f *.o $(TARGETS) + +.PHONY: all clean + diff --git a/src/ppmc/luca/ppmc.c b/src/ppmc/luca/ppmc.c new file mode 100644 index 0000000..371c39e --- /dev/null +++ b/src/ppmc/luca/ppmc.c @@ -0,0 +1,3122 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmc.c" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains the whole ppmc encoder. It uses hash tables for + managing most of the orders. And a maximum order of 4. It codes bytes. + Order-1-0-(-1) are all handled in tables. Order-2 has a table with + direct hashing with pointers to the linked lists. Order-4 and order-3 + both have hash tables with pointers to contexts in a linked lists which + finally have a pointer to the start of the linked list with the + probability distribution. Update exclusion is used, but exclusion is not. + + Please, note that if the machine where the decoder is run doesn't has as + much memory as the computer where the encoder was ran, the decoder will + not be able to properly decode the file, because it will not be able to + keep track of new statistics, in this case it will just exit. + + For applications where the loss of data is not admisible, I suggest you to + limit both encoder and decoder's memory requeriments to a given minimum + which both machines have. +*/ + + +#include +#include +#include "range.h" +#include "ppmcdata.h" + +static unsigned ordern1_table[] = { + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 4303574, 88451153, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 143318, 9172380, 1146547, 100, 143318, 573273, 859910, 10318928, + 10318928, 6019374, 573273, 26657231, 1576502, 43138853, 10462246, 3439642, + 6162693, 6162693, 6162693, 6162693, 6162693, 6162693, 6162693, 6162693, + 6162693, 6162693, 8742425, 4299553, 14188526, 3439642, 9888973, 429955, + 143318, 27230505, 2436413, 22357678, 10892202, 40989076, 1576502, 1433184, + 286636, 37406115, 573273, 286636, 19204672, 13901889, 19491309, 10462246, + 16481621, 1003229, 21211130, 13615252, 17914806, 2866368, 4012916, 429955, + 1719821, 1146547, 143318, 286636, 143318, 286636, 143318, 1576502, + 143318, 315013952, 32246651, 165962764, 171122228, 418203235, 23790862, 26800550, + 17054895, 230742703, 7452559, 1003229, 163669669, 75385504, 225153284, 261412851, + 88140846, 24794091, 215837585, 223003507, 162523121, 110355206, 38122707, 5302782, + 7595877, 18918035, 8885743, 859910, 100, 859910, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, +}; +#define ordern1_total_cump 3651941255u + +// Ruotines used by ppmc. Not including the range coder. +// +// They are for initializing of both encoder and decoder, and unless there +// are two version, both encoder and decoder use the same routines. Like +// "ppmc_initialize_contexts". + + +// This one allocs the memory needed by ppmc, and adjust some pointers used +// for allocating elements in the linked lists. The mempool arrays must be +// initialized now. +void ppmc_alloc_memory(void) +{ + unsigned long counter; + + + // Init mempool buffers + + for(counter=0;counter!=_mempool_max_index;++counter) + { + _bytes_pool_array[counter]=0; + _context_pool_array[counter]=0; + } + + _bytes_pool_index=1; //first entry will be used now + _context_pool_index=1; + + + // Allocate memory for ppmc structures and adjust some variables + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + + //save pointers in the array for freeing + _bytes_pool_array[0]=_bytes_pool; + _context_pool_array[0]=_context_pool; + + + //adjust variables + _bytes_pool_max=_bytes_pool+_bytes_pool_elements; + _context_pool_max=_context_pool+_context_pool_elements; + + ppmc_out_of_memory=0; //we still have memory +} + + +// This routine initializes all the contexts, and all the tables including +// those who care about the number of bytes defined in a context. +void ppmc_initialize_contexts(void) +{ + unsigned long counter, counter2; + + + // Order-0 + for(counter=0;counter!=256;++counter) //clear table + order0_array[counter]=0; + + order0_defined_bytes=0; //adjust variables + order0_max_cump=0; + + + // Order-1 + for(counter=0;counter!=256;++counter) //erase every table of every context + for(counter2=0;counter2!=256;++counter2) + order1_array[counter][counter2]=0; + + for(counter=0;counter!=256;++counter) //adjust variables + { + order1_defined_bytes_array[counter]=0; + order1_max_cump_array[counter]=0; + } + + + // Order-2 + for(counter=0;counter!=65536;++counter) + { + //order2_array[counter].prob=0; //clear pointer to bytes and frequencies + //order2_array[counter].max_cump=0; + order2_array[counter].defined_bytes=0; + } + + + // Order-4-3 + for(counter=0;counter!=65536;++counter) //order-4-3 + { + order4_hasht[counter]=0; + order3_hasht[counter]=0; + } +} + + +// This routine initializes the encode model by outputting as many bytes as +// needed to prepare the models. This should be called before the main loop +// and after the memory has been allocated and tables initialized. +// +// It does not need uses the range coder. It output the first 1 bytes. +void ppmc_encoder_initialize(void) +{ + + // Initialize order-0 and prepare different bytes for orders + fputc((byte=fgetc(file_input)),file_output); + o4_byte=byte; //order-4 + + fputc((byte=fgetc(file_input)),file_output); + o3_byte=byte; //order-3 + + fputc((byte=fgetc(file_input)),file_output); + o2_byte=byte; //order-2 + ppmc_update_order0(); + + fputc((byte=fgetc(file_input)),file_output); + o1_byte=byte; + +} + + +// This routine initializes the decoder model, should be called to do the same +// changes as "ppmc_encoder_initialize()" did. +void ppmc_decoder_initialize(void) +{ + + // Initialize order-0 and context bytes + byte=fgetc(file_input); + o4_byte=byte; //order-4 + fputc(byte,file_output); + + byte=fgetc(file_input); + o3_byte=byte; //order-3 + fputc(byte,file_output); + + byte=fgetc(file_input); + o2_byte=byte; //order-2 + + fputc(byte,file_output); //output first byte + ppmc_update_order0(); + + byte=fgetc(file_input); + o1_byte=byte; //order-1 + fputc(byte,file_output); +} + + +// Once coding or decoding is finished you have to call this routine. +// It must be called when done. +void ppmc_free_memory(void) +{ + unsigned long counter; + + // Free the memory buffers + + for(counter=0;counter!=_mempool_max_index;++counter) + { + if(_bytes_pool_array[counter]!=0) + free(_bytes_pool_array[counter]); + + if(_context_pool_array[counter]!=0) + free(_context_pool_array[counter]); + } + +} + + +// This routine flushes the memory and restarts all the tables of +// probabilities, current order bytes are not modified, this function +// is called when we ran out of memory. We have to output the code +// number 256 which means memory flushing, for doing this we have to go +// to order-(-1) so we have to output an escape code in all the orders +// till we reach order-(-1) where we can code it. Then we free all the +// memory, alloc it again, and reinitialize all the orders. +// +// However we may find the case when the current order is not initialized, +// in this case we don't need to output an escape code. +void ppmc_flush_mem_enc(void) +{ + + // Code an escape code in order-4 + if(ppmc_get_totf_order4()!=0) //if 0 no need of escape code + { + + ppmc_get_escape_prob_order4(); //get prob and cump + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + } + + + // Code an escape code in order-3 + if(ppmc_get_totf_order3()!=0) //if 0 no need of escape code + { + + ppmc_get_escape_prob_order3(); //get prob and cump + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + } + + + // Code an escape code in order-2 + + o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte); + + // First check if current order-2 context is empty + if(order2_array[o2_cntxt].defined_bytes!=0) //it's not empty + { + ppmc_get_totf_order2(); + ppmc_get_escape_prob_order2(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + } + + + // Code an escape code in order-1 + + // First check if current order-1 table is empty + if(order1_defined_bytes_array[o1_byte]!=0) //it's not empty + { + ppmc_get_totf_order1(); + ppmc_get_escape_prob_order1(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + } + + + // Code an escape code in order-0. Order-0 always has at least one symbol + + ppmc_get_totf_order0(); + ppmc_get_escape_prob_order0(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + + + // Now we can code the code 256 + + symb_prob=1; + symb_cump=256; + total_cump=257; + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + + // Now that decoder knows the flushing, free memory and reinit + + ppmc_free_memory(); + ppmc_alloc_memory(); + ppmc_initialize_contexts(); + + + // Be sure that order-0 has at least one probability + + order0_array[o1_byte]++; + order0_max_cump++; + order0_defined_bytes++; + +} + + +// When the decoder gets the symbol of flushing, most of the job is done +// because we already got all the escape codes, so we only have to reinit. +void ppmc_flush_mem_dec(void) +{ + + // Free memory and reinit + + ppmc_free_memory(); + ppmc_alloc_memory(); + ppmc_initialize_contexts(); + + + // Be sure that order-0 has at least one probability + + order0_array[o1_byte]++; + order0_max_cump++; + order0_defined_bytes++; + + +} + + + +// ORDER-(-1) functions, also called ordern1 (Negative1) in functions +// +// Because order-(-1) does not need to update its probability tables, it +// has no tables, and relies on the fact that the cump of byte is its own +// value, and the probability is fixed, 1, and the total cump is 257. +// +// The alphabet has the following distribution: 0-255 the bytes. 256 is +// an special symbol which means that we have flushed the encoder tables, +// and thus the encoder must flush its tables too. +// +// The rest of the tables only have 256 symbols, because we have no need +// of assign a symbol to the flush code (which already is the order-(-1) +// table) nor to the escape code. + + +// Gets the probability for a given symbol in the order-(-1) (ordern1) +void ppmc_get_prob_ordern1(void) +{ + int i; + unsigned acum = 0; + for (i = 0; i < 256 || i == byte; ++i) + acum += ordern1_table[i]; + symb_cump=acum; + symb_prob=ordern1_table[i]; + total_cump=ordern1_total_cump; +} + + +// Returns in the variable "total_cump" the current total cump of +// order-(-1) +void ppmc_get_totf_ordern1(void) +{ + total_cump=ordern1_total_cump; +} + + +// Returns the symbol for a given cump under order-(-1) +unsigned long ppmc_get_symbol_ordern1 (void) +{ + return symb_cump; +} + + + +// ORDER-0 functions +// +// Due to the fact that order-0 has no context, I use an array for all the +// probabilities under order-0, just as you could do in a basic model for +// arithmetic coding. +// +// The main array is: order0_array. Where order0_array[byte] contains the +// probability for a given byte. The same applies to order-1. +// +// To ensure that the updating and coding is done correctly, "byte" can't +// be changed till all the coding and updating is done. + + +// Returns in the variable "total_cump" the current total cump of +// order-0 +void ppmc_get_totf_order0(void) +{ + // Total cump is current total cump plus the escape for the escape code + total_cump=order0_defined_bytes+order0_max_cump; +} + + +// Codes a byte under order-0 and returns 1, otherwise it returns a 0 and +// has coded an escape code. In this case further coding is needed. +// +// Returns: 1 in case a byte was coded. 0 in case of escape code. +char ppmc_code_byte_order0(void) +{ + unsigned long counter; + + ppmc_get_totf_order0(); //get total cump + + // See if the byte is present + if(order0_array[byte]==0) //a probability of 0 + { + + // Because it was not present, output an escape code, prepare variables + + symb_cump=order0_max_cump; //obviously its cump is current max_cump + //without escape code's space + + symb_prob=order0_defined_bytes; //the number of defined bytes + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; //byte not coded + } + else + { + + coded_in_order=0; + + // The symbol is present, code it under order-0 + + symb_prob=order0_array[byte]; //get probability directly + + // Make cump for current symbol + + symb_cump=0; //for first symbol is 0 + for(counter=0; counter!=byte ; ++counter) + symb_cump+=order0_array[counter]; //sum probabilities before our symbol + + // Code the symbol + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //symbol coded under order-0 + } +} + + +// This functions update order-0 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +void ppmc_update_order0(void) +{ + if(order0_array[byte]==0) + { + // It had a zero probability + order0_array[byte]++; //increment symbol probability + ++order0_defined_bytes; //new byte defined + ++order0_max_cump; //total cump + return; + } + else + { + // It had a non-zero probability + + // Increment its probability + order0_array[byte]++; //increment symbol probability + ++order0_max_cump; //total cump + + // Check to see if its the maximum in this case renormalize + if(order0_array[byte]==255) + ppmc_renormalize_order0(); + + return; + } +} + + +// This functions renormalizes the probabilities at order-0 updating variables +void ppmc_renormalize_order0(void) +{ + unsigned long counter; + + // Initialize variables + order0_defined_bytes=0; //clear them + order0_max_cump=0; + + // Loop over all probabilities, divide them by a factor of 2 and update variables + for(counter=0 ; counter!=256 ; ++counter) + { + order0_array[counter]>>=1; //divide by a factor of 2 + + if(order0_array[counter]!=0) //see if it has a non zero probability + order0_defined_bytes++; + + order0_max_cump+=order0_array[counter]; //sum to the total cump + } +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of a escape code it returns -1 +void ppmc_decode_order0(void) +{ + unsigned long current_cump, counter; + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order0(); //total cump needed for decoding + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=order0_max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order0(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + for(counter=0 ; counter!= 256 ; ++counter) + { + if(symb_cump>=1; //divide by a factor of 2 + + if(order1_array[o1_byte][counter]!=0) //see if it has a non zero probability + order1_defined_bytes_array[o1_byte]++; + + order1_max_cump_array[o1_byte]+=order1_array[o1_byte][counter]; //sum to the total cump + } +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +void ppmc_decode_order1(void) +{ + unsigned long current_cump, counter; + + + // First check if current order-1 table is empty + if(order1_defined_bytes_array[o1_byte]==0) //it's empty + { + byte=-1; //byte not coded, nothing done + return; + } + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order1(); //total cump needed for decoding + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=order1_max_cump_array[o1_byte]) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order1(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + for(counter=0 ; counter!= 256 ; ++counter) + { + if(symb_cumpbyte==byte) + goto ppmc_o2_byte_found; //bad thing, I know, anyone has a better idea? + symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o2_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=order2_array[o2_cntxt].max_cump; + symb_prob=order2_array[o2_cntxt].defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; + + + // That code is executed when the byte is found in the linked list + +ppmc_o2_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=2; //successfully coded under order-2 + + o2_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-2 +} + + +// This functions update order-2 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// Of course "o2_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. +// +// This updating is only for encoding. +void ppmc_update_order2(void) +{ + + // First of all check if that's the first byte in this context, in that case + // we have to initialize some variables in the context structure. + + if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + order2_array[o2_cntxt].defined_bytes=1; + order2_array[o2_cntxt].max_cump=1; + order2_array[o2_cntxt].prob=_bytes_pool; + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order two, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==2) //coded under order-2 + { + + // Update its count and variables of this context and check for renormalization + + o2_ll_node->freq++; //increment its frequency (rather probability) + + order2_array[o2_cntxt].max_cump++; //total cump + + if(o2_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order2(); //renormalize + + } + else + { + + // Once every paranoid check has been done we are sure that this byte + // did not existed and so we have to create a new node in the linked + // list. Also we have to take care of memory issues. + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o2_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + order2_array[o2_cntxt].max_cump++; //total cump + order2_array[o2_cntxt].defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + +} + + +// This functions renormalizes the probabilities at order-2 updating context +// variables. +void ppmc_renormalize_order2(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + // Initialize variables. Defined bytes remain the same. + order2_array[o2_cntxt].max_cump=0; //clear them + + node=order2_array[o2_cntxt].prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + order2_array[o2_cntxt].max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + + //printf("\nRenormalization, context:%c%c",o2_byte,o1_byte); + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o2_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o2_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order2(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + // Initialize o2_cntxt + + o2_cntxt=ppmc_order2_hash_key(o1_byte,o2_byte); + + + // First check if current order-2 context is empty + if(order2_array[o2_cntxt].defined_bytes==0) //it's empty + { + byte=-1; //byte not coded, nothing done + return; + } + + + // Get the total cump needed for decoding symbol + ppmc_get_totf_order2(); //total cump needed for decoding + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=order2_array[o2_cntxt].max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order2(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=order2_array[o2_cntxt].prob; //get pointer to linked lists + + while(1) + { + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. + } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o2_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=2; + + return; + } + +} + + +// This is the routine for updating while decoding. We have to search the byte +// in the linked list, if it's present, update its count, otherwise we have +// hitted the end of the linked list, and there we have to create a new node. +// +// Of course if the byte was matched in order-2 we'll have a pointer to it +// in "o2_ll_node" so we don't need to read the linked list. (we already did +// in decoding) +// +// Another case which we also have to specially deal with, this is the case +// when the context has not been initalized yet. +void ppmc_update_dec_order2(void) +{ + struct _byte_and_freq *node; + + + // Handle the case when the context is not initialized + // This code is the same as the one for the encoding. + + if(order2_array[o2_cntxt].defined_bytes==0) //no byte defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + order2_array[o2_cntxt].defined_bytes=1; + order2_array[o2_cntxt].max_cump=1; + order2_array[o2_cntxt].prob=_bytes_pool; + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + + return; //nothing else to do + } + + + // Current context is initalized, proceed + + if(coded_in_order==2) //check if it was decoded under order-2 + { + + // We can be sure that the pointer "o2_ll_node" points to its entry, and + // it has a non 0 probability (otherwise it couldn't be coded) so just + // update its probability and max_cump + + o2_ll_node->freq++; //the probability of the byte + order2_array[o2_cntxt].max_cump++; //the max_cump + + if(o2_ll_node->freq==255) //check for renormalization + ppmc_renormalize_order2(); + + } + else + { + + // An escape code was decoded under order-2, we have to read till the + // end of the linked list so we can add a new node for this new byte. + + node=order2_array[o2_cntxt].prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + + // We reached the end of the linked list, add a new node if possible, + // we are using the same code of "ppmc_update_order2()" with the + // difference that the pointer to the linked list is "node" + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + order2_array[o2_cntxt].max_cump++; //total cump + order2_array[o2_cntxt].defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //we are finished updating + + } + +} + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order2(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. + + symb_prob=order2_array[o2_cntxt].defined_bytes; + symb_cump=order2_array[o2_cntxt].max_cump; +} + + + +// ORDER-3 functions +// +// The difference between order-3 and order-3 are just a few, instead of +// keeping a table with the context structures, we keep a hash table with +// pointers to linked lists with the context, so it's only a matter of +// searching current context in the linked list corresponding to its hash +// entry. This is done in "ppmc_get_totf_order3" because that's the first +// routine that both encoding and decoding routines call. + + +// Returns in the variable "total_cump" the current total cump of +// order-3. Must be called while encoding or decoding before anything else +// because it initializes the pointers to the context structure in +// "o3_context" and o3_cntxt. +// +// If the hash entry is not initialized it returns "o3_context"=0 +// If the context is not present in the linked list of context, "o3_context" +// will point to the last element in the linked list. +// If the context is present "o3_context" will point to the context to use. +// One can distinguish the last two by checking the context value of the +// structure, if it's not the same, is the last element. +// +// The routine returns 0 if the hash entry is not initialized or if the +// the context was not present. Otherwise it returns 1, meaning that we +// have to code under this context. +char ppmc_get_totf_order3(void) +{ + struct context *cntxt_node; + + + // First make the hash key for order-3 + + o3_cntxt=ppmc_order3_hash_key(o1_byte,o2_byte,o3_byte); + full_o3_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16); //order-3 + + + // Now check the hash entry in the table + + if(order3_hasht[o3_cntxt]==0) //if 0, not initialized + { + + o3_context=0; //no hash entry + + return 0; //hash entry not initialized + } + + + // Now read trough the linked list of context searching current one + + cntxt_node=order3_hasht[o3_cntxt]; + + while(1) + { + + if(cntxt_node->order4321==full_o3_cntxt) //compare context + goto ppmc_gtf_cntxt_found; + + if(cntxt_node->next==0) //end of context's linked list + break; + + cntxt_node=cntxt_node->next; //next element + + } + + + // Once there the context was not found + o3_context=cntxt_node; //pointer to last element in the linked list + + return 0; //it was not present + + + // The context is found, so return pointer and cump + +ppmc_gtf_cntxt_found: + + o3_context=cntxt_node; + + // Total cump is current total cump plus the escape for the escape code + + total_cump=o3_context->defined_bytes+o3_context->max_cump; + + return 1; //context found + +} + + +// Codes a byte under order-3 and returns 1. +// Otherwise it returns a 0. +// +// In case the byte is coded under this context, coded_in_order=3. +char ppmc_code_byte_order3(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order3()==0) + return 0; + + + // See if the byte is present and compute its cump at the same time + + node=o3_context->prob; //pointer to first element in the linked list + + symb_cump=0; //the first symbol always has a 0 cump + + + // Now search the byte in the linked list + + do{ + if(node->byte==byte) + goto ppmc_o3_byte_found; //bad thing, I know, anyone has a better idea? + symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o3_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=o3_context->max_cump; + symb_prob=o3_context->defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; + + + // That code is executed when the byte is found in the linked list + +ppmc_o3_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=3; //successfully coded under order-3 + + o3_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-3 +} + + +// This functions update order-3 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// +// "o3_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. Also "o3_context" must be initialized. +// +// This updating is only for encoding. +void ppmc_update_order3(void) +{ + + // First thing first, check if the hash entry is initialized + + if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order3_hasht[o3_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==3) //coded under order-3 + { + + // Update its count and variables of this context and check for renormalization + + o3_ll_node->freq++; //increment its frequency (rather probability) + + o3_context->max_cump++; //total cump + + if(o3_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order3(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o3_context" points to the last element, so we can put the new element. + + if(o3_context->order4321==full_o3_cntxt) //chech if that's the last + { //element or the a context found + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o3_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o3_context->max_cump++; //total cump + o3_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o3_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + +// This functions renormalizes the probabilities at order-3 updating context +// variables. +void ppmc_renormalize_order3(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Initialize variables. Defined bytes remain the same. + o3_context->max_cump=0; //clear them + + node=o3_context->prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + o3_context->max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o3_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o3_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order3(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order3()==0) + { + byte=-1; + return; + } + + + // Decode current cump + + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=o3_context->max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order3(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=o3_context->prob; //get pointer to linked lists + + while(1) + { + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. + } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o3_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=3; + + return; + } + +} + + +// This is the routine for updating while decoding. The only difference with +// the routine for coding is that when an escape code was coded, "o3_ll_node" +// is not initialized so we have to read till the end of the linked list. +// Fortunately "o3_context" will be initialized so we don't need to read its +// linked list. +void ppmc_update_dec_order3(void) +{ + struct _byte_and_freq *node; + + // First thing first, check if the hash entry is initialized + + if(order3_hasht[o3_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order3_hasht[o3_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==3) //coded under order-3 + { + + // Update its count and variables of this context and check for renormalization + + o3_ll_node->freq++; //increment its frequency (rather probability) + + o3_context->max_cump++; //total cump + + if(o3_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order3(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o3_context" points to the last element, so we can put the new element. + + if(o3_context->order4321==full_o3_cntxt) //chech if that's the last + { //element or the a context found + + // Read till the end of the linked list + + node=o3_context->prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + // Now add element + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o3_context->max_cump++; //total cump + o3_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o3_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o3_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order3(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. + + symb_prob=o3_context->defined_bytes; + symb_cump=o3_context->max_cump; +} + + + +// ORDER-4 functions +// +// The routines for order-4 are *equal* to those for order-3, there are a few +// changes like different global variables, and different hash keys. +// +// If you want to go to higher orders, you'd use the same code and data +// structures, with the difference of the context bytes (order4321) +// stored in every context's linked list. + + +// Returns in the variable "total_cump" the current total cump of +// order-4. Must be called while encoding or decoding before anything else +// because it initializes the pointers to the context structure in +// "o4_context" and o4_cntxt. +// +// If the hash entry is not initialized it returns "o4_context"=0 +// If the context is not present in the linked list of context, "o4_context" +// will point to the last element in the linked list. +// If the context is present "o4_context" will point to the context to use. +// One can distinguish the last two by checking the context value of the +// structure, if it's not the same, is the last element. +// +// The routine returns 0 if the hash entry is not initialized or if the +// the context was not present. Otherwise it returns 1, meaning that we +// have to code under this context. +char ppmc_get_totf_order4(void) +{ + struct context *cntxt_node; + + + // First make the hash key for order-4 + + o4_cntxt=ppmc_order4_hash_key(o1_byte,o2_byte,o3_byte,o4_byte); + full_o4_cntxt=(o1_byte)+(o2_byte<<8)+(o3_byte<<16)+(o4_byte<<24); //order-4 + + + // Now check the hash entry in the table + + if(order4_hasht[o4_cntxt]==0) //if 0, not initialized + { + + o4_context=0; //no hash entry + + return 0; //hash entry not initialized + } + + + // Now read trough the linked list of context searching current one + + cntxt_node=order4_hasht[o4_cntxt]; + + while(1) + { + + if(cntxt_node->order4321==full_o4_cntxt) //compare context + goto ppmc_gtfo4_cntxt_found; + + if(cntxt_node->next==0) //end of context's linked list + break; + + cntxt_node=cntxt_node->next; //next element + + } + + + // Once there the context was not found + o4_context=cntxt_node; //pointer to last element in the linked list + + return 0; //it was not present + + + // The context is found, so return pointer and cump + +ppmc_gtfo4_cntxt_found: + + o4_context=cntxt_node; + + // Total cump is current total cump plus the escape for the escape code + + total_cump=o4_context->defined_bytes+o4_context->max_cump; + + return 1; //context found + +} + + +// Codes a byte under order-4 and returns 1. +// Otherwise it returns a 0. +// +// In case the byte is coded under this context, coded_in_order=4. +char ppmc_code_byte_order4(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order4()==0) + return 0; + + + // See if the byte is present and compute its cump at the same time + + node=o4_context->prob; //pointer to first element in the linked list + + symb_cump=0; //the first symbol always has a 0 cump + + + // Now search the byte in the linked list + + do{ + if(node->byte==byte) + goto ppmc_o4_byte_found; //bad thing, I know, anyone has a better idea? + symb_cump+=node->freq; //add the probability of this byte to the cump + if(node->next==0) + break; + node=node->next; //next element in the linked list + }while(1); + + + // If we reach that point, the byte was not found in the linked list + // so we don't need the cump, we have to output an escape code, whose + // probabilities are know using the context structure in the table. + + // Byte was not present in the linked list, current node is the last one, + // and that's the node needed for creating a new node, save it. + + o4_ll_node=node; + + // Now get the probability and cump of the escape code + + symb_cump=o4_context->max_cump; + symb_prob=o4_context->defined_bytes; + + // Code the escape code + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 0; + + + // That code is executed when the byte is found in the linked list + +ppmc_o4_byte_found: + + + // Everything has been tested, now we can feel free to code the byte, + // the symb_cump is already computed, now get its probability and code + // the byte, also save pointer to this element in the linked lists for + // updating. + + coded_in_order=4; //successfully coded under order-4 + + o4_ll_node=node; //save it for updating + + symb_prob=node->freq; //get the probability of the byte + + // Code it. + + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + + return 1; //byte coded under order-4 +} + + +// This functions update order-4 probabilities with current byte, also takes +// care about updating variables, and renormalizing. +// +// "o4_ll_node" must point to the element to update or the last one, +// based on the "coded_in_order" and checking the pointer of the element we +// can decide what to do. Also "o4_context" must be initialized. +// +// This updating is only for encoding. +void ppmc_update_order4(void) +{ + + // First thing first, check if the hash entry is initialized + + if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order4_hasht[o4_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + + + // The byte was coded under order three, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==4) //coded under order-4 + { + + // Update its count and variables of this context and check for renormalization + + o4_ll_node->freq++; //increment its frequency (rather probability) + + o4_context->max_cump++; //total cump + + if(o4_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order4(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o4_context" points to the last element, so we can put the new element. + + if(o4_context->order4321==full_o4_cntxt) //chech if that's the last + { //element or the a context found + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + o4_ll_node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o4_context->max_cump++; //total cump + o4_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o4_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + +// This functions renormalizes the probabilities at order-4 updating context +// variables. +void ppmc_renormalize_order4(void) +{ + unsigned long counter; + struct _byte_and_freq *node; + + + // Initialize variables. Defined bytes remain the same. + o4_context->max_cump=0; //clear them + + node=o4_context->prob; //get pointer to lined lists + + // Divide all the probabilities by 2 and update context variables + while(1) + { + node->freq>>=1; //divide by a factor of 2 + + if(node->freq==0) //don't allow a probability to be 0 + node->freq=1; + + o4_context->max_cump+=node->freq; //sum to the total cump + + if(node->next==0) //last element + break; + node=node->next; + } + + +} + + +// Decodes the symbol correspoding to the current order, it returns the byte +// or in case of an escape code or empty table it returns -1. +// It updates "coded_in_order". +// +// If we decode an escape code, we don't modify "o4_ll_node" and thus its +// work of the updating routine to read till the end of the linked list +// (for adding a new element) however if we decode a byte, we save on +// "o4_ll_node" the pointer to its node. (so updating is faster) +void ppmc_decode_order4(void) +{ + unsigned long current_cump, counter; + struct _byte_and_freq *node; + + + // Get current context (if present) and total cump. + + if(ppmc_get_totf_order4()==0) + { + byte=-1; + return; + } + + + // Decode current cump + + symb_cump=range_decoder_decode(&rc_decoder,total_cump); //decode it + + // Now check if it's an escape code + if(symb_cump>=o4_context->max_cump) //the defined code space for the escape code + { + + // Update coding values + + ppmc_get_escape_prob_order4(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Mark as escape code + + byte=-1; + + return; //an escape code + } + else + { + // Now we have to check what symbol it is + + current_cump=0; //cump of the current symbol + + node=o4_context->prob; //get pointer to linked lists + + while(1) + { + current_cump+=node->freq; //update cump + if(symb_cumpnext; //next element + //we have no need to check for the last symbol, we'll never read further + //the end of the linked lists, before we'll found the last byte. + } + + + //read byte value and probability + + symb_prob=node->freq; //get the probability for updating the state + byte=node->byte; //get byte + o4_ll_node=node; //used for updating + + + // Get the cump of byte + + symb_cump=current_cump-symb_prob; + + // Update coding state + + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + + // Update coded_in_order used for update exclusion + + coded_in_order=4; + + return; + } + +} + + +// This is the routine for updating while decoding. The only difference with +// the routine for coding is that when an escape code was coded, "o4_ll_node" +// is not initialized so we have to read till the end of the linked list. +// Fortunately "o4_context" will be initialized so we don't need to read its +// linked list. +void ppmc_update_dec_order4(void) +{ + struct _byte_and_freq *node; + + // First thing first, check if the hash entry is initialized + + if(order4_hasht[o4_cntxt]==0) //no pointer to linked list defined yet + { + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + order4_hasht[o4_cntxt]=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + return; //nothing else to do + } + + + // The byte was coded under order four, otherwise it was coded under a + // lower order (never higher ones, remember that we are using update + // exclusion) in this case we have to create a new node in the list. + + if(coded_in_order==4) //coded under order-4 + { + + // Update its count and variables of this context and check for renormalization + + o4_ll_node->freq++; //increment its frequency (rather probability) + + o4_context->max_cump++; //total cump + + if(o4_ll_node->freq==255) //do we have to renormalize? + ppmc_renormalize_order4(); //renormalize + + } + else + { + + // Now we have two cases, under a given context (which we actually found) + // we coded an escape coded, in that case just create a new node in the + // linked list of bytes and probabilities. Otherwise we didn't find the + // same node so we have to create it in the linked list for context. + // And we can be sure that it at least has one element and that + // "o4_context" points to the last element, so we can put the new element. + + if(o4_context->order4321==full_o4_cntxt) //chech if that's the last + { //element or the a context found + + // Read till the end of the linked list + + node=o4_context->prob; //get pointer to linked list + + while(1) + { + if(node->next==0) //check for the end of the linked list + break; + node=node->next; //next node + } + + // Now add element + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate mem + + node->next=_bytes_pool; //put it in the next free entry + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + o4_context->max_cump++; //total cump + o4_context->defined_bytes++; //total cump + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + } + else + { + + // We have to create a new context node + + if(ppmc_out_of_memory==1) + return; //exit this function, we can't allocate memory + + + // First create the context + + o4_context->next=_context_pool; + + _context_pool->next=0; //this is the last element + _context_pool->order4321=full_o4_cntxt; //put context + _context_pool->prob=_bytes_pool; //pointer to linked list + _context_pool->max_cump=1; + _context_pool->defined_bytes=1; + + + // Do update of linked list variables and memory use of contexts + + ++_context_pool; //next time use next entry (this is a pointer) + + if(_context_pool==_context_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_context_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_context_pool=(struct context *)malloc + (sizeof(struct context)*_context_pool_elements_inc))==NULL) + { + printf("Can't allocate memory for context's memory pool.\nIndex: %d",_context_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _context_pool_array[_context_pool_index]=_context_pool; + + _context_pool_max=_context_pool+_context_pool_elements_inc; + + _context_pool_index++; + + } + + // Now care about the first (and last) linked list element + + _bytes_pool->byte=byte; //initialize byte to current one + _bytes_pool->freq=1; //it appeared once + _bytes_pool->next=0; //now this is last element in ll + + // Do update of linked list variables and memory use + + ++_bytes_pool; //next time use next entry (this is a pointer) + + if(_bytes_pool==_bytes_pool_max) //maximum reached + { + + // First check to see that we still have entries in the array + + if(_bytes_pool_index==_mempool_max_index) + { + ppmc_out_of_memory=1; //flush + return; + } + + // Allocate memory for new buffer + + if((_bytes_pool=(struct _byte_and_freq *)malloc + (sizeof(struct _byte_and_freq)*_bytes_pool_elements_inc))==NULL) + { + printf("\nCan't allocate memory for bytes memory pool.\nIndex: %d",_bytes_pool_index); + ppmc_out_of_memory=1; //flush + return; + } + + _bytes_pool_array[_bytes_pool_index]=_bytes_pool; + + _bytes_pool_max=_bytes_pool+_bytes_pool_elements_inc; + + _bytes_pool_index++; + + } + + } + + } + +} + + + +// This function returns the probability for the escape codes in the variables +void ppmc_get_escape_prob_order4(void) +{ + // To understand that remember that the escape allocated for the escape code + // is above the current maximum cump and that it has a probability determined + // by the scheme C. + + symb_prob=o4_context->defined_bytes; + symb_cump=o4_context->max_cump; +} + diff --git a/src/ppmc/luca/ppmc.h b/src/ppmc/luca/ppmc.h new file mode 100644 index 0000000..e21545e --- /dev/null +++ b/src/ppmc/luca/ppmc.h @@ -0,0 +1,135 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmc.h" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains the definitions of different functions and all the + data structures defined by ppmc. Also contains defines. +*/ + +// Definitions + +#define ppmc_order4_hash_size 65536 +#define ppmc_order4_hash_key(k,j,i,l) ( (k)+(j<<8)+(i<<1)+(l<<9) )& ppmc_order4_hash_size-1 +#define ppmc_order3_hash_size 65536 +#define ppmc_order3_hash_key(k,j,i) ((k)+(j<<7)+(i<<11)) & ppmc_order3_hash_size-1 +#define ppmc_order2_hash_key(k,j) ((k)+(j<<8)) +#define _bytes_pool_elements 125000 //this is used the first time + //that we allocate memory, that's + //the number of entries +#define _bytes_pool_elements_inc 125000 //if we need to alloc again, this + //is the number of entries to get +#define _context_pool_elements 50000 +#define _context_pool_elements_inc 50000 + +#define _mempool_max_index 1000 //the number of entries in the array with + //pointers + + +// Data structures + +// This structure contains a single element of a linked lists which contains +// the probability distribution of a given order. This structure takes 6 bytes. +struct _byte_and_freq{ +unsigned char byte; //the byte itself +unsigned char freq; //and the frequency of it +struct _byte_and_freq *next; //pointer to next element in linked list or 0 +}; + + +// This structure is used for both order-3 and order-4. It takes 20 bytes, +// and it can still hold another byte more. (only 19 being used) +// Order 2-1-0-(-1) use different structures for a faster accessing. +struct context{ +struct context *next; //next context in the hash entry +unsigned long order4321; //order-4-3-2-1 (or order-3-2-1 for order-3) +struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution +unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 ) +unsigned int defined_bytes; //the number of bytes in this context +}; + +// That's the same but for order-2 where there's no hash collisions. +struct context_o2{ +struct _byte_and_freq *prob; //pointer to linked lists containing probability distribution +unsigned int max_cump; //maximum cumulative probability (can't exceed (2^16)-1 ) +unsigned int defined_bytes; //the number of bytes in this context +}; + + +// Declaration of functions + + +// Functions for initializing +void ppmc_alloc_memory(void); +void ppmc_initialize_contexts(void); +void ppmc_encoder_initialize(void); +void ppmc_decoder_initialize(void); +void ppmc_free_memory(void); +void ppmc_flush_mem_enc(void); +void ppmc_flush_mem_dec(void); + +// Functions for order-(-1) +void ppmc_get_prob_ordern1(void); +unsigned long ppmc_get_symbol_ordern1(void); +void ppmc_get_totf_ordern1(void); +void ppmc_renormalize_order1(void); + +// Functions for order-0 +void ppmc_get_totf_order0(void); +char ppmc_code_byte_order0(void); +void ppmc_update_order0(void); +void ppmc_renormalize_order0(void); +void ppmc_decode_order0(void); +void ppmc_get_escape_prob_order0(void); +void ppmc_get_prob_order0(void); + +// Functions for order-1 +void ppmc_get_totf_order1(void); +char ppmc_code_byte_order1(void); +void ppmc_update_order1(void); +void ppmc_renormalize_order1(void); +void ppmc_decode_order1(void); +void ppmc_get_escape_prob_order1(void); +void ppmc_get_prob_order1(void); + + +// Functions for order-2 +void ppmc_get_totf_order2(void); +char ppmc_code_byte_order2(void); +void ppmc_update_order2(void); +void ppmc_renormalize_order2(void); +void ppmc_decode_order2(void); +void ppmc_update_dec_order2(void); +void ppmc_get_escape_prob_order2(void); +void ppmc_get_prob_order2(void); + + +// Functions for order-3 +char ppmc_get_totf_order3(void); +char ppmc_code_byte_order3(void); +void ppmc_update_order3(void); +void ppmc_renormalize_order3(void); +void ppmc_decode_order3(void); +void ppmc_update_dec_order3(void); +void ppmc_get_escape_prob_order3(void); +void ppmc_get_prob_order3(void); + + +// Functions for order-4 +char ppmc_get_totf_order4(void); +char ppmc_code_byte_order4(void); +void ppmc_update_order4(void); +void ppmc_renormalize_order4(void); +void ppmc_decode_order4(void); +void ppmc_update_dec_order4(void); +void ppmc_get_escape_prob_order4(void); +void ppmc_get_prob_order4(void); + + + diff --git a/src/ppmc/luca/ppmcdata.c b/src/ppmc/luca/ppmcdata.c new file mode 100644 index 0000000..bea0926 --- /dev/null +++ b/src/ppmc/luca/ppmcdata.c @@ -0,0 +1,119 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcdata.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + +Part of the ppmc encoder and decoder. + +This module contains global data. +*/ + +#include "ppmc.h" //defines +#include "range.h" + +// Order-4 uses a hash table which points to the start of a linked list with +// the different context, which has the cump, the number of different symbols +// and a pointer to the linked list with the bytes and frequencies. +// Order-3 is almost the same, both take 262144 bytes. +struct context *order4_hasht[ppmc_order4_hash_size]; + +struct context *order3_hasht[ppmc_order3_hash_size]; + + +// The array for order-2 is different, as we do directly hashing, and thus +// we have no need to do the stuff of linked lists for the context itself, +// so it contains the context used. This takes 1310720 bytes. +struct context_o2 order2_array[65536]; + + +// Those are the multiple arrays for order-1. It takes 65536 bytes. +unsigned char order1_array[256][256]; +unsigned int order1_defined_bytes_array[256]; //the defined bytes in every context +unsigned int order1_max_cump_array[256]; //max cump of every context + + +// This is the array for order-0. It takes 256 bytes. +unsigned char order0_array[256]; +unsigned int order0_defined_bytes; +unsigned int order0_max_cump; + + +// No need of variables for order-(-1), because it's fixed. + + +// Those are the pointers and variables used for managing the mem pool for +// both context, and bytes and frequencies. +struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked + //lists with bytes and frequencies + *_bytes_pool_max; //the maximum of this buffer + struct context *_context_pool; //pointer to pool containing contexts + struct context *_context_pool_max; //the same as with _bytes_pool + + unsigned long _bytes_pool_index; //index in array of pointers + unsigned long _context_pool_index; + + //the following is an array keeping pointers to different buffers. A new + //buffer is allocated when the current one is full, so we always have a + //buffer for linked lists. (without allocating a buffer for every element) + struct _byte_and_freq *_bytes_pool_array[_mempool_max_index]; + struct context *_context_pool_array[_mempool_max_index]; + + char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any + //routine that needs to allocate memory must + //quit if that's 1. + + + // Variables which contain current byte to code and order + unsigned long byte, //current byte to code + o1_byte, //order-1 byte + o2_byte, //order-2 byte + o3_byte, //order-3 byte + o4_byte; //order-4 byte + +unsigned long o2_cntxt; //used in the hash key of order-2 +unsigned long o3_cntxt; //use as hash key for order-3 +unsigned long o4_cntxt; //use as hash key for order-4 +unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together +unsigned long full_o4_cntxt; //order-4-3-2-1 + +unsigned long coded_in_order; //in which order the last byte was coded +//it's for update exclusion +//also used for decoding + +// Variables used for coding + +unsigned long +total_cump, //the total cumulative probability + symb_cump, //the symbol cumulative probability + symb_prob; //the symbol frequency + +rangecoder rc_coder; //state of range coder +rangecoder rc_decoder; //state of range decoder + +// File handles + +FILE *file_input, //file to code + *file_output; //file where the coded data is placed + + +// Pointers to linked lists and context structures used for faster updating +// or creation of new nodes, because instead of reading again all the linked +// list till the end (in the case of creation) we have a pointer to the last +// element. In the case that a byte was present in the linked lists but it +// had a 0 count, we just have to update its probability. And in the case +// that it already was present and we coded it under that context or a lower +// one, we just have to update its probability. + + +struct _byte_and_freq *o2_ll_node; //pointer to linked lists under order-2 +//where does it points depends in which +//order the byte was coded. +struct _byte_and_freq *o3_ll_node; //the same but for order-3 +struct _byte_and_freq *o4_ll_node; + +struct context *o3_context; //pointer to current order-3 context +struct context *o4_context; //pointer to current order-3 context diff --git a/src/ppmc/luca/ppmcdata.h b/src/ppmc/luca/ppmcdata.h new file mode 100644 index 0000000..f4bddc1 --- /dev/null +++ b/src/ppmc/luca/ppmcdata.h @@ -0,0 +1,117 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcdata.h" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Part of the ppmc encoder and decoder. + + This module contains externs definition for global data. +*/ + +#include "ppmc.h" + +// Order-4 uses a hash table which points to the start of a linked list with +// the different context, which has the cump, the number of different symbols +// and a pointer to the linked list with the bytes and frequencies. +// Order-3 is almost the same, both take 262144 bytes. +extern struct context *order4_hasht[]; + +extern struct context *order3_hasht[]; + + +// The array for order-2 is different, as we do directly hashing, and thus +// we have no need to do the stuff of linked lists for the context itself, +// so it contains the context used. This takes 1310720 bytes. +extern struct context_o2 order2_array[]; + + +// Those are the multiple arrays for order-1. It takes 65536 bytes. +extern unsigned char order1_array[256][256]; +extern unsigned int order1_defined_bytes_array[]; //the defined bytes in every context +extern unsigned int order1_max_cump_array[]; //max cump of every context + + +// This is the array for order-0. It takes 256 bytes. +extern unsigned char order0_array[]; +extern unsigned int order0_defined_bytes; +extern unsigned int order0_max_cump; + + +// Those are the pointers and variables used for managing the mem pool for +// both context, and bytes and frequencies. +extern struct _byte_and_freq *_bytes_pool, //pointer to pool containing linked + //lists with bytes and frequencies + *_bytes_pool_max; //the maximum of this buffer +extern struct context *_context_pool; //pointer to pool containing contexts +extern struct context *_context_pool_max; //the same as with _bytes_pool + +extern unsigned long _bytes_pool_index; //index in array of pointers +extern unsigned long _context_pool_index; + +//the following is an array keeping pointers to different buffers. A new +//buffer is allocated when the current one is full, so we always have a +//buffer for linked lists. (without allocating a buffer for every element) +extern struct _byte_and_freq *_bytes_pool_array[_mempool_max_index]; +extern struct context *_context_pool_array[_mempool_max_index]; + +extern char ppmc_out_of_memory; //0 if we have enough memory, 1 instead, any + //routine that needs to allocate memory must + //quit if that's 1. + + + +// Variables which contain current byte to code and order +extern unsigned long //they are only bytes + byte, //current byte to code + o1_byte, //order-1 byte + o2_byte, //order-2 byte + o3_byte, //order-3 byte + o4_byte; //order-4 byte + +extern unsigned long o2_cntxt; //used in the hash key of order-2 +extern unsigned long o3_cntxt; //use as hash key for order-3 +unsigned long o4_cntxt; //use as hash key for order-4 +extern unsigned long full_o3_cntxt; //o1_byte, o2_byte and o3_byte together +extern unsigned long full_o4_cntxt; //order-4-3-2-1 + +extern unsigned long coded_in_order; //in which order the last byte was coded + //it's for update exclusion + //also used for decoding +// Variables used for coding + +extern unsigned long //no need for negative values + total_cump, //the total cumulative probability + symb_cump, //the symbol cumulative probability + symb_prob; //the symbol frequency + +extern rangecoder rc_coder; //state of range coder +extern rangecoder rc_decoder; //state of range decoder + +// File handles + + FILE *file_input, //file to code + *file_output; //file where the coded data is placed + + + +// Pointers to linked lists and context structures used for faster updating +// or creation of new nodes, because instead of reading again all the linked +// list till the end (in the case of creation) we have a pointer to the last +// element. In the case that a byte was present in the linked lists but it +// had a 0 count, we just have to update its probability. And in the case +// that it already was present and we coded it under that context or a lower +// one, we just have to update its probability. + + +extern struct _byte_and_freq *o2_ll_node;//pointer to linked lists under order-2 + //where does it points depends in which + //order the byte was coded. +extern struct _byte_and_freq *o3_ll_node; //the same but for order-3 +extern struct _byte_and_freq *o4_ll_node; + +extern struct context *o3_context; //pointer to current order-3 context +extern struct context *o4_context; //pointer to current order-3 context diff --git a/src/ppmc/luca/ppmcmain.c b/src/ppmc/luca/ppmcmain.c new file mode 100644 index 0000000..7a99d12 --- /dev/null +++ b/src/ppmc/luca/ppmcmain.c @@ -0,0 +1,176 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "ppmcmain.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + +Part of the ppmc encoder only. + +This module is the main module and calls the different modules to do +the encoding of a file. When done prints bpb and kbyps. +*/ + + +#include +#include +#include "range.h" //the range coder functions and data +#include "ppmcdata.h" + + +long filesize(FILE *stream); + + + +//Main +int main (char argc, char *argv[]) +{ + unsigned long counter, //temporal counter for loops like for or while + counter2, //another temporal counter for sub loops + size_file_input; //the size of the input file + + + // Print title, version and copyright + printf("PPMC using range coder. (without exclusion)\n"); + printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n"); + printf("Permission is granted to make verbatim copies of this program for private\n"); + printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n"); + + // Check for correct number of parameters + if(argc!=3) + { + printf("Bad number of arguments.\n"); + exit(1); + } + + + // Try to open input and output files + if((file_input=fopen(argv[1],"r+b"))==NULL) + { + printf("Couldn't open %s.\n",argv[1]); + exit(1); + } + + if((file_output=fopen(argv[2],"w+b"))==NULL) + { + printf("Couldn't create %s.\n",argv[2]); + exit(1); + } + + + // Check input file length and not accept 0 length files + size_file_input=filesize(file_input); + + if(size_file_input<5) + { + printf("Can't work with files below than 5 bytes!"); + exit(1); + } + + + // First output file length + fwrite(&size_file_input,1,4,file_output); //input length + + + // Initialize ppmc encoder + ppmc_alloc_memory(); //get memory + ppmc_initialize_contexts(); //initialize model + ppmc_encoder_initialize(); + + // Initialize range coder + range_coder_init(&rc_coder,file_output); + + + // Start main loop which codes the file + while((byte=fgetc(file_input))!=EOF) + { + + // Try to code current byte under order-4 if possible then go to lower orders + if(ppmc_code_byte_order4()==0) + if(ppmc_code_byte_order3()==0) + if(ppmc_code_byte_order2()==0) + if(ppmc_code_byte_order1()==0) + if(ppmc_code_byte_order0()==0) //else try to code under order-0 + { + // Code under order-(-1) + ppmc_get_prob_ordern1(); + range_coder_encode(&rc_coder,total_cump,symb_cump,symb_prob); + coded_in_order=0; //update all the tables (unless order-(-1)) + } + + + // Now do update exclusion + + switch(coded_in_order) + { + case 0: ppmc_update_order0(); //update only order-0 + case 1: ppmc_update_order1(); //update order-0 and order-1 + case 2: ppmc_update_order2(); //update order-2 1 and 0... + case 3: ppmc_update_order3(); + case 4: ppmc_update_order4(); + default: break; + }; + + + + // Update order variables + + o4_byte=o3_byte; + o3_byte=o2_byte; + o2_byte=o1_byte; + o1_byte=byte; //current one is next time order-1 + + + // Check if we run out of memory, in that case, flush the encoder + + if(ppmc_out_of_memory==1) + { + printf("Flushing memory! Output file might be not decodable.\n"); + ppmc_flush_mem_enc(); + } + + + } + + + // Flush range coder + range_coder_flush(&rc_coder); + + // Free memory + ppmc_free_memory(); + + + // Print bpb and kbyps + printf("%s at %f bpb.\n",argv[1],((float)filesize(file_output)/(float)size_file_input)*(float)8); + + + // Close file handles + fclose(file_input); + fclose(file_output); + + + + // Nicely exit + return 0; +} + + +// Routines not used by ppmc but rather by main. +// Not including the range coder. + + +// Returns the file size of a given file. +long filesize(FILE *stream) +{ + long curpos, length; + + curpos = ftell(stream); + fseek(stream, 0L, SEEK_END); + length = ftell(stream); + fseek(stream, curpos, SEEK_SET); + return length; +} + + diff --git a/src/ppmc/luca/range.c b/src/ppmc/luca/range.c new file mode 100644 index 0000000..168ddc2 --- /dev/null +++ b/src/ppmc/luca/range.c @@ -0,0 +1,212 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "range.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + +This module contains the routines of both the range coder and decoder. + +The range coder works internally in 32 bits, and uses bytes as symbols. +Also the end of message symbol is used. So z=257. + +Both input and output use rc_file as the file stream. Of course we can't +code and decode at the same time. All the input or output comes from the +same file, no matter what range coder structure are we using. The modules +here provided don't manage the io except for reading and writing, they +don't open nor close the files. The reading and writing is done via +putc and getc. +*/ + +#include "range.h" +#include + +/* + Inits the range coder state. Must be called before encoding any symbol. + It uses a magic number 0xB3 as the first byte outputted. + -rangecoder *rc, the range coder to be used. + + Shoulde be called like that: + range_coder_init(&o0_rc_state,file_output); + */ +void range_coder_init(rangecoder *rc, FILE *stream) +{ + rc_file=stream; + rc->low=0; //define state + rc->range=0x80000000; + rc->byte_buffer=0xB3; //magic number + rc->help=0; //temp value +} + + +/* + Encodes a symbol. + -rangecoder *rc, the range coder to be used. + -unsigned long tot_f, the maximum cumulative frequency + -unsigned long lt_f, the cumulative probabilty of the symbol + -unsigned long sy_f, the probability of the symbol + */ +void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f) +{ + unsigned long temp, r; + + range_coder_renormalize(rc); //&rc? + + r=rc->range/tot_f; + temp=r*lt_f; + if(lt_f+sy_frange=r*sy_f; + else + rc->range-=temp; + rc->low+=temp; +} + +/* + Renormalizes the state when coding. + -rangecoder *rc, the range coder to be used. + */ + +void range_coder_renormalize(rangecoder *rc) +{ + while(rc->range<=(unsigned long)0x00800000) + { + if(rc->low<(unsigned long)0x7F800000) + { + putc(rc->byte_buffer,rc_file); + for(;rc->help;rc->help--) + putc(0xFF,rc_file); + rc->byte_buffer=(unsigned char)(rc->low>>23); + } + else + { + if(rc->low&(unsigned long)0x80000000) + { + putc(rc->byte_buffer+1,rc_file); + for(;rc->help;rc->help--) + putc(0x00,rc_file); + rc->byte_buffer=(unsigned char)(rc->low>>23); + } + else + rc->help++; + } + rc->range<<=8; + rc->low=(rc->low<<8)&(unsigned long)(0x7FFFFFFF); + } +} + + +/* + Flushes the encoder. Must be called when the coding is done. + -rangecoder *rc, the range coder to be used. + + Shoulde be called like that: + range_coder_flush(&o0_rc_state); + */ +void range_coder_flush(rangecoder *rc) +{ + unsigned long tmp; + + range_coder_renormalize(rc); + tmp = rc->low >> 23; + if (tmp > 0xff) + { + putc(rc->byte_buffer+1,rc_file); + for(; rc->help; rc->help--) + putc(0,rc_file); + } + else + { + putc(rc->byte_buffer,rc_file); + for(; rc->help; rc->help--) + putc(0xff,rc_file); + } + + putc(tmp & 0xff,rc_file); + putc((tmp = rc->low >> (23-8)) & 0xff,rc_file); +} + + +/* + Inits the range decoder state. Also checks for the magic number, and + quits in case it isn't the first, so be careful. + -rangecoder *rc, the range coder to be used. + */ +void range_decoder_init(rangecoder *rc, FILE *stream) +{ + unsigned int _rd_c; + + rc_file=stream; + if((_rd_c=getc(rc_file))!=0xB3) + { + printf("\nThis is not range coded data. Magic number not found. Exiting."); + exit(1); + } + rc->byte_buffer=getc(rc_file); + rc->low=rc->byte_buffer>>1; + rc->range=0x80; +} + + +/* + Decode a symbol, get its cumulative probability. +Input: +-rangecoder *rc, the range coder to be used. +-unsigned long tot_f, the maximum cumulative probability +Output: +-unsigned long, cumulative probability of the current symbol +Should be called like that: +current_cump=range_decoder_decode(&o0_rc_state,o0_tot_f); +*/ +unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f) +{ + unsigned long temp; + + range_decoder_renormalize(rc); + rc->help=rc->range/tot_f; + temp=rc->low/rc->help; + if(temp>=tot_f) + return tot_f-1; + else + return temp; +} + + +/* + Updates the state so next symbol can be decoded. +Input: +-rangecoder *rc, the range coder to be used. +-unsigned long tot_f, the maximum cumulative probability +-unsigned long lt_f, the cumulative probabilty of the symbol +-unsigned long sy_f, the probability of the symbol + +*/ +void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f) +{ + unsigned long temp; + + temp=rc->help*lt_f; + rc->low-=temp; + if(lt_f+sy_frange=rc->help*sy_f; + else + rc->range-=temp; +} + + +/* + Renormalizes the state while decoding. + -rangecoder *rc, the range coder to be used. + */ +void range_decoder_renormalize(rangecoder *rc) +{ + while(rc->range<=0x00800000) + { + rc->low=(rc->low<<8)|((rc->byte_buffer<<7)&0xFF); + rc->byte_buffer=getc(rc_file); + rc->low |= rc->byte_buffer >> (1); + rc->range<<=8; + } +} + diff --git a/src/ppmc/luca/range.h b/src/ppmc/luca/range.h new file mode 100644 index 0000000..04b6fcc --- /dev/null +++ b/src/ppmc/luca/range.h @@ -0,0 +1,39 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this file for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "range.h" + Email: arturo@arturocampos.com + Web: http://www.arturocampos.com + + Declarations for the coder. +*/ + +#include + +typedef struct{ + unsigned long low, range, help; + unsigned char byte_buffer; +}rangecoder; + +FILE *rc_file; + +void range_coder_init(rangecoder *rc, FILE *stream); //coding routines +void range_coder_encode(rangecoder *rc,unsigned long tot_f, unsigned long lt_f,unsigned long sy_f); +void range_coder_renormalize(rangecoder *rc); +void range_coder_flush(rangecoder *rc); +void range_decoder_init(rangecoder *rc, FILE *stream);//decoding routines +unsigned long range_decoder_decode(rangecoder *rc, unsigned long tot_f); +void range_decoder_update(rangecoder *rc, unsigned long tot_f, unsigned long lt_f,unsigned long sy_f); +void range_decoder_renormalize(rangecoder *rc); + + +typedef unsigned long code_value; +#define CODE_BITS 32 +#define Top_value ((code_value)1 << (CODE_BITS-1)) +#define SHIFT_BITS (CODE_BITS - 9) +#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1) +#define Bottom_value (Top_value >> 8) +#define outbyte(cod,x) putc(x,stdout) +#define inbyte(cod) getc(stdin) diff --git a/src/ppmc/luca/unppmc.c b/src/ppmc/luca/unppmc.c new file mode 100644 index 0000000..501f0c4 --- /dev/null +++ b/src/ppmc/luca/unppmc.c @@ -0,0 +1,204 @@ +/* + Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved. + Permission is granted to make verbatim copies of this program for private + use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK. + + This file is: "unppmc.c" +Email: arturo@arturocampos.com +Web: http://www.arturocampos.com + + +Part of the ppmc decoder. + +This module is the main module and calls the different modules to do +the decoding of a file. When done prints kbyps. +*/ + + +// Bibliotecas necesarias +#include +#include +#include "range.h" //the range coder functions and data +#include "ppmcdata.h" + + +// Declaracion de funciones del ppmcmain.c +long filesize(FILE *stream); + + + + +//Main +void main (int argc, char *argv[]) +{ + unsigned long counter, //temporal counter for loops like for or while + counter2, //another temporal counter for sub loops + size_file_output, //the size of the output file + main_counter; //used in main + char expected_flush=0; //used for checking flushing which can't be done + + + // Print title, version and copyright + printf("UNPPMC using range coder.\n"); + printf("Copyright (C) Arturo San Emeterio Campos 1999. All rights reserved.\n"); + printf("Permission is granted to make verbatim copies of this program for private\n"); + printf("use only. There is ABSOLUTELY NO WARRANTY. Use it at your OWN RISK.\n"); + + + + // Check for correct number of parameters + if(argc!=3) + { + printf("Bad number of arguments.\n"); + exit(1); + } + + + // Try to open input and output files + if((file_input=fopen(argv[1],"r+b"))==NULL) + { + printf("Couldn't open %s.\n",argv[1]); + exit(1); + } + + if((file_output=fopen(argv[2],"w+b"))==NULL) + { + printf("Couldn't create %s.\n",argv[2]); + exit(1); + } + + + // Get output length + fread(&size_file_output,1,4,file_input); + + + // Initialize ppmc decoder + ppmc_alloc_memory(); + ppmc_initialize_contexts(); + ppmc_decoder_initialize(); + + + + // Initialize decoder + range_decoder_init(&rc_decoder,file_input); + + + // Start main loop which decodes the file + main_counter=size_file_output-4; //take in account the bytes already written + expected_flush=0; //we don't expect a flush yet + + while(main_counter!=0) + { + + // Try to decode current byte in order-4 if possible, else in lower ones + ppmc_decode_order4(); + if(byte==-1) + ppmc_decode_order3(); + if(byte==-1) + { + ppmc_decode_order2(); + if(byte==-1) + { + ppmc_decode_order1(); + if(byte==-1) + { + ppmc_decode_order0(); + if(byte==-1) //check if it was an escape code + { + // Decode in order-(-1) + ppmc_get_totf_ordern1(); + symb_cump=range_decoder_decode(&rc_decoder,total_cump); + byte=ppmc_get_symbol_ordern1(); + ppmc_get_prob_ordern1(); + range_decoder_update(&rc_decoder,total_cump,symb_cump,symb_prob); + coded_in_order=0; //update all orders + + // Now see if it's the code of flushing + + if(symb_cump==256) + { + printf("Flushing.\n"); + ppmc_flush_mem_dec(); + expected_flush=0; + continue; //do not output byte nor update + } + + } + } + } + } + + // Output byte and update model + + fputc(byte,file_output); + + switch(coded_in_order) //update exclusion + { + case 0: ppmc_update_order0(); //update only order-0 + case 1: ppmc_update_order1(); //update order-0 and order-1 + case 2: ppmc_update_dec_order2(); //update order-0 1 and 2 + case 3: ppmc_update_dec_order3(); + case 4: ppmc_update_dec_order4(); + default: break; + }; + + + // Check if flushing has to be done and has not been done. + // This is optional, in case you limit the memory usage, you don't + // need to include this + + if(expected_flush==1) // If flushing didn't happen, we can't decode + { + printf("Can't decompress file. Not enough memory.\nTry in a machine with more memory.\n"); + exit(1); + } + if(ppmc_out_of_memory==1) + { + expected_flush=1; // Next code must be a flush code, otherwise we don't + // have enough memory, and therefore we can't decode + } + + + // Update order variables + + o4_byte=o3_byte; + o3_byte=o2_byte; + o2_byte=o1_byte; + o1_byte=byte; //current one, is next time order-1 + + // Byte decoded and model updated, loop + main_counter--; + + + } + + + ppmc_free_memory(); + + // Close file handles and free memory + fclose(file_input); + fclose(file_output); + + + // Nicely exit + exit(0); +} + + +// Ruotines not used by ppmc but rather by main. +// Not including the range coder. + + +// Returns the file size of a given file. +long filesize(FILE *stream) +{ + long curpos, length; + + curpos = ftell(stream); + fseek(stream, 0L, SEEK_END); + length = ftell(stream); + fseek(stream, curpos, SEEK_SET); + return length; +} + + -- 2.43.0