From d85375b5ea6773eb12e86e630961034cd5df1e96 Mon Sep 17 00:00:00 2001 From: Leandro Lucarella Date: Sun, 30 May 2004 11:07:26 +0000 Subject: [PATCH] =?utf8?q?Se=20separa=20el=20algoritmo=20de=20ordenamiento?= =?utf8?q?=20del=20ejemplo,=20se=20borran=20cosas=20obsoletas.=20Ya=20est?= =?utf8?q?=C3=A1=20todo=20listo,=20s=C3=B3lo=20falta=20convertir=20algunos?= =?utf8?q?=20tipos=20de=20datos=20para=20hacerlo=20gen=C3=A9rico.?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- emufs/external_sort/Makefile | 4 +- emufs/external_sort/base.h | 47 +++ emufs/external_sort/bufford.c | 6 +- emufs/external_sort/bufford.h | 15 +- emufs/external_sort/extsort.c | 120 +++++++ emufs/external_sort/extsort.cpp | 569 -------------------------------- emufs/external_sort/extsort.h | 95 +++--- emufs/external_sort/mergefile.h | 8 +- emufs/external_sort/mergepool.h | 6 +- emufs/external_sort/s_ext.c | 498 ---------------------------- emufs/external_sort/sort_test.c | 85 +---- 11 files changed, 236 insertions(+), 1217 deletions(-) create mode 100644 emufs/external_sort/base.h create mode 100644 emufs/external_sort/extsort.c delete mode 100644 emufs/external_sort/extsort.cpp delete mode 100644 emufs/external_sort/s_ext.c diff --git a/emufs/external_sort/Makefile b/emufs/external_sort/Makefile index 58ae406..12a24c5 100644 --- a/emufs/external_sort/Makefile +++ b/emufs/external_sort/Makefile @@ -9,7 +9,7 @@ ################ # Nombre del ejecutable. -TARGETS = s_ext bufford_test sort_test +TARGETS = bufford_test sort_test # Opciones para el compilador C. CFLAGS = -Wall -ggdb -DDEBUG @@ -25,7 +25,7 @@ CXXFLAGS = $(CFLAGS) -fno-inline all: $(TARGETS) bufford_test: bufford.o bufford_test.o -sort_test: bufford.o mergefile.o mergepool.o sort_test.o +sort_test: bufford.o mergefile.o mergepool.o extsort.o sort_test.o clean: @$(RM) -fv *.o $(TARGETS) diff --git a/emufs/external_sort/base.h b/emufs/external_sort/base.h new file mode 100644 index 0000000..fd8f8a9 --- /dev/null +++ b/emufs/external_sort/base.h @@ -0,0 +1,47 @@ +/* vim: set noexpandtab tabstop=4 shiftwidth=4: + *---------------------------------------------------------------------------- + * emufs + *---------------------------------------------------------------------------- + * This file is part of emufs. + * + * emufs is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any later + * version. + * + * emufs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with emufs; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place, Suite 330, Boston, MA 02111-1307 USA + *---------------------------------------------------------------------------- + * Creado: dom may 30 07:48:10 ART 2004 + * Autores: Leandro Lucarella + *---------------------------------------------------------------------------- + * + * $Id: bufford.h 609 2004-05-30 10:14:21Z llucare $ + * + */ + +/** \file + * + * Algoritmo de ordenamiento externo. + * + * Interfaz del algoritmo de ordenamiento externo. + * + */ + +#ifndef _EXTSORT_BASE_H_ +#define _EXTSORT_BASE_H_ + +/** Función utilizada para comparar al ordenar los datos. + * \return 0 si son iguales, mayor que cero si el primer argumento es mayor y + * menor que cero si el primer argumento es menor. + */ +typedef int (*CMP_FUNC)(void*, void*); + +#endif /* _EXTSORT_BASE_H_ */ + diff --git a/emufs/external_sort/bufford.c b/emufs/external_sort/bufford.c index 0ea8f88..776c516 100644 --- a/emufs/external_sort/bufford.c +++ b/emufs/external_sort/bufford.c @@ -22,7 +22,7 @@ * Autores: Leandro Lucarella *---------------------------------------------------------------------------- * - * $Id: tipo1.c 548 2004-05-28 22:44:27Z llucare $ + * $Id$ * */ @@ -139,8 +139,8 @@ BUFFORD* bufford_new(size_t size, size_t reg_size, CMP_FUNC cmp) BUFFORD* b = malloc(sizeof(BUFFORD)); if (!b) return 0; assert(cmp); + assert(size > 0); assert(reg_size > 0); - assert(size > reg_size); b->cmp = cmp; b->size = 0; b->root = 0; @@ -155,7 +155,7 @@ BUFFORD* bufford_new_bytes_size(size_t size, size_t reg_size, CMP_FUNC cmp) if (!b) return 0; assert(cmp); assert(reg_size > 0); - assert(size > reg_size); + assert(size > (reg_size + sizeof(BUFFORD_NODE))); b->cmp = cmp; b->size = 0; b->root = 0; diff --git a/emufs/external_sort/bufford.h b/emufs/external_sort/bufford.h index 99ce0e2..f5da610 100644 --- a/emufs/external_sort/bufford.h +++ b/emufs/external_sort/bufford.h @@ -22,7 +22,7 @@ * Autores: Leandro Lucarella *---------------------------------------------------------------------------- * - * $Id: tipo1.h 542 2004-05-28 19:45:02Z rmarkie $ + * $Id$ * */ @@ -35,17 +35,12 @@ * */ -#ifndef _EXTERNAL_SORT_BUFFORD_H_ -#define _EXTERNAL_SORT_BUFFORD_H_ +#ifndef _EXTSORT_BUFFORD_H_ +#define _EXTSORT_BUFFORD_H_ +#include "base.h" #include -/** Función utilizada para comparar al ordenar los datos. - * \return 0 si son iguales, mayor que cero si el primer argumento es mayor y - * menor que cero si el primer argumento es menor. - */ -typedef int (*CMP_FUNC)(void*, void*); - /** Nodo del buffer ordenado (uso interno). */ typedef struct _BUFFORD_NODE { @@ -128,5 +123,5 @@ void* bufford_get_min(BUFFORD* buff); */ void* bufford_get_next(BUFFORD* buff, void* min); -#endif /* _EXTERNAL_SORT_BUFFORD_H_ */ +#endif /* _EXTSORT_BUFFORD_H_ */ diff --git a/emufs/external_sort/extsort.c b/emufs/external_sort/extsort.c new file mode 100644 index 0000000..854d083 --- /dev/null +++ b/emufs/external_sort/extsort.c @@ -0,0 +1,120 @@ +/* vim: set noexpandtab tabstop=4 shiftwidth=4: + *---------------------------------------------------------------------------- + * emufs + *---------------------------------------------------------------------------- + * This file is part of emufs. + * + * emufs is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any later + * version. + * + * emufs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with emufs; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place, Suite 330, Boston, MA 02111-1307 USA + *---------------------------------------------------------------------------- + * Creado: dom may 30 07:48:10 ART 2004 + * Autores: Leandro Lucarella + *---------------------------------------------------------------------------- + * + * $Id: bufford.h 609 2004-05-30 10:14:21Z llucare $ + * + */ + +/** \file + * + * Algoritmo de ordenamiento externo. + * + * Implementación del algoritmo de ordenamiento externo. + * + */ + +#include "extsort.h" +#include "bufford.h" +#include "mergepool.h" +#include +#include + +int extsort(const char* filename, size_t buf_size, size_t reg_size, CMP_FUNC cmp) +{ + BUFFORD* b; + MERGEPOOL* pool; + FILE* fp; + int min; + assert(filename); + assert(reg_size); + assert(buf_size); + assert(cmp); + /* creo buffer ordenado */ + if (!(b = bufford_new_bytes_size(buf_size, reg_size, cmp))) { + return 0; /* ERROR */ + } + /* creo pool de archivos temporales ordenados */ + if (!(pool = mergepool_new())) { + bufford_delete(b); + return 0; /* ERROR */ + } + /* abro archivo a ordenar */ + if (!(fp = fopen(filename, "r"))) { + mergepool_delete(pool); + bufford_delete(b); + return 0; /* ERROR */ + } + /* lleno el buffer */ + while (1) { + int reg; + if (fscanf(fp, "%i", ®) < 1) break; /* EOF */ + if (!bufford_push(b, ®)) break; /* buffer lleno */ + } + /* creo archivos temporales ordenados usando el buffer */ + while (!bufford_empty(b)) { + int* c; + int x; + /* agrego un nuevo archivo temporal al pool */ + if (!mergepool_add_file(pool)) { + fclose(fp); + mergepool_delete(pool); + bufford_delete(b); + return 0; /* ERROR */ + } + /* lleno el archivo temporal ordenadamente usando el buffer */ + for (c = bufford_pop_min(b); c; c = bufford_pop_next(b, &x)) { + /* si hay un valor en la entrada, lo inserto */ + if ((fscanf(fp, "%i", &x) > 0) && !bufford_push(b, &x)) { + /* ERROR: no se pudo insertar */ + fclose(fp); + mergepool_delete(pool); + bufford_delete(b); + return 0; /* ERROR */ + } + x = *c; + free(c); + /* Agrego el dato al último archivo temporal */ + if (!mergepool_append_data(pool, x)) { + fclose(fp); + mergepool_delete(pool); + bufford_delete(b); + return 0; /* ERROR */ + } + } + } + fclose(fp); + /* Mezclo lotes */ + if (!(fp = fopen(filename, "w"))) { + mergepool_delete(pool); + bufford_delete(b); + } + /* obtengo próximo mínimo y lo guardo en el archivo de salida */ + while (mergepool_pop_min(pool, &min)) fprintf(fp, "%i\n", min); + /* libero todos los recursos */ + fclose(fp); + mergepool_delete(pool); + bufford_delete(b); + return 1; /* OK */ +} + diff --git a/emufs/external_sort/extsort.cpp b/emufs/external_sort/extsort.cpp deleted file mode 100644 index 7b295ec..0000000 --- a/emufs/external_sort/extsort.cpp +++ /dev/null @@ -1,569 +0,0 @@ -/* Filename: extsort.cpp - - Programmer: Br. David Carlson - - Date: April 18, 2003 - - This program does a case-insensitive sort of the text file that the user supplies - when prompted by the program. It is assumed that each line of the file contains a - word and is no more than 31 characters in length. No attempt is made to order in any - way words that are identical except for capitalization. (For example, there is no - guarantee what order the sort will place the words MacIntosh and Macintosh since - they are seen as identical. Duplicate words such as these are kept in the file.) - The output data is placed back under the original file name. - - Tested with: - Microsoft Visual C++ 6.0 - Microsoft Visual C++ .NET - This program will not work as is with g++ under Linux due to its use of items such as - the constant _MAX_PATH as well as the functions _itoa, and stricmp. -*/ - -#include "extsort.h" -#include - -void _itoa(int n, char* ext, int size) -{ - std::ostringstream oss; - oss << n; - strncpy(ext, oss.str().c_str(), size); -} - -int main(void) - { - PathType FileName; - fstream DataFile; - int NumRuns; - - cout << "Enter the name of the text file to be sorted: "; - cin.getline(FileName, _MAX_PATH); - - DataFile.open(FileName, ios::in); - if (DataFile.fail()) - Error("Could not open the file named ", FileName); - - // Copy sections of the data file into sorted runs. - NumRuns = MakeSortedRuns(DataFile, FileName); - DataFile.close(); - - // Merge the sorted runs until all of the data is in one sorted file (under the original FileName). - HandleMerges(NumRuns, FileName); - - return 0; - } - - -/* Given: DataFile A text file stream already opened for input. - FileName The name of this text file (as a char array string). - Task: To copy data from this file stream into sorted files (runs). - Return: DataFile The modified file stream is trivially modified by reading from it. - In the function name the number of runs is returned. - The sorted runs themselves are created and stored under the file names: - ExtSortTemp.0, ExtSortTemp.1, etc. -*/ -int MakeSortedRuns(fstream & DataFile, PathType FileName) - { - fstream OutFile; - StringType Word, Extension; - PathType OutFileName; - int NumWords, k, NumFiles = 0; - BufType Buffer; - bool MoreData; - - // Repeatedly fill one buffer and quicksort it, writing the buffer out to a temp file. - DataFile.getline(Word, MaxString); - if (DataFile.fail()) - MoreData = false; - else - MoreData = true; - - while (MoreData) - { - // Fill one buffer. - NumWords = FillBuffer(DataFile, Buffer, MoreData, Word); - QuickSort(Buffer, 0, NumWords - 1); - - // Construct the temp file name. - strcpy(OutFileName, "ExtSortTemp."); - _itoa(NumFiles, Extension, 10); // Convert the int in NumFiles to a char array string. - strcat(OutFileName, Extension); - - OutFile.open(OutFileName, ios::out); - if (OutFile.fail()) - Error("Could not open the file named ", OutFileName); - - for (k = 0; k < NumWords; k++) - OutFile << Buffer[k] << endl; - - #ifdef DEBUG - OutFile.seekp(0, ios_base::end); - cout << "Tamaño del archivo '" << OutFileName << "': " << OutFile.tellp() / 1024 << " KB\n"; - #endif - OutFile.close(); - NumFiles++; - } - - return NumFiles; - } - - -/* Given: NumFiles Number of sorted runs (files) to be merged. - FileName The name of the text file to contain the merged data. - Indirectly this function is also given the sorted runs, which are assumed to be - files named ExtSortTemp.0, ExtSortTemp.1, etc. - Task: To merge the data from these sorted runs into one combined, sorted file with - the name given by FileName. - Return: Nothing directly, but the file named by FileName is modified (by being sorted). -*/ -void HandleMerges(int NumFiles, PathType FileName) - { - StringType Extension; - PathType OutFileName, InFileName1, InFileName2; - int k, NumPairs, Count; - bool Odd; - - // Repeatedly merge 2 runs, writing data to a new temp file (but write last one to original file). - if (NumFiles == 0) // No data is present, so there is no work to do. - return; - else if (NumFiles == 1) - { - // Remove original file and rename the temp file using name of the original. - remove(FileName); - rename("ExtSortTemp.0", FileName); - - #ifdef DEBUG - cout << "Renaming ExtSortTemp.0 as " << FileName << endl; - #endif - - return; // The sort is finished. - } - else if (NumFiles == 2) - { - // Merge the 2 sorted temp files into original file. - Merge("ExtSortTemp.0", "ExtSortTemp.1", FileName); - remove("ExtSortTemp.0"); - remove("ExtSortTemp.1"); - return; // The sort is finished. - } - else // We have 3 or more files. - { - // Merge temp files, 2 at a time, until only 2 remain, then proceed as in last case. - while (NumFiles > 2) - { - // Merge ExtSortTemp.0 and ExtSortTemp.1 into ExtSortTempA.0, - // merge ExtSortTemp.2 and ExtSortTemp.3 into ExtSortTempA.1, etc. - NumPairs = NumFiles / 2; - for (k = 0, Count = 0; k < NumPairs; k++) - { - strcpy(InFileName1 , "ExtSortTemp."); - _itoa(Count++, Extension, 10); - strcat(InFileName1, Extension); - strcpy(InFileName2 , "ExtSortTemp."); - _itoa(Count++, Extension, 10); - strcat(InFileName2, Extension); - strcpy(OutFileName , "ExtSortTempA."); - _itoa(k, Extension, 10); - strcat(OutFileName, Extension); - Merge(InFileName1, InFileName2, OutFileName); - } - // If the number of files is odd, just rename the last one. - if (2 * NumPairs < NumFiles) - { - Odd = true; - strcpy(InFileName1 , "ExtSortTemp."); - _itoa(Count, Extension, 10); - strcat(InFileName1, Extension); - strcpy(OutFileName , "ExtSortTempA."); - _itoa(NumPairs, Extension, 10); - strcat(OutFileName, Extension); - rename(InFileName1, OutFileName); - - #ifdef DEBUG - cout << "Renaming " << InFileName1 << " as " << OutFileName << endl; - #endif - } - else - Odd = false; - - // Remove the temp files from before the merge. - for (k = 0; k < NumFiles; k++) - { - strcpy(InFileName1 , "ExtSortTemp."); - _itoa(k, Extension, 10); - strcat(InFileName1, Extension); - remove(InFileName1); - } - - // Get the new number of sorted files. - NumFiles = NumPairs; - if (Odd) - NumFiles++; - - // If number of ExtSortTempA files is now 2, merge them with output to original file and return. - if (NumFiles == 2) - { - Merge("ExtSortTempA.0", "ExtSortTempA.1", FileName); - remove("ExtSortTempA.0"); - remove("ExtSortTempA.1"); - return; // The sort is finished. - } - - // Otherwise, merge pairs of files as above, but start at the top end so as to - // incorporate any small remainder file. Note that we now merge from the - // ExtSortTempA files into ExtSortTemp files. - NumPairs = NumFiles / 2; - for (k = 0, Count = NumFiles - 1; k < NumPairs; k++) - { - strcpy(InFileName1 , "ExtSortTempA."); - _itoa(Count--, Extension, 10); - strcat(InFileName1, Extension); - strcpy(InFileName2 , "ExtSortTempA."); - _itoa(Count--, Extension, 10); - strcat(InFileName2, Extension); - strcpy(OutFileName , "ExtSortTemp."); - _itoa(k, Extension, 10); - strcat(OutFileName, Extension); - Merge(InFileName1, InFileName2, OutFileName); - } - // If the number of files is odd, just rename the last one. - if (2 * NumPairs < NumFiles) - { - Odd = true; - strcpy(InFileName1 , "ExtSortTempA."); - _itoa(0, Extension, 10); - strcat(InFileName1, Extension); - strcpy(OutFileName , "ExtSortTemp."); - _itoa(NumPairs, Extension, 10); - strcat(OutFileName, Extension); - rename(InFileName1, OutFileName); - - #ifdef DEBUG - cout << "Renaming " << InFileName1 << " as " << OutFileName << endl; - #endif - } - else - Odd = false; - - // Remove the temp files from before the merge. - for (k = 0; k < NumFiles; k++) - { - strcpy(InFileName1 , "ExtSortTempA."); - _itoa(k, Extension, 10); - strcat(InFileName1, Extension); - remove(InFileName1); - } - - // Get the new number of sorted files. - NumFiles = NumPairs; - if (Odd) - NumFiles++; - - // If number of ExtSortTemp files is now 2, merge them with output to original file and return. - if (NumFiles == 2) - { - Merge("ExtSortTemp.0", "ExtSortTemp.1", FileName); - remove("ExtSortTemp.0"); - remove("ExtSortTemp.1"); - return; // The sort is finished. - } - } - } - } - - -/* Given: InFile Text file stream already opened for input. - MoreData Indicates if there is more data from InFile to place into Buffer. - At the very least this indicates if there is an unused item in NextWord. - NextWord If MoreData is true, then NextWord contains an unprocessed word from - InFile that has yet to be placed into Buffer. - Task: If MoreData is true, this function places as much data as possible from InFile into - Buffer. This data includes NextWord and then as much new data as possible from InFile. - The Buffer will be filled if there is enough data to do so. - Return: In the function name is returned the number of words placed into Buffer. - InFile The modified file stream. - Buffer Contains the data that has been copied in from InFile. - MoreData Indicates if NextWord contains a word that has not yet been placed into Buffer. - NextWord If MoreData is true, this contains the next word from InFile that has - not yet been placed into Buffer. -*/ -int FillBuffer(fstream & InFile, BufType Buffer, bool & MoreData, StringType NextWord) - { - int k = 0; - - while (MoreData && (k < BufSize)) - { - strcpy(Buffer[k], NextWord); - InFile.getline(NextWord, MaxString); - if (InFile.fail()) - MoreData = false; - else - MoreData = true; - k++; - } - - return k; - } - - -/* Given: InFileName1 The name of the first text file to be merged. - InFileName2 The name of the second text file to be merged. - OutFileName The name of the file in which to place the merged data. - Task: To merge the sorted text files named InFileName1 and InFileName2 into one - named by OutFileName. - Return: Nothing directly, but the text file named by OutFileName is created and filled - with the sorted data from InFileName1 and InFileName2. -*/ -void Merge(StringType InFileName1, StringType InFileName2, StringType OutFileName) - { - fstream InFile1, InFile2, OutFile; - BufType Buffer1, Buffer2, Buffer3; - bool HaveData1, HaveData2, MoreData1, MoreData2; - StringType NextWord1, NextWord2; - int k, Result, NumWords1, NumWords2, Index1, Index2, Index3 = 0; - - InFile1.open(InFileName1, ios::in); - if (InFile1.fail()) - Error("Could not open the file named ", InFileName1); - - InFile2.open(InFileName2, ios::in); - if (InFile2.fail()) - Error("Could not open the file named ", InFileName2); - - OutFile.open(OutFileName, ios::out); - if (OutFile.fail()) - Error("Could not open the file named ", OutFileName); - - #ifdef DEBUG - cout << "Merging " << InFileName1 << " and " << InFileName2 << " to " << OutFileName << endl; - #endif - - // Try to fill a buffer from InFile1. - InFile1.getline(NextWord1, MaxString); - if (InFile1.fail()) - MoreData1 = false; - else - { - MoreData1 = true; - NumWords1 = FillBuffer(InFile1, Buffer1, MoreData1, NextWord1); - } - - // Try to fill a buffer from InFile2. - InFile2.getline(NextWord2, MaxString); - if (InFile2.fail()) - MoreData2 = false; - else - { - MoreData2 = true; - NumWords2 = FillBuffer(InFile2, Buffer2, MoreData2, NextWord2); - } - - Index1 = 0; - Index2 = 0; - // Each HaveData boolean indicates if we have more unmerged data. This data could be in - // the corresponding NextWord variable, the corresponding Buffer, or both. - HaveData1 = MoreData1 || (Index1 < NumWords1); - HaveData2 = MoreData2 || (Index2 < NumWords2); - - while (HaveData1 && HaveData2) - { - if (Index1 == NumWords1) - { - NumWords1 = FillBuffer(InFile1, Buffer1, MoreData1, NextWord1); - if (NumWords1 == 0) - break; // We are out of data from InFile1. - else - Index1 = 0; - } - if (Index2 == NumWords2) - { - NumWords2 = FillBuffer(InFile2, Buffer2, MoreData2, NextWord2); - if (NumWords2 == 0) - break; // We are out of data from InFile2. - else - Index2 = 0; - } - - Result = strcmp(Buffer1[Index1], Buffer2[Index2]); // case insensitive compare - - if (Result == 0) // Copy both data items. - { - Copy(Buffer1[Index1], Buffer3, Index3, OutFile); - Copy(Buffer2[Index2], Buffer3, Index3, OutFile); - Index1++; - Index2++; - } - else if (Result < 0) // Copy the first data item. - { - Copy(Buffer1[Index1], Buffer3, Index3, OutFile); - Index1++; - } - else // Result > 0, so copy the second data item. - { - Copy(Buffer2[Index2], Buffer3, Index3, OutFile); - Index2++; - } - - HaveData1 = MoreData1 || (Index1 < NumWords1); - HaveData2 = MoreData2 || (Index2 < NumWords2); - } - - // Handle remaining data in either file. - while (HaveData1) - { - if (Index1 == NumWords1) - { - NumWords1 = FillBuffer(InFile1, Buffer1, MoreData1, NextWord1); - if (NumWords1 == 0) - break; // We are out of data from InFile1. - else - Index1 = 0; - } - - Copy(Buffer1[Index1], Buffer3, Index3, OutFile); - Index1++; - HaveData1 = MoreData1 || (Index1 < NumWords1); - } - - while (HaveData2) - { - if (Index2 == NumWords2) - { - NumWords2 = FillBuffer(InFile2, Buffer2, MoreData2, NextWord2); - if (NumWords2 == 0) - break; // We are out of data from InFile2. - else - Index2 = 0; - } - - Copy(Buffer2[Index2], Buffer3, Index3, OutFile); - Index2++; - HaveData2 = MoreData2 || (Index2 < NumWords2); - } - - // Flush any remaining data from the output buffer. - for (k = 0; k < Index3; k++) - OutFile << Buffer3[k] << endl; - - #ifdef DEBUG - OutFile.seekp(0, ios_base::end); - cout << "Tamaño del archivo '" << OutFileName << "': " << OutFile.tellp() / 1024 << " KB\n"; - #endif - InFile1.close(); - InFile2.close(); - OutFile.close(); - } - - -/* Given: Word One word of data to be placed into Buffer. - Buffer A buffer that may already contain words. - Index The first unused index in Buffer. - OutFile A text file stream already opened for output. - Task: To place Word into Buffer. If the Buffer is full, the data - already in it is first written out to OutFile, then Word - is placed into Buffer as the only item present. - Return: Buffer The updated buffer. - Index The updated index of the first unused location in Buffer. - OutFile The possibly updated file stream. -*/ -void Copy(StringType Word, BufType Buffer, int & Index, fstream & OutFile) - { - int k; - - if (Index == BufSize) // There is no room in Buffer, so first write the Buffer data to OutFile. - { - for (k = 0; k < BufSize; k++) - OutFile << Buffer[k] << endl; - Index = 0; - } - - strcpy(Buffer[Index], Word); - Index++; - } - - -/* Given: First A char array string. - Second Another char array string. - Task: To swap these values. - Return: First Updated string. - Second Updated string. -*/ -inline void Swap(StringType First, StringType Second) - { - StringType Temp; - - strcpy(Temp, First); - strcpy(First, Second); - strcpy(Second, Temp); - } - - -/* Given: Buf Array of char array strings - Lower An index into the array. - Upper An index into the array. - Task: To quicksort the section of the array from index Lower to Upper. - Return: Buf The sorted array. -*/ -void QuickSort(BufType Buf, int Lower, int Upper) - { - int PivotIndex; - - if (Lower < Upper) - { - PivotIndex = Partition(Buf, Lower, Upper); - QuickSort(Buf, Lower, PivotIndex - 1); // sort left side - QuickSort(Buf, PivotIndex + 1, Upper); // sort right side - } - } - - -/* Given: Buf Array of char array strings. - Lower An index into the array. - Upper An index into the array. - Assume: Lower < Upper - Task: To partition the section of Buf between index Lower and - index Upper so that everything to the left of the returned - index is less or equal to the pivot item, which is Buf[Lower], - everything to the right of the returned index is greater - than the pivot item, and the pivot item itself is located - at the returned index. - Return: Buf The partitioned array. - In the function name this returns the index of the pivot value. -*/ -int Partition(BufType Buf, int Lower, int Upper) - { - int Left, Right; - StringType Pivot; - - strcpy(Pivot, Buf[Lower]); - Left = Lower; - Right = Upper; - - while (Left < Right) - { - // Scan from left, skipping items that belong there. Use case-insensitive compare. - while ((strcmp(Buf[Left], Pivot) <= 0) && (Left < Upper)) - Left++; - // Scan from right, skipping items that belong there. Use case-insensitive compare. - while (strcmp(Buf[Right], Pivot) > 0) - Right--; - if (Left < Right) - Swap(Buf[Left], Buf[Right]); - } - - strcpy(Buf[Lower], Buf[Right]); - strcpy(Buf[Right], Pivot); - return Right; // return the pivot index - } - - - -/* Given: MsgA, MsgB Two messages (string objects) to display. - Task: Display MsgA and MsgB, then exit the program. - Return: Nothing to the program, but does return an error code of 1 to the operating system. -*/ -void Error(const string & MsgA, const string & MsgB) - { - cerr << "Error: " << MsgA << MsgB << endl; - exit(1); - } - diff --git a/emufs/external_sort/extsort.h b/emufs/external_sort/extsort.h index 8001bbd..8ca30b4 100644 --- a/emufs/external_sort/extsort.h +++ b/emufs/external_sort/extsort.h @@ -1,51 +1,46 @@ -/* Filename: extsort.h - - Programmer: Br. David Carlson - - Date: April 18, 2003 - - This header file sets up the constants, types, and function prototypes used by the - ExtSort program. -*/ - -#include -#include -#include -using namespace std; - - -// Comment off the following line if you do not want to see debugging information. -//#define DEBUG - -enum { MaxString = 32, BufSize = 2048, _MAX_PATH = 64 }; - - -typedef char StringType[MaxString]; - -typedef StringType BufType[BufSize]; // A buffer will hold 2048 strings, for a total of 64K bytes. - -typedef char PathType[_MAX_PATH]; - - -void Error(const string & Msg); - -void Error(const string & MsgA, const string & MsgB); - -int MakeSortedRuns(fstream & DataFile, PathType FileName); - -bool LessThan(StringType First, StringType Second); - -void QuickSort(BufType Buf, int Lower, int Upper); - -void Swap(StringType First, StringType Second); - -int Partition(BufType Buf, int Lower, int Upper); - -void Merge(StringType InFileName1, StringType InFileName2, StringType OutFileName); - -int FillBuffer(fstream & InFile, BufType Buffer, bool & HaveData, StringType NextWord); - -void Copy(StringType Word, BufType Buffer, int & Index, fstream & OutFile); - -void HandleMerges(int NumFiles, PathType FileName); +/* vim: set noexpandtab tabstop=4 shiftwidth=4: + *---------------------------------------------------------------------------- + * emufs + *---------------------------------------------------------------------------- + * This file is part of emufs. + * + * emufs is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any later + * version. + * + * emufs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with emufs; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place, Suite 330, Boston, MA 02111-1307 USA + *---------------------------------------------------------------------------- + * Creado: dom may 30 07:48:10 ART 2004 + * Autores: Leandro Lucarella + *---------------------------------------------------------------------------- + * + * $Id$ + * + */ + +/** \file + * + * Algoritmo de ordenamiento externo. + * + * Interfaz del algoritmo de ordenamiento externo. + * + */ + +#ifndef _EXTSORT_EXTSORT_H_ +#define _EXTSORT_EXTSORT_H_ + +#include "base.h" +#include + +int extsort(const char* filename, size_t buf_size, size_t reg_size, CMP_FUNC cmp); + +#endif /* _EXTSORT_EXTSORT_H_ */ diff --git a/emufs/external_sort/mergefile.h b/emufs/external_sort/mergefile.h index 44c5fdd..221a055 100644 --- a/emufs/external_sort/mergefile.h +++ b/emufs/external_sort/mergefile.h @@ -22,7 +22,7 @@ * Autores: Leandro Lucarella *---------------------------------------------------------------------------- * - * $Id: tipo1.h 542 2004-05-28 19:45:02Z rmarkie $ + * $Id$ * */ @@ -35,8 +35,8 @@ * */ -#ifndef _MERGEFILE_H_ -#define _MERGEFILE_H_ +#ifndef _EXTSORT_MERGEFILE_H_ +#define _EXTSORT_MERGEFILE_H_ #include @@ -62,5 +62,5 @@ int mergefile_peek(MERGEFILE* mf); int mergefile_has_more(MERGEFILE* mf); -#endif /* _MERGEFILE_H_ */ +#endif /* _EXTSORT_MERGEFILE_H_ */ diff --git a/emufs/external_sort/mergepool.h b/emufs/external_sort/mergepool.h index 49dee6a..f3dcdf1 100644 --- a/emufs/external_sort/mergepool.h +++ b/emufs/external_sort/mergepool.h @@ -35,8 +35,8 @@ * */ -#ifndef _MERGEPOOL_H_ -#define _MERGEPOOL_H_ +#ifndef _EXTSORT_MERGEPOOL_H_ +#define _EXTSORT_MERGEPOOL_H_ #include "mergefile.h" @@ -58,5 +58,5 @@ int mergepool_append_data(MERGEPOOL* mp, int data); int mergepool_pop_min(MERGEPOOL* mp, int* min); -#endif /* _MERGEPOOL_H_ */ +#endif /* _EXTSORT_MERGEPOOL_H_ */ diff --git a/emufs/external_sort/s_ext.c b/emufs/external_sort/s_ext.c deleted file mode 100644 index fc48d82..0000000 --- a/emufs/external_sort/s_ext.c +++ /dev/null @@ -1,498 +0,0 @@ -/* external sort */ - -#include -#include -#include - -/**************************** - * implementation dependent * - ****************************/ - -/* template for workfiles (8.3 format) */ -#define FNAME "_sort%03d.dat" -#define LNAME 13 - -/* comparison operators */ -#define compLT(x,y) (x < y) -#define compGT(x,y) (x > y) - -/* define the record to be sorted here */ -#define LRECL 100 -typedef int keyType; -typedef struct recTypeTag { - keyType key; /* sort key for record */ - #if LRECL - char data[LRECL-sizeof(keyType)]; /* other fields */ - #endif -} recType; - -/****************************** - * implementation independent * - ******************************/ - -typedef enum {false, true} bool; - -typedef struct tmpFileTag { - FILE *fp; /* file pointer */ - char name[LNAME]; /* filename */ - recType rec; /* last record read */ - int dummy; /* number of dummy runs */ - bool eof; /* end-of-file flag */ - bool eor; /* end-of-run flag */ - bool valid; /* true if rec is valid */ - int fib; /* ideal fibonacci number */ -} tmpFileType; - -static tmpFileType **file; /* array of file info for tmp files */ -static int nTmpFiles; /* number of tmp files */ -static char *ifName; /* input filename */ -static char *ofName; /* output filename */ - -static int level; /* level of runs */ -static int nNodes; /* number of nodes for selection tree */ - -void deleteTmpFiles(void) { - int i; - - /* delete merge files and free resources */ - if (file) { - for (i = 0; i < nTmpFiles; i++) { - if (file[i]) { - if (file[i]->fp) fclose(file[i]->fp); - if (*file[i]->name) remove(file[i]->name); - free (file[i]); - } - } - free (file); - } -} - -void termTmpFiles(int rc) { - - /* cleanup files */ - remove(ofName); - if (rc == 0) { - int fileT; - - /* file[T] contains results */ - fileT = nTmpFiles - 1; - fclose(file[fileT]->fp); file[fileT]->fp = NULL; - if (rename(file[fileT]->name, ofName)) { - perror("io1"); - deleteTmpFiles(); - exit(1); - } - *file[fileT]->name = 0; - } - deleteTmpFiles(); -} - -void cleanExit(int rc) { - - /* cleanup tmp files and exit */ - termTmpFiles(rc); - exit(rc); -} - -void *safeMalloc(size_t size) { - void *p; - - /* safely allocate memory and initialize to zero */ - if ((p = calloc(1, size)) == NULL) { - printf("error: malloc failed, size = %d\n", size); - cleanExit(1); - } - return p; -} - -void initTmpFiles(void) { - int i; - tmpFileType *fileInfo; - - /* initialize merge files */ - if (nTmpFiles < 3) nTmpFiles = 3; - file = safeMalloc(nTmpFiles * sizeof(tmpFileType*)); - fileInfo = safeMalloc(nTmpFiles * sizeof(tmpFileType)); - for (i = 0; i < nTmpFiles; i++) { - file[i] = fileInfo + i; - sprintf(file[i]->name, FNAME, i); - if ((file[i]->fp = fopen(file[i]->name, "w+b")) == NULL) { - perror("io2"); - cleanExit(1); - } - } -} - -recType *readRec(void) { - - typedef struct iNodeTag { /* internal node */ - struct iNodeTag *parent;/* parent of internal node */ - struct eNodeTag *loser; /* external loser */ - } iNodeType; - - typedef struct eNodeTag { /* external node */ - struct iNodeTag *parent;/* parent of external node */ - recType rec; /* input record */ - int run; /* run number */ - bool valid; /* input record is valid */ - } eNodeType; - - typedef struct nodeTag { - iNodeType i; /* internal node */ - eNodeType e; /* external node */ - } nodeType; - - static nodeType *node; /* array of selection tree nodes */ - static eNodeType *win; /* new winner */ - static FILE *ifp; /* input file */ - static bool eof; /* true if end-of-file, input */ - static int maxRun; /* maximum run number */ - static int curRun; /* current run number */ - iNodeType *p; /* pointer to internal nodes */ - static bool lastKeyValid; /* true if lastKey is valid */ - static keyType lastKey; /* last key written */ - - /* read next record using replacement selection */ - - /* check for first call */ - if (node == NULL) { - int i; - - if (nNodes < 2) nNodes = 2; - node = safeMalloc(nNodes * sizeof(nodeType)); - for (i = 0; i < nNodes; i++) { - node[i].i.loser = &node[i].e; - node[i].i.parent = &node[i/2].i; - node[i].e.parent = &node[(nNodes + i)/2].i; - node[i].e.run = 0; - node[i].e.valid = false; - } - win = &node[0].e; - lastKeyValid = false; - - if ((ifp = fopen(ifName, "rb")) == NULL) { - printf("error: file %s, unable to open\n", ifName); - cleanExit(1); - } - } - - while (1) { - - /* replace previous winner with new record */ - if (!eof) { - if (fread(&win->rec, sizeof(recType), 1, ifp) == 1) { - if ((!lastKeyValid || compLT(win->rec.key, lastKey)) - && (++win->run > maxRun)) - maxRun = win->run; - win->valid = true; - } else if (feof(ifp)) { - fclose(ifp); - eof = true; - win->valid = false; - win->run = maxRun + 1; - } else { - perror("io4"); - cleanExit(1); - } - } else { - win->valid = false; - win->run = maxRun + 1; - } - - /* adjust loser and winner pointers */ - p = win->parent; - do { - bool swap; - swap = false; - if (p->loser->run < win->run) { - swap = true; - } else if (p->loser->run == win->run) { - if (p->loser->valid && win->valid) { - if (compLT(p->loser->rec.key, win->rec.key)) - swap = true; - } else { - swap = true; - } - } - if (swap) { - /* p should be winner */ - eNodeType *t; - - t = p->loser; - p->loser = win; - win = t; - } - p = p->parent; - } while (p != &node[0].i); - - /* end of run? */ - if (win->run != curRun) { - /* win->run = curRun + 1 */ - if (win->run > maxRun) { - /* end of output */ - free(node); - return NULL; - } - curRun = win->run; - } - - /* output top of tree */ - if (win->run) { - lastKey = win->rec.key; - lastKeyValid = true; - return &win->rec; - } - } -} - -void makeRuns(void) { - recType *win; /* winner */ - int fileT; /* last file */ - int fileP; /* next to last file */ - int j; /* selects file[j] */ - - - /* Make initial runs using replacement selection. - * Runs are written using a Fibonacci distintbution. - */ - - /* initialize file structures */ - fileT = nTmpFiles - 1; - fileP = fileT - 1; - for (j = 0; j < fileT; j++) { - file[j]->fib = 1; - file[j]->dummy = 1; - } - file[fileT]->fib = 0; - file[fileT]->dummy = 0; - - level = 1; - j = 0; - - - win = readRec(); - while (win) { - bool anyrun; - - anyrun = false; - for (j = 0; win && j <= fileP; j++) { - bool run; - - run = false; - if (file[j]->valid) { - if (!compLT(win->key, file[j]->rec.key)) { - /* append to an existing run */ - run = true; - } else if (file[j]->dummy) { - /* start a new run */ - file[j]->dummy--; - run = true; - } - } else { - /* first run in file */ - file[j]->dummy--; - run = true; - } - - if (run) { - anyrun = true; - - /* flush run */ - while(1) { - if (fwrite(win, sizeof(recType), 1, file[j]->fp) != 1) { - perror("io3"); - cleanExit(1); - } - file[j]->rec.key = win->key; - file[j]->valid = true; - if ((win = readRec()) == NULL) break; - if (compLT(win->key, file[j]->rec.key)) break; - } - } - } - - /* if no room for runs, up a level */ - if (!anyrun) { - int t; - level++; - t = file[0]->fib; - for (j = 0; j <= fileP; j++) { - file[j]->dummy = t + file[j+1]->fib - file[j]->fib; - file[j]->fib = t + file[j+1]->fib; - } - } - } -} - -void rewindFile(int j) { - /* rewind file[j] and read in first record */ - file[j]->eor = false; - file[j]->eof = false; - rewind(file[j]->fp); - if (fread(&file[j]->rec, sizeof(recType), 1, file[j]->fp) != 1) { - if (feof(file[j]->fp)) { - file[j]->eor = true; - file[j]->eof = true; - } else { - perror("io5"); - cleanExit(1); - } - } -} - -void mergeSort(void) { - int fileT; - int fileP; - int j; - tmpFileType *tfile; - - /* polyphase merge sort */ - - fileT = nTmpFiles - 1; - fileP = fileT - 1; - - /* prime the files */ - for (j = 0; j < fileT; j++) { - rewindFile(j); - } - - /* each pass through loop merges one run */ - while (level) { - while(1) { - bool allDummies; - bool anyRuns; - - /* scan for runs */ - allDummies = true; - anyRuns = false; - for (j = 0; j <= fileP; j++) { - if (!file[j]->dummy) { - allDummies = false; - if (!file[j]->eof) anyRuns = true; - } - } - - if (anyRuns) { - int k; - keyType lastKey; - - /* merge 1 run file[0]..file[P] --> file[T] */ - - while(1) { - /* each pass thru loop writes 1 record to file[fileT] */ - - /* find smallest key */ - k = -1; - for (j = 0; j <= fileP; j++) { - if (file[j]->eor) continue; - if (file[j]->dummy) continue; - if (k < 0 || - (k != j && compGT(file[k]->rec.key, file[j]->rec.key))) - k = j; - } - if (k < 0) break; - - /* write record[k] to file[fileT] */ - if (fwrite(&file[k]->rec, sizeof(recType), 1, - file[fileT]->fp) != 1) { - perror("io6"); - cleanExit(1); - } - - /* replace record[k] */ - lastKey = file[k]->rec.key; - if (fread(&file[k]->rec, sizeof(recType), 1, - file[k]->fp) == 1) { - /* check for end of run on file[s] */ - if (compLT(file[k]->rec.key, lastKey)) - file[k]->eor = true; - } else if (feof(file[k]->fp)) { - file[k]->eof = true; - file[k]->eor = true; - } else { - perror("io7"); - cleanExit(1); - } - } - - /* fixup dummies */ - for (j = 0; j <= fileP; j++) { - if (file[j]->dummy) file[j]->dummy--; - if (!file[j]->eof) file[j]->eor = false; - } - - } else if (allDummies) { - for (j = 0; j <= fileP; j++) - file[j]->dummy--; - file[fileT]->dummy++; - } - - /* end of run */ - if (file[fileP]->eof && !file[fileP]->dummy) { - /* completed a fibonocci-level */ - level--; - if (!level) { - /* we're done, file[fileT] contains data */ - return; - } - - /* fileP is exhausted, reopen as new */ - fclose(file[fileP]->fp); - if ((file[fileP]->fp = fopen(file[fileP]->name, "w+b")) - == NULL) { - perror("io8"); - cleanExit(1); - } - file[fileP]->eof = false; - file[fileP]->eor = false; - - rewindFile(fileT); - - /* f[0],f[1]...,f[fileT] <-- f[fileT],f[0]...,f[T-1] */ - tfile = file[fileT]; - memmove(file + 1, file, fileT * sizeof(tmpFileType*)); - file[0] = tfile; - - /* start new runs */ - for (j = 0; j <= fileP; j++) - if (!file[j]->eof) file[j]->eor = false; - } - } - - } -} - - -void extSort(void) { - initTmpFiles(); - makeRuns(); - mergeSort(); - termTmpFiles(0); -} - -int main(int argc, char *argv[]) { - - /* command-line: - * - * ext ifName ofName nTmpFiles nNodes - * - * ext in.dat out.dat 5 2000 - * reads in.dat, sorts using 5 files and 2000 nodes, output to out.dat - */ - if (argc != 5) { - printf("%s ifName ofName nTmpFiles nNodes\n", argv[0]); - cleanExit(1); - } - - ifName = argv[1]; - ofName = argv[2]; - nTmpFiles = atoi(argv[3]); - nNodes = atoi(argv[4]); - - printf("extSort: nFiles=%d, nNodes=%d, lrecl=%d\n", - nTmpFiles, nNodes, sizeof(recType)); - - extSort(); - - return 0; -} diff --git a/emufs/external_sort/sort_test.c b/emufs/external_sort/sort_test.c index 0814e98..6417c91 100644 --- a/emufs/external_sort/sort_test.c +++ b/emufs/external_sort/sort_test.c @@ -1,9 +1,6 @@ -#include "bufford.h" -#include "mergepool.h" +#include "extsort.h" #include -#include -#include /* typedef struct @@ -24,6 +21,7 @@ int cmp(void* x, void* y) return 0; } +/* void buffer_dump(BUFFORD* b) { int* c; @@ -32,83 +30,14 @@ void buffer_dump(BUFFORD* b) printf("%i ", *c); printf("\n"); } +*/ int main(int argc, char* argv[]) { - MERGEPOOL* pool; - FILE* fp; - int i; - BUFFORD* b = bufford_new(8, sizeof(int), &cmp); - if (!b) return 1; - if (!(pool = mergepool_new())) { - bufford_delete(b); - return 2; - } - if (!(fp = fopen(argv[1], "r"))) { - mergepool_delete(pool); - bufford_delete(b); - return 12; - } - /* lleno el buffer */ - while (1) { - int reg; - fscanf(fp, "%i", ®); - if (feof(fp)) break; - if (!bufford_push(b, ®)) break; - } - printf("Buffer lleno, hago el primer lote...\n"); - buffer_dump(b); - /* Hago lotes */ - i = 0; - while (!bufford_empty(b)) { - int* c; - int x; - printf("Creando lote %i:\n", i); - if (!mergepool_add_file(pool)) { - fclose(fp); - mergepool_delete(pool); - bufford_delete(b); - return 3; - } - for (c = bufford_pop_min(b); c; c = bufford_pop_next(b, &x)) { - /* si hay un valor en la entrada, lo inserto */ - if (!feof(fp)) fscanf(fp, "%i", &x); - if (!feof(fp) && !bufford_push(b, &x)) { - /* ERROR: no se pudo insertar */ - fclose(fp); - mergepool_delete(pool); - bufford_delete(b); - return 4; - } - x = *c; - free(c); - printf("%- 8i\t", x); - if (!mergepool_append_data(pool, x)) { - printf("No se pudo escribir en el mergefile %i\n", i); - fclose(fp); - mergepool_delete(pool); - bufford_delete(b); - return 4; - } - buffer_dump(b); - } - printf("Lote %i finalizado!\n\n", i); - i++; - } - fclose(fp); - /* Mezclo lotes */ - { - int min; - FILE* fp = fopen("salida.txt", "w"); - assert(fp); - printf("Abriendo archivos y buscando mínimo...\n"); - printf("Iterando...\n"); - /* voy obteniendo el próximo mínimo y guardándolo en el archivo de salida */ - while (mergepool_pop_min(pool, &min)) fprintf(fp, "%i\n", min); - fclose(fp); - } - mergepool_delete(pool); - bufford_delete(b); + if (extsort(argv[1], 256, sizeof(int), &cmp)) + printf("Archivo ordenado!\n"); + else + printf("No se pudo ordenar archivo!\n"); return 0; } -- 2.43.0