################
# Nombre del ejecutable.
-TARGETS = s_ext bufford_test sort_test
+TARGETS = bufford_test sort_test
# Opciones para el compilador C.
CFLAGS = -Wall -ggdb -DDEBUG
all: $(TARGETS)
bufford_test: bufford.o bufford_test.o
-sort_test: bufford.o mergefile.o mergepool.o sort_test.o
+sort_test: bufford.o mergefile.o mergepool.o extsort.o sort_test.o
clean:
@$(RM) -fv *.o $(TARGETS)
--- /dev/null
+/* vim: set noexpandtab tabstop=4 shiftwidth=4:
+ *----------------------------------------------------------------------------
+ * emufs
+ *----------------------------------------------------------------------------
+ * This file is part of emufs.
+ *
+ * emufs is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * emufs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with emufs; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place, Suite 330, Boston, MA 02111-1307 USA
+ *----------------------------------------------------------------------------
+ * Creado: dom may 30 07:48:10 ART 2004
+ * Autores: Leandro Lucarella <llucare@fi.uba.ar>
+ *----------------------------------------------------------------------------
+ *
+ * $Id: bufford.h 609 2004-05-30 10:14:21Z llucare $
+ *
+ */
+
+/** \file
+ *
+ * Algoritmo de ordenamiento externo.
+ *
+ * Interfaz del algoritmo de ordenamiento externo.
+ *
+ */
+
+#ifndef _EXTSORT_BASE_H_
+#define _EXTSORT_BASE_H_
+
+/** Función utilizada para comparar al ordenar los datos.
+ * \return 0 si son iguales, mayor que cero si el primer argumento es mayor y
+ * menor que cero si el primer argumento es menor.
+ */
+typedef int (*CMP_FUNC)(void*, void*);
+
+#endif /* _EXTSORT_BASE_H_ */
+
* Autores: Leandro Lucarella <llucare@fi.uba.ar>
*----------------------------------------------------------------------------
*
- * $Id: tipo1.c 548 2004-05-28 22:44:27Z llucare $
+ * $Id$
*
*/
BUFFORD* b = malloc(sizeof(BUFFORD));
if (!b) return 0;
assert(cmp);
+ assert(size > 0);
assert(reg_size > 0);
- assert(size > reg_size);
b->cmp = cmp;
b->size = 0;
b->root = 0;
if (!b) return 0;
assert(cmp);
assert(reg_size > 0);
- assert(size > reg_size);
+ assert(size > (reg_size + sizeof(BUFFORD_NODE)));
b->cmp = cmp;
b->size = 0;
b->root = 0;
* Autores: Leandro Lucarella <llucare@fi.uba.ar>
*----------------------------------------------------------------------------
*
- * $Id: tipo1.h 542 2004-05-28 19:45:02Z rmarkie $
+ * $Id$
*
*/
*
*/
-#ifndef _EXTERNAL_SORT_BUFFORD_H_
-#define _EXTERNAL_SORT_BUFFORD_H_
+#ifndef _EXTSORT_BUFFORD_H_
+#define _EXTSORT_BUFFORD_H_
+#include "base.h"
#include <stdlib.h>
-/** Función utilizada para comparar al ordenar los datos.
- * \return 0 si son iguales, mayor que cero si el primer argumento es mayor y
- * menor que cero si el primer argumento es menor.
- */
-typedef int (*CMP_FUNC)(void*, void*);
-
/** Nodo del buffer ordenado (uso interno). */
typedef struct _BUFFORD_NODE
{
*/
void* bufford_get_next(BUFFORD* buff, void* min);
-#endif /* _EXTERNAL_SORT_BUFFORD_H_ */
+#endif /* _EXTSORT_BUFFORD_H_ */
--- /dev/null
+/* vim: set noexpandtab tabstop=4 shiftwidth=4:
+ *----------------------------------------------------------------------------
+ * emufs
+ *----------------------------------------------------------------------------
+ * This file is part of emufs.
+ *
+ * emufs is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * emufs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with emufs; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place, Suite 330, Boston, MA 02111-1307 USA
+ *----------------------------------------------------------------------------
+ * Creado: dom may 30 07:48:10 ART 2004
+ * Autores: Leandro Lucarella <llucare@fi.uba.ar>
+ *----------------------------------------------------------------------------
+ *
+ * $Id: bufford.h 609 2004-05-30 10:14:21Z llucare $
+ *
+ */
+
+/** \file
+ *
+ * Algoritmo de ordenamiento externo.
+ *
+ * Implementación del algoritmo de ordenamiento externo.
+ *
+ */
+
+#include "extsort.h"
+#include "bufford.h"
+#include "mergepool.h"
+#include <string.h>
+#include <assert.h>
+
+int extsort(const char* filename, size_t buf_size, size_t reg_size, CMP_FUNC cmp)
+{
+ BUFFORD* b;
+ MERGEPOOL* pool;
+ FILE* fp;
+ int min;
+ assert(filename);
+ assert(reg_size);
+ assert(buf_size);
+ assert(cmp);
+ /* creo buffer ordenado */
+ if (!(b = bufford_new_bytes_size(buf_size, reg_size, cmp))) {
+ return 0; /* ERROR */
+ }
+ /* creo pool de archivos temporales ordenados */
+ if (!(pool = mergepool_new())) {
+ bufford_delete(b);
+ return 0; /* ERROR */
+ }
+ /* abro archivo a ordenar */
+ if (!(fp = fopen(filename, "r"))) {
+ mergepool_delete(pool);
+ bufford_delete(b);
+ return 0; /* ERROR */
+ }
+ /* lleno el buffer */
+ while (1) {
+ int reg;
+ if (fscanf(fp, "%i", ®) < 1) break; /* EOF */
+ if (!bufford_push(b, ®)) break; /* buffer lleno */
+ }
+ /* creo archivos temporales ordenados usando el buffer */
+ while (!bufford_empty(b)) {
+ int* c;
+ int x;
+ /* agrego un nuevo archivo temporal al pool */
+ if (!mergepool_add_file(pool)) {
+ fclose(fp);
+ mergepool_delete(pool);
+ bufford_delete(b);
+ return 0; /* ERROR */
+ }
+ /* lleno el archivo temporal ordenadamente usando el buffer */
+ for (c = bufford_pop_min(b); c; c = bufford_pop_next(b, &x)) {
+ /* si hay un valor en la entrada, lo inserto */
+ if ((fscanf(fp, "%i", &x) > 0) && !bufford_push(b, &x)) {
+ /* ERROR: no se pudo insertar */
+ fclose(fp);
+ mergepool_delete(pool);
+ bufford_delete(b);
+ return 0; /* ERROR */
+ }
+ x = *c;
+ free(c);
+ /* Agrego el dato al último archivo temporal */
+ if (!mergepool_append_data(pool, x)) {
+ fclose(fp);
+ mergepool_delete(pool);
+ bufford_delete(b);
+ return 0; /* ERROR */
+ }
+ }
+ }
+ fclose(fp);
+ /* Mezclo lotes */
+ if (!(fp = fopen(filename, "w"))) {
+ mergepool_delete(pool);
+ bufford_delete(b);
+ }
+ /* obtengo próximo mínimo y lo guardo en el archivo de salida */
+ while (mergepool_pop_min(pool, &min)) fprintf(fp, "%i\n", min);
+ /* libero todos los recursos */
+ fclose(fp);
+ mergepool_delete(pool);
+ bufford_delete(b);
+ return 1; /* OK */
+}
+
+++ /dev/null
-/* Filename: extsort.cpp
-
- Programmer: Br. David Carlson
-
- Date: April 18, 2003
-
- This program does a case-insensitive sort of the text file that the user supplies
- when prompted by the program. It is assumed that each line of the file contains a
- word and is no more than 31 characters in length. No attempt is made to order in any
- way words that are identical except for capitalization. (For example, there is no
- guarantee what order the sort will place the words MacIntosh and Macintosh since
- they are seen as identical. Duplicate words such as these are kept in the file.)
- The output data is placed back under the original file name.
-
- Tested with:
- Microsoft Visual C++ 6.0
- Microsoft Visual C++ .NET
- This program will not work as is with g++ under Linux due to its use of items such as
- the constant _MAX_PATH as well as the functions _itoa, and stricmp.
-*/
-
-#include "extsort.h"
-#include <sstream>
-
-void _itoa(int n, char* ext, int size)
-{
- std::ostringstream oss;
- oss << n;
- strncpy(ext, oss.str().c_str(), size);
-}
-
-int main(void)
- {
- PathType FileName;
- fstream DataFile;
- int NumRuns;
-
- cout << "Enter the name of the text file to be sorted: ";
- cin.getline(FileName, _MAX_PATH);
-
- DataFile.open(FileName, ios::in);
- if (DataFile.fail())
- Error("Could not open the file named ", FileName);
-
- // Copy sections of the data file into sorted runs.
- NumRuns = MakeSortedRuns(DataFile, FileName);
- DataFile.close();
-
- // Merge the sorted runs until all of the data is in one sorted file (under the original FileName).
- HandleMerges(NumRuns, FileName);
-
- return 0;
- }
-
-
-/* Given: DataFile A text file stream already opened for input.
- FileName The name of this text file (as a char array string).
- Task: To copy data from this file stream into sorted files (runs).
- Return: DataFile The modified file stream is trivially modified by reading from it.
- In the function name the number of runs is returned.
- The sorted runs themselves are created and stored under the file names:
- ExtSortTemp.0, ExtSortTemp.1, etc.
-*/
-int MakeSortedRuns(fstream & DataFile, PathType FileName)
- {
- fstream OutFile;
- StringType Word, Extension;
- PathType OutFileName;
- int NumWords, k, NumFiles = 0;
- BufType Buffer;
- bool MoreData;
-
- // Repeatedly fill one buffer and quicksort it, writing the buffer out to a temp file.
- DataFile.getline(Word, MaxString);
- if (DataFile.fail())
- MoreData = false;
- else
- MoreData = true;
-
- while (MoreData)
- {
- // Fill one buffer.
- NumWords = FillBuffer(DataFile, Buffer, MoreData, Word);
- QuickSort(Buffer, 0, NumWords - 1);
-
- // Construct the temp file name.
- strcpy(OutFileName, "ExtSortTemp.");
- _itoa(NumFiles, Extension, 10); // Convert the int in NumFiles to a char array string.
- strcat(OutFileName, Extension);
-
- OutFile.open(OutFileName, ios::out);
- if (OutFile.fail())
- Error("Could not open the file named ", OutFileName);
-
- for (k = 0; k < NumWords; k++)
- OutFile << Buffer[k] << endl;
-
- #ifdef DEBUG
- OutFile.seekp(0, ios_base::end);
- cout << "Tamaño del archivo '" << OutFileName << "': " << OutFile.tellp() / 1024 << " KB\n";
- #endif
- OutFile.close();
- NumFiles++;
- }
-
- return NumFiles;
- }
-
-
-/* Given: NumFiles Number of sorted runs (files) to be merged.
- FileName The name of the text file to contain the merged data.
- Indirectly this function is also given the sorted runs, which are assumed to be
- files named ExtSortTemp.0, ExtSortTemp.1, etc.
- Task: To merge the data from these sorted runs into one combined, sorted file with
- the name given by FileName.
- Return: Nothing directly, but the file named by FileName is modified (by being sorted).
-*/
-void HandleMerges(int NumFiles, PathType FileName)
- {
- StringType Extension;
- PathType OutFileName, InFileName1, InFileName2;
- int k, NumPairs, Count;
- bool Odd;
-
- // Repeatedly merge 2 runs, writing data to a new temp file (but write last one to original file).
- if (NumFiles == 0) // No data is present, so there is no work to do.
- return;
- else if (NumFiles == 1)
- {
- // Remove original file and rename the temp file using name of the original.
- remove(FileName);
- rename("ExtSortTemp.0", FileName);
-
- #ifdef DEBUG
- cout << "Renaming ExtSortTemp.0 as " << FileName << endl;
- #endif
-
- return; // The sort is finished.
- }
- else if (NumFiles == 2)
- {
- // Merge the 2 sorted temp files into original file.
- Merge("ExtSortTemp.0", "ExtSortTemp.1", FileName);
- remove("ExtSortTemp.0");
- remove("ExtSortTemp.1");
- return; // The sort is finished.
- }
- else // We have 3 or more files.
- {
- // Merge temp files, 2 at a time, until only 2 remain, then proceed as in last case.
- while (NumFiles > 2)
- {
- // Merge ExtSortTemp.0 and ExtSortTemp.1 into ExtSortTempA.0,
- // merge ExtSortTemp.2 and ExtSortTemp.3 into ExtSortTempA.1, etc.
- NumPairs = NumFiles / 2;
- for (k = 0, Count = 0; k < NumPairs; k++)
- {
- strcpy(InFileName1 , "ExtSortTemp.");
- _itoa(Count++, Extension, 10);
- strcat(InFileName1, Extension);
- strcpy(InFileName2 , "ExtSortTemp.");
- _itoa(Count++, Extension, 10);
- strcat(InFileName2, Extension);
- strcpy(OutFileName , "ExtSortTempA.");
- _itoa(k, Extension, 10);
- strcat(OutFileName, Extension);
- Merge(InFileName1, InFileName2, OutFileName);
- }
- // If the number of files is odd, just rename the last one.
- if (2 * NumPairs < NumFiles)
- {
- Odd = true;
- strcpy(InFileName1 , "ExtSortTemp.");
- _itoa(Count, Extension, 10);
- strcat(InFileName1, Extension);
- strcpy(OutFileName , "ExtSortTempA.");
- _itoa(NumPairs, Extension, 10);
- strcat(OutFileName, Extension);
- rename(InFileName1, OutFileName);
-
- #ifdef DEBUG
- cout << "Renaming " << InFileName1 << " as " << OutFileName << endl;
- #endif
- }
- else
- Odd = false;
-
- // Remove the temp files from before the merge.
- for (k = 0; k < NumFiles; k++)
- {
- strcpy(InFileName1 , "ExtSortTemp.");
- _itoa(k, Extension, 10);
- strcat(InFileName1, Extension);
- remove(InFileName1);
- }
-
- // Get the new number of sorted files.
- NumFiles = NumPairs;
- if (Odd)
- NumFiles++;
-
- // If number of ExtSortTempA files is now 2, merge them with output to original file and return.
- if (NumFiles == 2)
- {
- Merge("ExtSortTempA.0", "ExtSortTempA.1", FileName);
- remove("ExtSortTempA.0");
- remove("ExtSortTempA.1");
- return; // The sort is finished.
- }
-
- // Otherwise, merge pairs of files as above, but start at the top end so as to
- // incorporate any small remainder file. Note that we now merge from the
- // ExtSortTempA files into ExtSortTemp files.
- NumPairs = NumFiles / 2;
- for (k = 0, Count = NumFiles - 1; k < NumPairs; k++)
- {
- strcpy(InFileName1 , "ExtSortTempA.");
- _itoa(Count--, Extension, 10);
- strcat(InFileName1, Extension);
- strcpy(InFileName2 , "ExtSortTempA.");
- _itoa(Count--, Extension, 10);
- strcat(InFileName2, Extension);
- strcpy(OutFileName , "ExtSortTemp.");
- _itoa(k, Extension, 10);
- strcat(OutFileName, Extension);
- Merge(InFileName1, InFileName2, OutFileName);
- }
- // If the number of files is odd, just rename the last one.
- if (2 * NumPairs < NumFiles)
- {
- Odd = true;
- strcpy(InFileName1 , "ExtSortTempA.");
- _itoa(0, Extension, 10);
- strcat(InFileName1, Extension);
- strcpy(OutFileName , "ExtSortTemp.");
- _itoa(NumPairs, Extension, 10);
- strcat(OutFileName, Extension);
- rename(InFileName1, OutFileName);
-
- #ifdef DEBUG
- cout << "Renaming " << InFileName1 << " as " << OutFileName << endl;
- #endif
- }
- else
- Odd = false;
-
- // Remove the temp files from before the merge.
- for (k = 0; k < NumFiles; k++)
- {
- strcpy(InFileName1 , "ExtSortTempA.");
- _itoa(k, Extension, 10);
- strcat(InFileName1, Extension);
- remove(InFileName1);
- }
-
- // Get the new number of sorted files.
- NumFiles = NumPairs;
- if (Odd)
- NumFiles++;
-
- // If number of ExtSortTemp files is now 2, merge them with output to original file and return.
- if (NumFiles == 2)
- {
- Merge("ExtSortTemp.0", "ExtSortTemp.1", FileName);
- remove("ExtSortTemp.0");
- remove("ExtSortTemp.1");
- return; // The sort is finished.
- }
- }
- }
- }
-
-
-/* Given: InFile Text file stream already opened for input.
- MoreData Indicates if there is more data from InFile to place into Buffer.
- At the very least this indicates if there is an unused item in NextWord.
- NextWord If MoreData is true, then NextWord contains an unprocessed word from
- InFile that has yet to be placed into Buffer.
- Task: If MoreData is true, this function places as much data as possible from InFile into
- Buffer. This data includes NextWord and then as much new data as possible from InFile.
- The Buffer will be filled if there is enough data to do so.
- Return: In the function name is returned the number of words placed into Buffer.
- InFile The modified file stream.
- Buffer Contains the data that has been copied in from InFile.
- MoreData Indicates if NextWord contains a word that has not yet been placed into Buffer.
- NextWord If MoreData is true, this contains the next word from InFile that has
- not yet been placed into Buffer.
-*/
-int FillBuffer(fstream & InFile, BufType Buffer, bool & MoreData, StringType NextWord)
- {
- int k = 0;
-
- while (MoreData && (k < BufSize))
- {
- strcpy(Buffer[k], NextWord);
- InFile.getline(NextWord, MaxString);
- if (InFile.fail())
- MoreData = false;
- else
- MoreData = true;
- k++;
- }
-
- return k;
- }
-
-
-/* Given: InFileName1 The name of the first text file to be merged.
- InFileName2 The name of the second text file to be merged.
- OutFileName The name of the file in which to place the merged data.
- Task: To merge the sorted text files named InFileName1 and InFileName2 into one
- named by OutFileName.
- Return: Nothing directly, but the text file named by OutFileName is created and filled
- with the sorted data from InFileName1 and InFileName2.
-*/
-void Merge(StringType InFileName1, StringType InFileName2, StringType OutFileName)
- {
- fstream InFile1, InFile2, OutFile;
- BufType Buffer1, Buffer2, Buffer3;
- bool HaveData1, HaveData2, MoreData1, MoreData2;
- StringType NextWord1, NextWord2;
- int k, Result, NumWords1, NumWords2, Index1, Index2, Index3 = 0;
-
- InFile1.open(InFileName1, ios::in);
- if (InFile1.fail())
- Error("Could not open the file named ", InFileName1);
-
- InFile2.open(InFileName2, ios::in);
- if (InFile2.fail())
- Error("Could not open the file named ", InFileName2);
-
- OutFile.open(OutFileName, ios::out);
- if (OutFile.fail())
- Error("Could not open the file named ", OutFileName);
-
- #ifdef DEBUG
- cout << "Merging " << InFileName1 << " and " << InFileName2 << " to " << OutFileName << endl;
- #endif
-
- // Try to fill a buffer from InFile1.
- InFile1.getline(NextWord1, MaxString);
- if (InFile1.fail())
- MoreData1 = false;
- else
- {
- MoreData1 = true;
- NumWords1 = FillBuffer(InFile1, Buffer1, MoreData1, NextWord1);
- }
-
- // Try to fill a buffer from InFile2.
- InFile2.getline(NextWord2, MaxString);
- if (InFile2.fail())
- MoreData2 = false;
- else
- {
- MoreData2 = true;
- NumWords2 = FillBuffer(InFile2, Buffer2, MoreData2, NextWord2);
- }
-
- Index1 = 0;
- Index2 = 0;
- // Each HaveData boolean indicates if we have more unmerged data. This data could be in
- // the corresponding NextWord variable, the corresponding Buffer, or both.
- HaveData1 = MoreData1 || (Index1 < NumWords1);
- HaveData2 = MoreData2 || (Index2 < NumWords2);
-
- while (HaveData1 && HaveData2)
- {
- if (Index1 == NumWords1)
- {
- NumWords1 = FillBuffer(InFile1, Buffer1, MoreData1, NextWord1);
- if (NumWords1 == 0)
- break; // We are out of data from InFile1.
- else
- Index1 = 0;
- }
- if (Index2 == NumWords2)
- {
- NumWords2 = FillBuffer(InFile2, Buffer2, MoreData2, NextWord2);
- if (NumWords2 == 0)
- break; // We are out of data from InFile2.
- else
- Index2 = 0;
- }
-
- Result = strcmp(Buffer1[Index1], Buffer2[Index2]); // case insensitive compare
-
- if (Result == 0) // Copy both data items.
- {
- Copy(Buffer1[Index1], Buffer3, Index3, OutFile);
- Copy(Buffer2[Index2], Buffer3, Index3, OutFile);
- Index1++;
- Index2++;
- }
- else if (Result < 0) // Copy the first data item.
- {
- Copy(Buffer1[Index1], Buffer3, Index3, OutFile);
- Index1++;
- }
- else // Result > 0, so copy the second data item.
- {
- Copy(Buffer2[Index2], Buffer3, Index3, OutFile);
- Index2++;
- }
-
- HaveData1 = MoreData1 || (Index1 < NumWords1);
- HaveData2 = MoreData2 || (Index2 < NumWords2);
- }
-
- // Handle remaining data in either file.
- while (HaveData1)
- {
- if (Index1 == NumWords1)
- {
- NumWords1 = FillBuffer(InFile1, Buffer1, MoreData1, NextWord1);
- if (NumWords1 == 0)
- break; // We are out of data from InFile1.
- else
- Index1 = 0;
- }
-
- Copy(Buffer1[Index1], Buffer3, Index3, OutFile);
- Index1++;
- HaveData1 = MoreData1 || (Index1 < NumWords1);
- }
-
- while (HaveData2)
- {
- if (Index2 == NumWords2)
- {
- NumWords2 = FillBuffer(InFile2, Buffer2, MoreData2, NextWord2);
- if (NumWords2 == 0)
- break; // We are out of data from InFile2.
- else
- Index2 = 0;
- }
-
- Copy(Buffer2[Index2], Buffer3, Index3, OutFile);
- Index2++;
- HaveData2 = MoreData2 || (Index2 < NumWords2);
- }
-
- // Flush any remaining data from the output buffer.
- for (k = 0; k < Index3; k++)
- OutFile << Buffer3[k] << endl;
-
- #ifdef DEBUG
- OutFile.seekp(0, ios_base::end);
- cout << "Tamaño del archivo '" << OutFileName << "': " << OutFile.tellp() / 1024 << " KB\n";
- #endif
- InFile1.close();
- InFile2.close();
- OutFile.close();
- }
-
-
-/* Given: Word One word of data to be placed into Buffer.
- Buffer A buffer that may already contain words.
- Index The first unused index in Buffer.
- OutFile A text file stream already opened for output.
- Task: To place Word into Buffer. If the Buffer is full, the data
- already in it is first written out to OutFile, then Word
- is placed into Buffer as the only item present.
- Return: Buffer The updated buffer.
- Index The updated index of the first unused location in Buffer.
- OutFile The possibly updated file stream.
-*/
-void Copy(StringType Word, BufType Buffer, int & Index, fstream & OutFile)
- {
- int k;
-
- if (Index == BufSize) // There is no room in Buffer, so first write the Buffer data to OutFile.
- {
- for (k = 0; k < BufSize; k++)
- OutFile << Buffer[k] << endl;
- Index = 0;
- }
-
- strcpy(Buffer[Index], Word);
- Index++;
- }
-
-
-/* Given: First A char array string.
- Second Another char array string.
- Task: To swap these values.
- Return: First Updated string.
- Second Updated string.
-*/
-inline void Swap(StringType First, StringType Second)
- {
- StringType Temp;
-
- strcpy(Temp, First);
- strcpy(First, Second);
- strcpy(Second, Temp);
- }
-
-
-/* Given: Buf Array of char array strings
- Lower An index into the array.
- Upper An index into the array.
- Task: To quicksort the section of the array from index Lower to Upper.
- Return: Buf The sorted array.
-*/
-void QuickSort(BufType Buf, int Lower, int Upper)
- {
- int PivotIndex;
-
- if (Lower < Upper)
- {
- PivotIndex = Partition(Buf, Lower, Upper);
- QuickSort(Buf, Lower, PivotIndex - 1); // sort left side
- QuickSort(Buf, PivotIndex + 1, Upper); // sort right side
- }
- }
-
-
-/* Given: Buf Array of char array strings.
- Lower An index into the array.
- Upper An index into the array.
- Assume: Lower < Upper
- Task: To partition the section of Buf between index Lower and
- index Upper so that everything to the left of the returned
- index is less or equal to the pivot item, which is Buf[Lower],
- everything to the right of the returned index is greater
- than the pivot item, and the pivot item itself is located
- at the returned index.
- Return: Buf The partitioned array.
- In the function name this returns the index of the pivot value.
-*/
-int Partition(BufType Buf, int Lower, int Upper)
- {
- int Left, Right;
- StringType Pivot;
-
- strcpy(Pivot, Buf[Lower]);
- Left = Lower;
- Right = Upper;
-
- while (Left < Right)
- {
- // Scan from left, skipping items that belong there. Use case-insensitive compare.
- while ((strcmp(Buf[Left], Pivot) <= 0) && (Left < Upper))
- Left++;
- // Scan from right, skipping items that belong there. Use case-insensitive compare.
- while (strcmp(Buf[Right], Pivot) > 0)
- Right--;
- if (Left < Right)
- Swap(Buf[Left], Buf[Right]);
- }
-
- strcpy(Buf[Lower], Buf[Right]);
- strcpy(Buf[Right], Pivot);
- return Right; // return the pivot index
- }
-
-
-
-/* Given: MsgA, MsgB Two messages (string objects) to display.
- Task: Display MsgA and MsgB, then exit the program.
- Return: Nothing to the program, but does return an error code of 1 to the operating system.
-*/
-void Error(const string & MsgA, const string & MsgB)
- {
- cerr << "Error: " << MsgA << MsgB << endl;
- exit(1);
- }
-
-/* Filename: extsort.h
-
- Programmer: Br. David Carlson
-
- Date: April 18, 2003
-
- This header file sets up the constants, types, and function prototypes used by the
- ExtSort program.
-*/
-
-#include <iostream>
-#include <fstream>
-#include <string>
-using namespace std;
-
-
-// Comment off the following line if you do not want to see debugging information.
-//#define DEBUG
-
-enum { MaxString = 32, BufSize = 2048, _MAX_PATH = 64 };
-
-
-typedef char StringType[MaxString];
-
-typedef StringType BufType[BufSize]; // A buffer will hold 2048 strings, for a total of 64K bytes.
-
-typedef char PathType[_MAX_PATH];
-
-
-void Error(const string & Msg);
-
-void Error(const string & MsgA, const string & MsgB);
-
-int MakeSortedRuns(fstream & DataFile, PathType FileName);
-
-bool LessThan(StringType First, StringType Second);
-
-void QuickSort(BufType Buf, int Lower, int Upper);
-
-void Swap(StringType First, StringType Second);
-
-int Partition(BufType Buf, int Lower, int Upper);
-
-void Merge(StringType InFileName1, StringType InFileName2, StringType OutFileName);
-
-int FillBuffer(fstream & InFile, BufType Buffer, bool & HaveData, StringType NextWord);
-
-void Copy(StringType Word, BufType Buffer, int & Index, fstream & OutFile);
-
-void HandleMerges(int NumFiles, PathType FileName);
+/* vim: set noexpandtab tabstop=4 shiftwidth=4:
+ *----------------------------------------------------------------------------
+ * emufs
+ *----------------------------------------------------------------------------
+ * This file is part of emufs.
+ *
+ * emufs is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * emufs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with emufs; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place, Suite 330, Boston, MA 02111-1307 USA
+ *----------------------------------------------------------------------------
+ * Creado: dom may 30 07:48:10 ART 2004
+ * Autores: Leandro Lucarella <llucare@fi.uba.ar>
+ *----------------------------------------------------------------------------
+ *
+ * $Id$
+ *
+ */
+
+/** \file
+ *
+ * Algoritmo de ordenamiento externo.
+ *
+ * Interfaz del algoritmo de ordenamiento externo.
+ *
+ */
+
+#ifndef _EXTSORT_EXTSORT_H_
+#define _EXTSORT_EXTSORT_H_
+
+#include "base.h"
+#include <stdlib.h>
+
+int extsort(const char* filename, size_t buf_size, size_t reg_size, CMP_FUNC cmp);
+
+#endif /* _EXTSORT_EXTSORT_H_ */
* Autores: Leandro Lucarella <llucare@fi.uba.ar>
*----------------------------------------------------------------------------
*
- * $Id: tipo1.h 542 2004-05-28 19:45:02Z rmarkie $
+ * $Id$
*
*/
*
*/
-#ifndef _MERGEFILE_H_
-#define _MERGEFILE_H_
+#ifndef _EXTSORT_MERGEFILE_H_
+#define _EXTSORT_MERGEFILE_H_
#include <stdio.h>
int mergefile_has_more(MERGEFILE* mf);
-#endif /* _MERGEFILE_H_ */
+#endif /* _EXTSORT_MERGEFILE_H_ */
*
*/
-#ifndef _MERGEPOOL_H_
-#define _MERGEPOOL_H_
+#ifndef _EXTSORT_MERGEPOOL_H_
+#define _EXTSORT_MERGEPOOL_H_
#include "mergefile.h"
int mergepool_pop_min(MERGEPOOL* mp, int* min);
-#endif /* _MERGEPOOL_H_ */
+#endif /* _EXTSORT_MERGEPOOL_H_ */
+++ /dev/null
-/* external sort */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-/****************************
- * implementation dependent *
- ****************************/
-
-/* template for workfiles (8.3 format) */
-#define FNAME "_sort%03d.dat"
-#define LNAME 13
-
-/* comparison operators */
-#define compLT(x,y) (x < y)
-#define compGT(x,y) (x > y)
-
-/* define the record to be sorted here */
-#define LRECL 100
-typedef int keyType;
-typedef struct recTypeTag {
- keyType key; /* sort key for record */
- #if LRECL
- char data[LRECL-sizeof(keyType)]; /* other fields */
- #endif
-} recType;
-
-/******************************
- * implementation independent *
- ******************************/
-
-typedef enum {false, true} bool;
-
-typedef struct tmpFileTag {
- FILE *fp; /* file pointer */
- char name[LNAME]; /* filename */
- recType rec; /* last record read */
- int dummy; /* number of dummy runs */
- bool eof; /* end-of-file flag */
- bool eor; /* end-of-run flag */
- bool valid; /* true if rec is valid */
- int fib; /* ideal fibonacci number */
-} tmpFileType;
-
-static tmpFileType **file; /* array of file info for tmp files */
-static int nTmpFiles; /* number of tmp files */
-static char *ifName; /* input filename */
-static char *ofName; /* output filename */
-
-static int level; /* level of runs */
-static int nNodes; /* number of nodes for selection tree */
-
-void deleteTmpFiles(void) {
- int i;
-
- /* delete merge files and free resources */
- if (file) {
- for (i = 0; i < nTmpFiles; i++) {
- if (file[i]) {
- if (file[i]->fp) fclose(file[i]->fp);
- if (*file[i]->name) remove(file[i]->name);
- free (file[i]);
- }
- }
- free (file);
- }
-}
-
-void termTmpFiles(int rc) {
-
- /* cleanup files */
- remove(ofName);
- if (rc == 0) {
- int fileT;
-
- /* file[T] contains results */
- fileT = nTmpFiles - 1;
- fclose(file[fileT]->fp); file[fileT]->fp = NULL;
- if (rename(file[fileT]->name, ofName)) {
- perror("io1");
- deleteTmpFiles();
- exit(1);
- }
- *file[fileT]->name = 0;
- }
- deleteTmpFiles();
-}
-
-void cleanExit(int rc) {
-
- /* cleanup tmp files and exit */
- termTmpFiles(rc);
- exit(rc);
-}
-
-void *safeMalloc(size_t size) {
- void *p;
-
- /* safely allocate memory and initialize to zero */
- if ((p = calloc(1, size)) == NULL) {
- printf("error: malloc failed, size = %d\n", size);
- cleanExit(1);
- }
- return p;
-}
-
-void initTmpFiles(void) {
- int i;
- tmpFileType *fileInfo;
-
- /* initialize merge files */
- if (nTmpFiles < 3) nTmpFiles = 3;
- file = safeMalloc(nTmpFiles * sizeof(tmpFileType*));
- fileInfo = safeMalloc(nTmpFiles * sizeof(tmpFileType));
- for (i = 0; i < nTmpFiles; i++) {
- file[i] = fileInfo + i;
- sprintf(file[i]->name, FNAME, i);
- if ((file[i]->fp = fopen(file[i]->name, "w+b")) == NULL) {
- perror("io2");
- cleanExit(1);
- }
- }
-}
-
-recType *readRec(void) {
-
- typedef struct iNodeTag { /* internal node */
- struct iNodeTag *parent;/* parent of internal node */
- struct eNodeTag *loser; /* external loser */
- } iNodeType;
-
- typedef struct eNodeTag { /* external node */
- struct iNodeTag *parent;/* parent of external node */
- recType rec; /* input record */
- int run; /* run number */
- bool valid; /* input record is valid */
- } eNodeType;
-
- typedef struct nodeTag {
- iNodeType i; /* internal node */
- eNodeType e; /* external node */
- } nodeType;
-
- static nodeType *node; /* array of selection tree nodes */
- static eNodeType *win; /* new winner */
- static FILE *ifp; /* input file */
- static bool eof; /* true if end-of-file, input */
- static int maxRun; /* maximum run number */
- static int curRun; /* current run number */
- iNodeType *p; /* pointer to internal nodes */
- static bool lastKeyValid; /* true if lastKey is valid */
- static keyType lastKey; /* last key written */
-
- /* read next record using replacement selection */
-
- /* check for first call */
- if (node == NULL) {
- int i;
-
- if (nNodes < 2) nNodes = 2;
- node = safeMalloc(nNodes * sizeof(nodeType));
- for (i = 0; i < nNodes; i++) {
- node[i].i.loser = &node[i].e;
- node[i].i.parent = &node[i/2].i;
- node[i].e.parent = &node[(nNodes + i)/2].i;
- node[i].e.run = 0;
- node[i].e.valid = false;
- }
- win = &node[0].e;
- lastKeyValid = false;
-
- if ((ifp = fopen(ifName, "rb")) == NULL) {
- printf("error: file %s, unable to open\n", ifName);
- cleanExit(1);
- }
- }
-
- while (1) {
-
- /* replace previous winner with new record */
- if (!eof) {
- if (fread(&win->rec, sizeof(recType), 1, ifp) == 1) {
- if ((!lastKeyValid || compLT(win->rec.key, lastKey))
- && (++win->run > maxRun))
- maxRun = win->run;
- win->valid = true;
- } else if (feof(ifp)) {
- fclose(ifp);
- eof = true;
- win->valid = false;
- win->run = maxRun + 1;
- } else {
- perror("io4");
- cleanExit(1);
- }
- } else {
- win->valid = false;
- win->run = maxRun + 1;
- }
-
- /* adjust loser and winner pointers */
- p = win->parent;
- do {
- bool swap;
- swap = false;
- if (p->loser->run < win->run) {
- swap = true;
- } else if (p->loser->run == win->run) {
- if (p->loser->valid && win->valid) {
- if (compLT(p->loser->rec.key, win->rec.key))
- swap = true;
- } else {
- swap = true;
- }
- }
- if (swap) {
- /* p should be winner */
- eNodeType *t;
-
- t = p->loser;
- p->loser = win;
- win = t;
- }
- p = p->parent;
- } while (p != &node[0].i);
-
- /* end of run? */
- if (win->run != curRun) {
- /* win->run = curRun + 1 */
- if (win->run > maxRun) {
- /* end of output */
- free(node);
- return NULL;
- }
- curRun = win->run;
- }
-
- /* output top of tree */
- if (win->run) {
- lastKey = win->rec.key;
- lastKeyValid = true;
- return &win->rec;
- }
- }
-}
-
-void makeRuns(void) {
- recType *win; /* winner */
- int fileT; /* last file */
- int fileP; /* next to last file */
- int j; /* selects file[j] */
-
-
- /* Make initial runs using replacement selection.
- * Runs are written using a Fibonacci distintbution.
- */
-
- /* initialize file structures */
- fileT = nTmpFiles - 1;
- fileP = fileT - 1;
- for (j = 0; j < fileT; j++) {
- file[j]->fib = 1;
- file[j]->dummy = 1;
- }
- file[fileT]->fib = 0;
- file[fileT]->dummy = 0;
-
- level = 1;
- j = 0;
-
-
- win = readRec();
- while (win) {
- bool anyrun;
-
- anyrun = false;
- for (j = 0; win && j <= fileP; j++) {
- bool run;
-
- run = false;
- if (file[j]->valid) {
- if (!compLT(win->key, file[j]->rec.key)) {
- /* append to an existing run */
- run = true;
- } else if (file[j]->dummy) {
- /* start a new run */
- file[j]->dummy--;
- run = true;
- }
- } else {
- /* first run in file */
- file[j]->dummy--;
- run = true;
- }
-
- if (run) {
- anyrun = true;
-
- /* flush run */
- while(1) {
- if (fwrite(win, sizeof(recType), 1, file[j]->fp) != 1) {
- perror("io3");
- cleanExit(1);
- }
- file[j]->rec.key = win->key;
- file[j]->valid = true;
- if ((win = readRec()) == NULL) break;
- if (compLT(win->key, file[j]->rec.key)) break;
- }
- }
- }
-
- /* if no room for runs, up a level */
- if (!anyrun) {
- int t;
- level++;
- t = file[0]->fib;
- for (j = 0; j <= fileP; j++) {
- file[j]->dummy = t + file[j+1]->fib - file[j]->fib;
- file[j]->fib = t + file[j+1]->fib;
- }
- }
- }
-}
-
-void rewindFile(int j) {
- /* rewind file[j] and read in first record */
- file[j]->eor = false;
- file[j]->eof = false;
- rewind(file[j]->fp);
- if (fread(&file[j]->rec, sizeof(recType), 1, file[j]->fp) != 1) {
- if (feof(file[j]->fp)) {
- file[j]->eor = true;
- file[j]->eof = true;
- } else {
- perror("io5");
- cleanExit(1);
- }
- }
-}
-
-void mergeSort(void) {
- int fileT;
- int fileP;
- int j;
- tmpFileType *tfile;
-
- /* polyphase merge sort */
-
- fileT = nTmpFiles - 1;
- fileP = fileT - 1;
-
- /* prime the files */
- for (j = 0; j < fileT; j++) {
- rewindFile(j);
- }
-
- /* each pass through loop merges one run */
- while (level) {
- while(1) {
- bool allDummies;
- bool anyRuns;
-
- /* scan for runs */
- allDummies = true;
- anyRuns = false;
- for (j = 0; j <= fileP; j++) {
- if (!file[j]->dummy) {
- allDummies = false;
- if (!file[j]->eof) anyRuns = true;
- }
- }
-
- if (anyRuns) {
- int k;
- keyType lastKey;
-
- /* merge 1 run file[0]..file[P] --> file[T] */
-
- while(1) {
- /* each pass thru loop writes 1 record to file[fileT] */
-
- /* find smallest key */
- k = -1;
- for (j = 0; j <= fileP; j++) {
- if (file[j]->eor) continue;
- if (file[j]->dummy) continue;
- if (k < 0 ||
- (k != j && compGT(file[k]->rec.key, file[j]->rec.key)))
- k = j;
- }
- if (k < 0) break;
-
- /* write record[k] to file[fileT] */
- if (fwrite(&file[k]->rec, sizeof(recType), 1,
- file[fileT]->fp) != 1) {
- perror("io6");
- cleanExit(1);
- }
-
- /* replace record[k] */
- lastKey = file[k]->rec.key;
- if (fread(&file[k]->rec, sizeof(recType), 1,
- file[k]->fp) == 1) {
- /* check for end of run on file[s] */
- if (compLT(file[k]->rec.key, lastKey))
- file[k]->eor = true;
- } else if (feof(file[k]->fp)) {
- file[k]->eof = true;
- file[k]->eor = true;
- } else {
- perror("io7");
- cleanExit(1);
- }
- }
-
- /* fixup dummies */
- for (j = 0; j <= fileP; j++) {
- if (file[j]->dummy) file[j]->dummy--;
- if (!file[j]->eof) file[j]->eor = false;
- }
-
- } else if (allDummies) {
- for (j = 0; j <= fileP; j++)
- file[j]->dummy--;
- file[fileT]->dummy++;
- }
-
- /* end of run */
- if (file[fileP]->eof && !file[fileP]->dummy) {
- /* completed a fibonocci-level */
- level--;
- if (!level) {
- /* we're done, file[fileT] contains data */
- return;
- }
-
- /* fileP is exhausted, reopen as new */
- fclose(file[fileP]->fp);
- if ((file[fileP]->fp = fopen(file[fileP]->name, "w+b"))
- == NULL) {
- perror("io8");
- cleanExit(1);
- }
- file[fileP]->eof = false;
- file[fileP]->eor = false;
-
- rewindFile(fileT);
-
- /* f[0],f[1]...,f[fileT] <-- f[fileT],f[0]...,f[T-1] */
- tfile = file[fileT];
- memmove(file + 1, file, fileT * sizeof(tmpFileType*));
- file[0] = tfile;
-
- /* start new runs */
- for (j = 0; j <= fileP; j++)
- if (!file[j]->eof) file[j]->eor = false;
- }
- }
-
- }
-}
-
-
-void extSort(void) {
- initTmpFiles();
- makeRuns();
- mergeSort();
- termTmpFiles(0);
-}
-
-int main(int argc, char *argv[]) {
-
- /* command-line:
- *
- * ext ifName ofName nTmpFiles nNodes
- *
- * ext in.dat out.dat 5 2000
- * reads in.dat, sorts using 5 files and 2000 nodes, output to out.dat
- */
- if (argc != 5) {
- printf("%s ifName ofName nTmpFiles nNodes\n", argv[0]);
- cleanExit(1);
- }
-
- ifName = argv[1];
- ofName = argv[2];
- nTmpFiles = atoi(argv[3]);
- nNodes = atoi(argv[4]);
-
- printf("extSort: nFiles=%d, nNodes=%d, lrecl=%d\n",
- nTmpFiles, nNodes, sizeof(recType));
-
- extSort();
-
- return 0;
-}
-#include "bufford.h"
-#include "mergepool.h"
+#include "extsort.h"
#include <stdio.h>
-#include <string.h>
-#include <assert.h>
/*
typedef struct
return 0;
}
+/*
void buffer_dump(BUFFORD* b)
{
int* c;
printf("%i ", *c);
printf("\n");
}
+*/
int main(int argc, char* argv[])
{
- MERGEPOOL* pool;
- FILE* fp;
- int i;
- BUFFORD* b = bufford_new(8, sizeof(int), &cmp);
- if (!b) return 1;
- if (!(pool = mergepool_new())) {
- bufford_delete(b);
- return 2;
- }
- if (!(fp = fopen(argv[1], "r"))) {
- mergepool_delete(pool);
- bufford_delete(b);
- return 12;
- }
- /* lleno el buffer */
- while (1) {
- int reg;
- fscanf(fp, "%i", ®);
- if (feof(fp)) break;
- if (!bufford_push(b, ®)) break;
- }
- printf("Buffer lleno, hago el primer lote...\n");
- buffer_dump(b);
- /* Hago lotes */
- i = 0;
- while (!bufford_empty(b)) {
- int* c;
- int x;
- printf("Creando lote %i:\n", i);
- if (!mergepool_add_file(pool)) {
- fclose(fp);
- mergepool_delete(pool);
- bufford_delete(b);
- return 3;
- }
- for (c = bufford_pop_min(b); c; c = bufford_pop_next(b, &x)) {
- /* si hay un valor en la entrada, lo inserto */
- if (!feof(fp)) fscanf(fp, "%i", &x);
- if (!feof(fp) && !bufford_push(b, &x)) {
- /* ERROR: no se pudo insertar */
- fclose(fp);
- mergepool_delete(pool);
- bufford_delete(b);
- return 4;
- }
- x = *c;
- free(c);
- printf("%- 8i\t", x);
- if (!mergepool_append_data(pool, x)) {
- printf("No se pudo escribir en el mergefile %i\n", i);
- fclose(fp);
- mergepool_delete(pool);
- bufford_delete(b);
- return 4;
- }
- buffer_dump(b);
- }
- printf("Lote %i finalizado!\n\n", i);
- i++;
- }
- fclose(fp);
- /* Mezclo lotes */
- {
- int min;
- FILE* fp = fopen("salida.txt", "w");
- assert(fp);
- printf("Abriendo archivos y buscando mínimo...\n");
- printf("Iterando...\n");
- /* voy obteniendo el próximo mínimo y guardándolo en el archivo de salida */
- while (mergepool_pop_min(pool, &min)) fprintf(fp, "%i\n", min);
- fclose(fp);
- }
- mergepool_delete(pool);
- bufford_delete(b);
+ if (extsort(argv[1], 256, sizeof(int), &cmp))
+ printf("Archivo ordenado!\n");
+ else
+ printf("No se pudo ordenar archivo!\n");
return 0;
}