1 /* Filename: extsort.cpp
3 Programmer: Br. David Carlson
7 This program does a case-insensitive sort of the text file that the user supplies
8 when prompted by the program. It is assumed that each line of the file contains a
9 word and is no more than 31 characters in length. No attempt is made to order in any
10 way words that are identical except for capitalization. (For example, there is no
11 guarantee what order the sort will place the words MacIntosh and Macintosh since
12 they are seen as identical. Duplicate words such as these are kept in the file.)
13 The output data is placed back under the original file name.
16 Microsoft Visual C++ 6.0
17 Microsoft Visual C++ .NET
18 This program will not work as is with g++ under Linux due to its use of items such as
19 the constant _MAX_PATH as well as the functions _itoa, and stricmp.
25 void _itoa(int n, char* ext, int size)
27 std::ostringstream oss;
29 strncpy(ext, oss.str().c_str(), size);
38 cout << "Enter the name of the text file to be sorted: ";
39 cin.getline(FileName, _MAX_PATH);
41 DataFile.open(FileName, ios::in);
43 Error("Could not open the file named ", FileName);
45 // Copy sections of the data file into sorted runs.
46 NumRuns = MakeSortedRuns(DataFile, FileName);
49 // Merge the sorted runs until all of the data is in one sorted file (under the original FileName).
50 HandleMerges(NumRuns, FileName);
56 /* Given: DataFile A text file stream already opened for input.
57 FileName The name of this text file (as a char array string).
58 Task: To copy data from this file stream into sorted files (runs).
59 Return: DataFile The modified file stream is trivially modified by reading from it.
60 In the function name the number of runs is returned.
61 The sorted runs themselves are created and stored under the file names:
62 ExtSortTemp.0, ExtSortTemp.1, etc.
64 int MakeSortedRuns(fstream & DataFile, PathType FileName)
67 StringType Word, Extension;
69 int NumWords, k, NumFiles = 0;
73 // Repeatedly fill one buffer and quicksort it, writing the buffer out to a temp file.
74 DataFile.getline(Word, MaxString);
83 NumWords = FillBuffer(DataFile, Buffer, MoreData, Word);
84 QuickSort(Buffer, 0, NumWords - 1);
86 // Construct the temp file name.
87 strcpy(OutFileName, "ExtSortTemp.");
88 _itoa(NumFiles, Extension, 10); // Convert the int in NumFiles to a char array string.
89 strcat(OutFileName, Extension);
91 OutFile.open(OutFileName, ios::out);
93 Error("Could not open the file named ", OutFileName);
95 for (k = 0; k < NumWords; k++)
96 OutFile << Buffer[k] << endl;
99 OutFile.seekp(0, ios_base::end);
100 cout << "Tamaño del archivo '" << OutFileName << "': " << OutFile.tellp() / 1024 << " KB\n";
110 /* Given: NumFiles Number of sorted runs (files) to be merged.
111 FileName The name of the text file to contain the merged data.
112 Indirectly this function is also given the sorted runs, which are assumed to be
113 files named ExtSortTemp.0, ExtSortTemp.1, etc.
114 Task: To merge the data from these sorted runs into one combined, sorted file with
115 the name given by FileName.
116 Return: Nothing directly, but the file named by FileName is modified (by being sorted).
118 void HandleMerges(int NumFiles, PathType FileName)
120 StringType Extension;
121 PathType OutFileName, InFileName1, InFileName2;
122 int k, NumPairs, Count;
125 // Repeatedly merge 2 runs, writing data to a new temp file (but write last one to original file).
126 if (NumFiles == 0) // No data is present, so there is no work to do.
128 else if (NumFiles == 1)
130 // Remove original file and rename the temp file using name of the original.
132 rename("ExtSortTemp.0", FileName);
135 cout << "Renaming ExtSortTemp.0 as " << FileName << endl;
138 return; // The sort is finished.
140 else if (NumFiles == 2)
142 // Merge the 2 sorted temp files into original file.
143 Merge("ExtSortTemp.0", "ExtSortTemp.1", FileName);
144 remove("ExtSortTemp.0");
145 remove("ExtSortTemp.1");
146 return; // The sort is finished.
148 else // We have 3 or more files.
150 // Merge temp files, 2 at a time, until only 2 remain, then proceed as in last case.
153 // Merge ExtSortTemp.0 and ExtSortTemp.1 into ExtSortTempA.0,
154 // merge ExtSortTemp.2 and ExtSortTemp.3 into ExtSortTempA.1, etc.
155 NumPairs = NumFiles / 2;
156 for (k = 0, Count = 0; k < NumPairs; k++)
158 strcpy(InFileName1 , "ExtSortTemp.");
159 _itoa(Count++, Extension, 10);
160 strcat(InFileName1, Extension);
161 strcpy(InFileName2 , "ExtSortTemp.");
162 _itoa(Count++, Extension, 10);
163 strcat(InFileName2, Extension);
164 strcpy(OutFileName , "ExtSortTempA.");
165 _itoa(k, Extension, 10);
166 strcat(OutFileName, Extension);
167 Merge(InFileName1, InFileName2, OutFileName);
169 // If the number of files is odd, just rename the last one.
170 if (2 * NumPairs < NumFiles)
173 strcpy(InFileName1 , "ExtSortTemp.");
174 _itoa(Count, Extension, 10);
175 strcat(InFileName1, Extension);
176 strcpy(OutFileName , "ExtSortTempA.");
177 _itoa(NumPairs, Extension, 10);
178 strcat(OutFileName, Extension);
179 rename(InFileName1, OutFileName);
182 cout << "Renaming " << InFileName1 << " as " << OutFileName << endl;
188 // Remove the temp files from before the merge.
189 for (k = 0; k < NumFiles; k++)
191 strcpy(InFileName1 , "ExtSortTemp.");
192 _itoa(k, Extension, 10);
193 strcat(InFileName1, Extension);
197 // Get the new number of sorted files.
202 // If number of ExtSortTempA files is now 2, merge them with output to original file and return.
205 Merge("ExtSortTempA.0", "ExtSortTempA.1", FileName);
206 remove("ExtSortTempA.0");
207 remove("ExtSortTempA.1");
208 return; // The sort is finished.
211 // Otherwise, merge pairs of files as above, but start at the top end so as to
212 // incorporate any small remainder file. Note that we now merge from the
213 // ExtSortTempA files into ExtSortTemp files.
214 NumPairs = NumFiles / 2;
215 for (k = 0, Count = NumFiles - 1; k < NumPairs; k++)
217 strcpy(InFileName1 , "ExtSortTempA.");
218 _itoa(Count--, Extension, 10);
219 strcat(InFileName1, Extension);
220 strcpy(InFileName2 , "ExtSortTempA.");
221 _itoa(Count--, Extension, 10);
222 strcat(InFileName2, Extension);
223 strcpy(OutFileName , "ExtSortTemp.");
224 _itoa(k, Extension, 10);
225 strcat(OutFileName, Extension);
226 Merge(InFileName1, InFileName2, OutFileName);
228 // If the number of files is odd, just rename the last one.
229 if (2 * NumPairs < NumFiles)
232 strcpy(InFileName1 , "ExtSortTempA.");
233 _itoa(0, Extension, 10);
234 strcat(InFileName1, Extension);
235 strcpy(OutFileName , "ExtSortTemp.");
236 _itoa(NumPairs, Extension, 10);
237 strcat(OutFileName, Extension);
238 rename(InFileName1, OutFileName);
241 cout << "Renaming " << InFileName1 << " as " << OutFileName << endl;
247 // Remove the temp files from before the merge.
248 for (k = 0; k < NumFiles; k++)
250 strcpy(InFileName1 , "ExtSortTempA.");
251 _itoa(k, Extension, 10);
252 strcat(InFileName1, Extension);
256 // Get the new number of sorted files.
261 // If number of ExtSortTemp files is now 2, merge them with output to original file and return.
264 Merge("ExtSortTemp.0", "ExtSortTemp.1", FileName);
265 remove("ExtSortTemp.0");
266 remove("ExtSortTemp.1");
267 return; // The sort is finished.
274 /* Given: InFile Text file stream already opened for input.
275 MoreData Indicates if there is more data from InFile to place into Buffer.
276 At the very least this indicates if there is an unused item in NextWord.
277 NextWord If MoreData is true, then NextWord contains an unprocessed word from
278 InFile that has yet to be placed into Buffer.
279 Task: If MoreData is true, this function places as much data as possible from InFile into
280 Buffer. This data includes NextWord and then as much new data as possible from InFile.
281 The Buffer will be filled if there is enough data to do so.
282 Return: In the function name is returned the number of words placed into Buffer.
283 InFile The modified file stream.
284 Buffer Contains the data that has been copied in from InFile.
285 MoreData Indicates if NextWord contains a word that has not yet been placed into Buffer.
286 NextWord If MoreData is true, this contains the next word from InFile that has
287 not yet been placed into Buffer.
289 int FillBuffer(fstream & InFile, BufType Buffer, bool & MoreData, StringType NextWord)
293 while (MoreData && (k < BufSize))
295 strcpy(Buffer[k], NextWord);
296 InFile.getline(NextWord, MaxString);
308 /* Given: InFileName1 The name of the first text file to be merged.
309 InFileName2 The name of the second text file to be merged.
310 OutFileName The name of the file in which to place the merged data.
311 Task: To merge the sorted text files named InFileName1 and InFileName2 into one
312 named by OutFileName.
313 Return: Nothing directly, but the text file named by OutFileName is created and filled
314 with the sorted data from InFileName1 and InFileName2.
316 void Merge(StringType InFileName1, StringType InFileName2, StringType OutFileName)
318 fstream InFile1, InFile2, OutFile;
319 BufType Buffer1, Buffer2, Buffer3;
320 bool HaveData1, HaveData2, MoreData1, MoreData2;
321 StringType NextWord1, NextWord2;
322 int k, Result, NumWords1, NumWords2, Index1, Index2, Index3 = 0;
324 InFile1.open(InFileName1, ios::in);
326 Error("Could not open the file named ", InFileName1);
328 InFile2.open(InFileName2, ios::in);
330 Error("Could not open the file named ", InFileName2);
332 OutFile.open(OutFileName, ios::out);
334 Error("Could not open the file named ", OutFileName);
337 cout << "Merging " << InFileName1 << " and " << InFileName2 << " to " << OutFileName << endl;
340 // Try to fill a buffer from InFile1.
341 InFile1.getline(NextWord1, MaxString);
347 NumWords1 = FillBuffer(InFile1, Buffer1, MoreData1, NextWord1);
350 // Try to fill a buffer from InFile2.
351 InFile2.getline(NextWord2, MaxString);
357 NumWords2 = FillBuffer(InFile2, Buffer2, MoreData2, NextWord2);
362 // Each HaveData boolean indicates if we have more unmerged data. This data could be in
363 // the corresponding NextWord variable, the corresponding Buffer, or both.
364 HaveData1 = MoreData1 || (Index1 < NumWords1);
365 HaveData2 = MoreData2 || (Index2 < NumWords2);
367 while (HaveData1 && HaveData2)
369 if (Index1 == NumWords1)
371 NumWords1 = FillBuffer(InFile1, Buffer1, MoreData1, NextWord1);
373 break; // We are out of data from InFile1.
377 if (Index2 == NumWords2)
379 NumWords2 = FillBuffer(InFile2, Buffer2, MoreData2, NextWord2);
381 break; // We are out of data from InFile2.
386 Result = strcmp(Buffer1[Index1], Buffer2[Index2]); // case insensitive compare
388 if (Result == 0) // Copy both data items.
390 Copy(Buffer1[Index1], Buffer3, Index3, OutFile);
391 Copy(Buffer2[Index2], Buffer3, Index3, OutFile);
395 else if (Result < 0) // Copy the first data item.
397 Copy(Buffer1[Index1], Buffer3, Index3, OutFile);
400 else // Result > 0, so copy the second data item.
402 Copy(Buffer2[Index2], Buffer3, Index3, OutFile);
406 HaveData1 = MoreData1 || (Index1 < NumWords1);
407 HaveData2 = MoreData2 || (Index2 < NumWords2);
410 // Handle remaining data in either file.
413 if (Index1 == NumWords1)
415 NumWords1 = FillBuffer(InFile1, Buffer1, MoreData1, NextWord1);
417 break; // We are out of data from InFile1.
422 Copy(Buffer1[Index1], Buffer3, Index3, OutFile);
424 HaveData1 = MoreData1 || (Index1 < NumWords1);
429 if (Index2 == NumWords2)
431 NumWords2 = FillBuffer(InFile2, Buffer2, MoreData2, NextWord2);
433 break; // We are out of data from InFile2.
438 Copy(Buffer2[Index2], Buffer3, Index3, OutFile);
440 HaveData2 = MoreData2 || (Index2 < NumWords2);
443 // Flush any remaining data from the output buffer.
444 for (k = 0; k < Index3; k++)
445 OutFile << Buffer3[k] << endl;
448 OutFile.seekp(0, ios_base::end);
449 cout << "Tamaño del archivo '" << OutFileName << "': " << OutFile.tellp() / 1024 << " KB\n";
457 /* Given: Word One word of data to be placed into Buffer.
458 Buffer A buffer that may already contain words.
459 Index The first unused index in Buffer.
460 OutFile A text file stream already opened for output.
461 Task: To place Word into Buffer. If the Buffer is full, the data
462 already in it is first written out to OutFile, then Word
463 is placed into Buffer as the only item present.
464 Return: Buffer The updated buffer.
465 Index The updated index of the first unused location in Buffer.
466 OutFile The possibly updated file stream.
468 void Copy(StringType Word, BufType Buffer, int & Index, fstream & OutFile)
472 if (Index == BufSize) // There is no room in Buffer, so first write the Buffer data to OutFile.
474 for (k = 0; k < BufSize; k++)
475 OutFile << Buffer[k] << endl;
479 strcpy(Buffer[Index], Word);
484 /* Given: First A char array string.
485 Second Another char array string.
486 Task: To swap these values.
487 Return: First Updated string.
488 Second Updated string.
490 inline void Swap(StringType First, StringType Second)
495 strcpy(First, Second);
496 strcpy(Second, Temp);
500 /* Given: Buf Array of char array strings
501 Lower An index into the array.
502 Upper An index into the array.
503 Task: To quicksort the section of the array from index Lower to Upper.
504 Return: Buf The sorted array.
506 void QuickSort(BufType Buf, int Lower, int Upper)
512 PivotIndex = Partition(Buf, Lower, Upper);
513 QuickSort(Buf, Lower, PivotIndex - 1); // sort left side
514 QuickSort(Buf, PivotIndex + 1, Upper); // sort right side
519 /* Given: Buf Array of char array strings.
520 Lower An index into the array.
521 Upper An index into the array.
522 Assume: Lower < Upper
523 Task: To partition the section of Buf between index Lower and
524 index Upper so that everything to the left of the returned
525 index is less or equal to the pivot item, which is Buf[Lower],
526 everything to the right of the returned index is greater
527 than the pivot item, and the pivot item itself is located
528 at the returned index.
529 Return: Buf The partitioned array.
530 In the function name this returns the index of the pivot value.
532 int Partition(BufType Buf, int Lower, int Upper)
537 strcpy(Pivot, Buf[Lower]);
543 // Scan from left, skipping items that belong there. Use case-insensitive compare.
544 while ((strcmp(Buf[Left], Pivot) <= 0) && (Left < Upper))
546 // Scan from right, skipping items that belong there. Use case-insensitive compare.
547 while (strcmp(Buf[Right], Pivot) > 0)
550 Swap(Buf[Left], Buf[Right]);
553 strcpy(Buf[Lower], Buf[Right]);
554 strcpy(Buf[Right], Pivot);
555 return Right; // return the pivot index
560 /* Given: MsgA, MsgB Two messages (string objects) to display.
561 Task: Display MsgA and MsgB, then exit the program.
562 Return: Nothing to the program, but does return an error code of 1 to the operating system.
564 void Error(const string & MsgA, const string & MsgB)
566 cerr << "Error: " << MsgA << MsgB << endl;