Hello,
I am implementing a spellchecker program that previously used vectors and lists but must now use sets. Here is the code I have been given:
correction.h
#ifndef CORRECTION_H
#define CORRECTION_H
#include "wordoccurrence.h"
class Correction
//
// Pairs a word occurrence that has been judged to be misspelled with the
// string that should replace it.  The replacement is not necessarily a
// single "word": correcting "tothe" to "to the", for example, produces a
// replacement containing a blank.
//
{
public:
    Correction();
    //post: getMisspelling() == WordOccurrence()
    //   && getReplacement() == ""

    Correction (const WordOccurrence& wo,
                const std::string& replacement);
    //post: getMisspelling() == wo
    //   && getReplacement() == replacement

    // The misspelled occurrence (a copy is returned).
    WordOccurrence getMisspelling() const
    {
        return _misspelling;
    }

    void putMisspelling (const WordOccurrence& missp)
    {
        _misspelling = missp;
    }

    // The replacement text (a copy is returned).
    std::string getReplacement() const
    {
        return _replacement;
    }

    void putReplacement (const std::string& repl)
    {
        _replacement = repl;
    }

    // Write this correction to a stream (mainly for debugging purposes).
    void put (std::ostream&) const;

private:
    WordOccurrence _misspelling;
    std::string _replacement;
};
// Stream insertion for a Correction: delegates to Correction::put and
// returns the same stream so insertions can be chained.
inline std::ostream& operator<< (std::ostream& out,
                                 const Correction& correction)
{
    correction.put (out);
    return out;
}
bool operator< (const Correction& left, const Correction& right);
bool operator== (const Correction& left, const Correction& right);
#endif
correction.cpp
#include "correction.h"
using namespace std;
Correction::Correction()
    : _misspelling(), _replacement()
//post: getMisspelling() == WordOccurrence()
//   && getReplacement() == ""
{
}
Correction::Correction (const WordOccurrence& wo,
                        const std::string& replacement)
    : _misspelling(wo),
      _replacement(replacement)
//post: getMisspelling() == wo
//   && getReplacement() == replacement
{
}
// Output (mainly for debugging purposes): writes the misspelled
// occurrence, the separator "=>", and then the replacement text.
void Correction::put (std::ostream& out) const
{
    out << _misspelling;
    out << "=>";
    out << _replacement;
}
// Orders corrections by their misspelled occurrence first; corrections of
// the same occurrence are ordered by replacement text.
bool operator< (const Correction& left, const Correction& right)
{
    const WordOccurrence leftWord = left.getMisspelling();
    const WordOccurrence rightWord = right.getMisspelling();

    if (leftWord < rightWord)
        return true;
    if (!(leftWord == rightWord))
        return false;

    // Same occurrence: the replacement text breaks the tie.
    return left.getReplacement() < right.getReplacement();
}
// Two corrections are equal when both the misspelled occurrence and the
// replacement text are equal.
bool operator== (const Correction& left, const Correction& right)
{
    if (!(left.getMisspelling() == right.getMisspelling()))
        return false;
    return left.getReplacement() == right.getReplacement();
}
replacement.h
#ifndef REPLACEMENT_H
#define REPLACEMENT_H
#include <iostream>
#include <string>
class Replacement
//
// Associates a misspelled word with the text chosen to replace it.
//
{
public:
    // Both strings start out empty.
    Replacement () {}

    Replacement (const std::string& missp, const std::string& repl);

    // The misspelled word (a copy is returned).
    std::string getMisspelledWord() const
    {
        return _misspelledWord;
    }

    void setMisspelledWord (const std::string& mw)
    {
        _misspelledWord = mw;
    }

    // The replacement text (a copy is returned).
    std::string getReplacement() const
    {
        return _replacement;
    }

    void setReplacement (const std::string& r)
    {
        _replacement = r;
    }

    // Write this replacement to a stream.
    void put (std::ostream&) const;

private:
    std::string _misspelledWord;
    std::string _replacement;
};
// Stream insertion for a Replacement: delegates to Replacement::put and
// returns the same stream so insertions can be chained.
inline std::ostream& operator<< (std::ostream& out, const Replacement& r)
{
    r.put (out);
    return out;
}
bool operator== (const Replacement& left, const Replacement& right);
bool operator< (const Replacement& left, const Replacement& right);
#endif
replacement.cpp
#include "replacement.h"
using namespace std;
// Build a replacement from the misspelled word and its replacement text.
Replacement::Replacement (const std::string& missp, const std::string& repl)
    : _misspelledWord(missp),
      _replacement(repl)
{
}
void Replacement::put (ostream& out) const
{
out << _misspelledWord << "=>" << _replacement;
}
// Replacements are equal when they have the same misspelled word; the
// replacement text is not compared.
bool operator== (const Replacement& left, const Replacement& right)
{
    const std::string leftWord = left.getMisspelledWord();
    const std::string rightWord = right.getMisspelledWord();
    return leftWord == rightWord;
}
// Replacements are ordered by misspelled word only; the replacement text
// is not compared.
bool operator< (const Replacement& left, const Replacement& right)
{
    const std::string leftWord = left.getMisspelledWord();
    const std::string rightWord = right.getMisspelledWord();
    return leftWord < rightWord;
}
sequential.h
#ifndef SEQUENTIAL_H
#define SEQUENTIAL_H
//
// Look for x in the first n elements of the sorted array a[]
// (for all i in 0..n-2, a[i] <= a[i+1]).
// Returns the position where x is found, or -1 if it is not found.
// Only operator< is used to compare elements.
//
template <class Etype>
int sequentialSearch (const Etype* a, unsigned n, const Etype& x)
{
    for (unsigned pos = 0; pos < n; ++pos)
    {
        if (a[pos] < x)
            continue;               // still below x, keep scanning

        // a[pos] is the first element that is not less than x:
        // it is a match unless x is strictly smaller than it.
        if (x < a[pos])
            return -1;
        return static_cast<int>(pos);
    }
    return -1;                      // every element was less than x
}
//
// Insert x into the sorted array a[], which currently holds n elements
// (for all i in 0..n-2, a[i] <= a[i+1]).  The array must have room for
// n+1 elements, because slot a[n] is written.
// x is placed so that a[] is still sorted afterwards.
// Returns the position where x was inserted.
//
template <class Etype>
int sequentialInsert (Etype* a, unsigned n, const Etype& x)
{
    int i = static_cast<int>(n);
    // Walk down from the free slot a[n], shifting each element that is
    // greater than x one position to the right.  The comparison has to look
    // at a[i-1] (the element about to be shifted); a[i] is the vacated or
    // not-yet-used slot and holds no meaningful value.
    for (; (i > 0) && (x < a[i-1]); --i)
        a[i] = a[i-1];
    a[i] = x;
    return i;
}
#endif
token.h
#ifndef TOKEN_H
#define TOKEN_H
#include <string>
#include <iostream>
class Token
//
// A "token" is a meaningful unit extracted from a text file.
// For example, a C++ compiler would consider the keyword "class"
// and the identifier "Token" in the line above to each be a single
// token.  In most applications, tokens can be formed from different
// numbers of characters and usually do not include white space (blanks,
// new lines, etc.), but the exact definition of a "token" is highly
// application-dependent.
//
{
public:
    typedef unsigned long Location;

    Token();
    //post: getLexeme()=="" && getLocation()==0

    Token (const std::string& lexeme,
           Location location);

    // A "lexeme" is the string of characters that has been identified
    // as constituting a token.
    std::string getLexeme() const
    {
        return _lexeme;
    }

    void putLexeme (const std::string& lex)
    {
        _lexeme = lex;
    }

    // The location indicates where in the original file the first
    // character of a token's lexeme was found.
    Location getLocation() const;
    void putLocation (const Location&);

    // Read a token from a stream.  A token is recognized as a consecutive
    // string of 1 or more characters beginning with some character from
    // startingCharacters, followed by zero or more characters from
    // middleCharacters, and ending with a character from endingCharacters
    // (which may be the same character as the beginning character).
    // Returns true if a token was found.  Returns false if no token could
    // be found (i.e., an input error occurred or end-of-file was reached
    // before finding an acceptable token).
    //
    // Warning: this function may read and discard an arbitrary number of
    // extra startingCharacters and middleCharacters until finding a valid
    // token (followed by a non-middleCharacter).
    virtual bool readToken(std::istream& input,
                           const std::string& startingCharacters,
                           const std::string& middleCharacters,
                           const std::string& endingCharacters);

    // Output (mainly for debugging purposes)
    void put (std::ostream&) const;

private:
    std::string _lexeme;
    Location _location;
};
bool operator< (const Token& left, const Token& right);
bool operator== (const Token& left, const Token& right);
// Stream insertion for a Token: delegates to Token::put and returns the
// same stream so insertions can be chained.
inline std::ostream& operator<< (std::ostream& out, const Token& t)
{
    t.put (out);
    return out;
}
// Returns the stored location of this token.
inline Token::Location Token::getLocation() const
{
    return this->_location;
}
// Replaces the stored location of this token with loc.
inline void Token::putLocation (const Token::Location& loc)
{
    this->_location = loc;
}
#endif
token.cpp
#include "token.h"
//
// A "token" is a meaningful unit extracted from a text file.
// For example, a C++ compiler would consider the keyword "class"
// and the identifier "Token" in the line above to each be a single
// token. In most applications, tokens can be formed from different numbers
// of characters, usually do not include white space (blanks, new lines,
// etc.), but the exact definition of a "token" is highly
// application-dependent.
using namespace std;
Token::Token()
    : _lexeme(), _location(0)
//post: getLexeme()=="" && getLocation()==0
{
}
// Build a token from a lexeme and the location where it was found.
Token::Token (const std::string& lexeme,
              Location location)
    : _lexeme(lexeme),
      _location(location)
{
}
// Read a token from a stream. A token is recognized as a consecutive
// string of 1 or more characters beginning with some character from
// startingCharacters, followed by zero or more characters from
// middleCharacters, and ending with a character from endingCharacters.
// Returns true if a token was found. Returns false if no token could
// be found (i.e., an input error occurred or end-of-file was reached
// before finding an acceptable token).
bool Token::readToken(std::istream& input,
const std::string& startingCharacters,
const std::string& middleCharacters,
const std::string& endingCharacters)
{
string lexeme;
Location location;
char c;
bool done = false;
while (!done)
{
// Hunt for a starting character
c = input.get();
// cerr << c << " is at " << startingCharacters.find(c) << endl;
while ((input) && (startingCharacters.find(c) == string::npos))
{
c = input.get();
}
if (!input) return false; // Could not find starting character
lexeme = c;
location = (Location)input.tellg() - (Location)1;
// Now read forward until we come to something that is not a middle character
c = input.get();
while ((input) && (middleCharacters.find(c) != string::npos))
{
lexeme += c;
c = input.get();
}
if ((input) && (endingCharacters.find(c) == string::npos))
{
// If the last character we read was not an endingCharacter, put
// it back into the input stream.
input.unget();
} else if (input) {
lexeme += c;
}
// Did we find an ending character anywhere in the characters we read?
int endingPos = lexeme.find_last_of(endingCharacters);
if (endingPos == string::npos) {
// No: keep trying (if there's more input available)
done = !input;
}
else
{
// Yes: return the lexeme we have found
_location = location;
_lexeme = lexeme.substr(0, endingPos+1);
return true;
}
}
return false;
}
// Output (mainly for debugging purposes): writes the lexeme, an '@'
// character, and then the location.
void Token::put (std::ostream& out) const
{
    out << _lexeme;
    out << '@';
    out << _location;
}
// Orders tokens by location; tokens at the same location are ordered by
// lexeme.
bool operator< (const Token& left, const Token& right)
{
    const Token::Location leftLoc = left.getLocation();
    const Token::Location rightLoc = right.getLocation();

    if (leftLoc != rightLoc)
        return leftLoc < rightLoc;

    // Same location: the lexeme breaks the tie.
    return left.getLexeme() < right.getLexeme();
}
// Tokens are equal when both their locations and their lexemes are equal.
bool operator== (const Token& left, const Token& right)
{
    if (left.getLocation() != right.getLocation())
        return false;
    return left.getLexeme() == right.getLexeme();
}
wordoccurrence.h
#ifndef WORDOCCURRENCE_H
#define WORDOCCURRENCE_H
#include "token.h"
class WordOccurrence
//
// A "word" is a string that begins with an alphanumeric character,
// continues with 0 or more alphanumeric characters, hyphens, or
// apostrophes, and ends with a letter.  For example,
//     the   2nd   wasn't   how-do-you-do
// are all words, but
//     ./?   Yes?   123   multi-
// are not.
//
// A word occurrence describes a word within a document, combining the
// "lexeme" (the string of characters) that makes up the word with the
// location within the document where the word was found.
//
{
public:
    typedef Token::Location Location;

    WordOccurrence();
    //post: getLexeme()=="" && getLocation()==0

    WordOccurrence (const std::string& lexeme,
                    Location location);

    // A "lexeme" is the string of characters that has been identified
    // as constituting a token.
    std::string getLexeme() const
    {
        return tok.getLexeme();
    }

    void putLexeme (const std::string& lex)
    {
        tok.putLexeme (lex);
    }

    // The location indicates where in the original file the first
    // character of a token's lexeme was found.
    Location getLocation() const
    {
        return tok.getLocation();
    }

    void putLocation (const Location& loc)
    {
        tok.putLocation (loc);
    }

    // Read a word from a stream.
    bool read(std::istream& input);

    // Output (mainly for debugging purposes)
    void put (std::ostream& out) const
    {
        tok.put (out);
    }

private:
    // The underlying token holding the lexeme and location.
    Token tok;

    // Character classes handed to Token::readToken when reading a word.
    static const std::string startingChars;
    static const std::string middleChars;
    static const std::string endingChars;

    friend bool operator< (const WordOccurrence& left,
                           const WordOccurrence& right);
    friend bool operator== (const WordOccurrence& left,
                            const WordOccurrence& right);
};
// Orders word occurrences by comparing their underlying tokens.
inline bool operator< (const WordOccurrence& left, const WordOccurrence& right)
{
    const bool isLess = (left.tok < right.tok);
    return isLess;
}
// Word occurrences are equal when their underlying tokens are equal.
inline bool operator== (const WordOccurrence& left, const WordOccurrence& right)
{
    const bool isEqual = (left.tok == right.tok);
    return isEqual;
}
// Stream insertion for a WordOccurrence: delegates to WordOccurrence::put
// and returns the same stream so insertions can be chained.
inline std::ostream& operator<< (std::ostream& out, const WordOccurrence& t)
{
    t.put (out);
    return out;
}
#endif
wordoccurrence.cpp
#include "wordoccurrence.h"
using namespace std;
const string WordOccurrence::endingChars
= string("abcdefghijklmnopqrstuvwxyz")
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const string WordOccurrence::startingChars
= endingChars + "0123456789";
const string WordOccurrence::middleChars
= startingChars + "-'";
WordOccurrence::WordOccurrence()
    : tok()
//post: getLexeme()=="" && getLocation()==0
{
}
// Build a word occurrence from a lexeme and the location where it was found.
WordOccurrence::WordOccurrence (const std::string& lexeme,
                                Location location)
    : tok (lexeme, location)
{
}
// Read a word from a stream by asking the underlying token to read itself
// using the word character classes.  Returns whatever Token::readToken
// returns (true when a word was found).
bool WordOccurrence::read(std::istream& input)
{
    const bool found =
        tok.readToken (input, startingChars, middleChars, endingChars);
    return found;
}
and finally the file that will hold all of my code edits:
spellcheck.cpp
// Spellcheck program
/*#include <algorithm>
#include <fstream>
#include <list>
#include <vector>
#include "wordoccurrence.h"
#include "correction.h"
#include "replacement.h"
#include "vectorUtils.h"*/
#include <algorithm>
#include <fstream>
#include "wordoccurrence.h"
#include "correction.h"
#include "replacement.h"
#include <set>
using namespace std;
/*
typedef list<string> WordSet;
typedef list<Replacement> ReplacementSet;
typedef vector<WordOccurrence> WordOccurrenceOrderedSequence;
typedef vector<Correction> CorrectionOrderedSequence;
*/
// Container aliases used by the spellchecker.  All four are std::set, so
// each holds unique elements kept in the order defined by the element
// type's operator<.
typedef set<string> WordSet;
typedef set<Replacement> ReplacementSet;
typedef set<WordOccurrence> WordOccurrenceOrderedSequence;
typedef set<Correction> CorrectionOrderedSequence;
std::string promptForAction (const WordOccurrence& misspelled);
std::string get_replacement_spelling(const std::string& word,
const WordSet& dictionary);
std::string getSaveFileName();
using namespace std;
#ifdef GRADING
string saveFileName;
#endif
void readDictionary (const string& dictionaryFileName,
WordSet& dictionary);
void collectMisspelledWords (const string& targetFileName,
const WordSet& dictionary,
WordOccurrenceOrderedSequence& misspellings);
void promptUserForCorrections
(const string& targetFileName,
const WordSet& dictionary,
const WordOccurrenceOrderedSequence& misspellings,
CorrectionOrderedSequence& corrections);
void produceCorrectedDocument(const string& targetFileName,
/*const*/CorrectionOrderedSequence& corrections,
const string& correctedFileName);
// Program entry point.  Expects a dictionary file name and a document file
// name on the command line (plus a save file name when compiled with GRADING
// defined).  Returns 1 after printing a usage message if the argument count
// is wrong, otherwise runs the spellcheck steps and returns 0.
int main (int argc, char** argv)
{
#ifndef GRADING
if (argc != 3) {
cerr << "Usage: " << argv[0]
<< " dictionaryFileName documentFileName"
<< endl;
return 1;
}
#else
if (argc != 4) {
cerr << "Usage: " << argv[0]
<< " dictionaryFileName documentFileName saveFileName"
<< endl;
return 1;
}
// In a GRADING build the save file name comes from the command line.
saveFileName = argv[3];
#endif
string dictionaryFileName = argv[1];
string targetFileName = argv[2];
// main spellcheck routine
WordSet dictionary;
readDictionary (dictionaryFileName, dictionary);
WordOccurrenceOrderedSequence misspellings;
collectMisspelledWords (targetFileName, dictionary, misspellings);
CorrectionOrderedSequence corrections;
promptUserForCorrections(targetFileName, dictionary,
misspellings, corrections);
// In a normal build the corrected document is written only when there is at
// least one correction; in a GRADING build the guard below is compiled out,
// so the braced block always runs.
#ifndef GRADING
if (!corrections.empty())
#endif
{
string correctedFileName = getSaveFileName();
produceCorrectedDocument(targetFileName, corrections,
correctedFileName);
}
return 0;
}
void readDictionary (const string& dictionaryFileName,
WordSet& dictionary)
{
dictionary.clear();
WordSet temp;
ifstream dictin (dictionaryFileName.c_str());
string word;
while (dictin >> word)
{
temp.insert (word);
}
//temp.sort();
/*string count = 0;
for (int i=0; i< temp.size(); ++i)
{
count = temp.insert(count, word);
++count;
}*/
unique_copy (temp.begin(), temp.end(), back_inserter(dictionary));
}
bool contains (const WordSet& words, const string& s)
{
return find(words.begin(), words.end(), s) != words.end();
}
void collectMisspelledWords (const string& targetFileName,
const WordSet& dictionary,
WordOccurrenceOrderedSequence& misspellings)
{
ifstream targetFile (targetFileName.c_str());
cout << "Checking " << targetFileName << endl;
WordOccurrence w;
while (w.read(targetFile))
{
if (!contains(dictionary, w.getLexeme()))
{
// int count = 0;
//misspellings.size() = count;
//count++;
//addInOrder (misspellings, w);
}
}
cout << misspellings.size() << " possible misspellings found." << endl;
}
void display_in_context (const WordOccurrence& occur, istream& inFile)
{
static const WordOccurrence::Location ContextSize = 20;
WordOccurrence::Location start = (occur.getLocation() >= ContextSize) ?
occur.getLocation() - ContextSize : (WordOccurrence::Location)0;
WordOccurrence::Location stop = occur.getLocation() + ContextSize
+ (WordOccurrence::Location)occur.getLexeme().length();
string beforeMisspelling;
string afterMisspelling;
inFile.seekg(start);
for (; start < occur.getLocation(); start+=1)
beforeMisspelling += (char)inFile.get();
start = occur.getLocation() + (WordOccurrence::Location)occur.getLexeme().length();
inFile.seekg(start);
for (; start < stop; start+=1)
afterMisspelling += (char)inFile.get();
// If there is a line break (\c or \n) more than 2 chars from
// this misspelling, terminate the context there.
int loc = beforeMisspelling.substr(0, beforeMisspelling.length()-2)
.find_last_of("\r\n");
if (loc != string::npos) {
beforeMisspelling = beforeMisspelling.substr(loc+1);
}
loc = afterMisspelling.find_first_of("\r\n", 2);
if (loc != string::npos) {
afterMisspelling = afterMisspelling.substr(0, loc-1);
}
cout << beforeMisspelling << "\n "
<< occur.getLexeme() << "\n"
<< afterMisspelling << endl;
}
void promptUserForCorrections
(const string& targetFileName,
const WordSet& dictionary,
const WordOccurrenceOrderedSequence& misspellings,
CorrectionOrderedSequence& corrections)
{
// assume misspellings are in order of occurrence within the
// target file (if not, sort them)
ifstream targetFile (targetFileName.c_str());
WordSet globalIgnoredSet;
ReplacementSet globalReplacementSet;
for (WordOccurrenceOrderedSequence::const_iterator m = misspellings.begin();
m != misspellings.end(); ++m)
{
WordOccurrence misspelling = *m;
string mword = misspelling.getLexeme();
WordOccurrence::Location mloc = misspelling.getLocation();
ReplacementSet::iterator repl =
find(globalReplacementSet.begin(), globalReplacementSet.end(),
Replacement(mword,""));
if (repl != globalReplacementSet.end())
{
// addInOrder(corrections,Correction(misspelling,
// (*repl).getReplacement()));
}
else
{
display_in_context(misspelling, targetFile);
string response = promptForAction(misspelling);
if (response == "ignore")
{
// do nothing
}
else if (response == "ignore every time")
{
globalIgnoredSet.insert(mword);
}
else if (response == "replace")
{
string r = get_replacement_spelling(mword, dictionary);
// addInOrder (corrections, Correction(misspelling, r));
}
else if (response == "replace every time")
{
string r = get_replacement_spelling(mword, dictionary);
addInOrder (corrections, Correction(misspelling, r));
globalReplacementSet.push_back (Replacement(mword, r));
}
else if (response == "quit")
{
break;
}
}
}
}
// Write a corrected copy of the file named targetFileName to the file named
// correctedFileName: the text is copied unchanged except that each
// misspelled word listed in `corrections` is replaced by its replacement
// string.  Progress messages are printed before and after saving.
void produceCorrectedDocument(const string& targetFileName,
                              /*const*/CorrectionOrderedSequence& corrections,
                              const string& correctedFileName)
{
    cout << "Corrected document saving to " << correctedFileName << endl;

    // The corrections set is ordered by operator<, so no sorting is needed
    // here.
    ifstream targetFile (targetFileName.c_str());
    ofstream correctedFile (correctedFileName.c_str());

    for (CorrectionOrderedSequence::iterator c = corrections.begin();
         c != corrections.end(); ++c)
    {
        // copy into the correctedFile all characters from
        // current location of the targetFile up to
        // the location of c;
        for (long offset = (*c).getMisspelling().getLocation()
                           - targetFile.tellg();
             offset > 0; --offset) {
            correctedFile.put ((char)targetFile.get());
        }

        // read and discard the misspelled word from the targetFile;
        WordOccurrence misspelled;
        misspelled.read(targetFile);

        // write the corrected spelling from c into the correctedFile;
        correctedFile << (*c).getReplacement();
    }

    // copy any remaining characters from the targetFile into the
    // correctedFile.  The stream state alone decides when to stop: testing
    // the character's value (ch >= 0) would end the copy at the first byte
    // of 0x80 or above wherever char is signed, truncating the document.
    char ch;
    while (targetFile.get(ch))
    {
        correctedFile.put(ch);
    }

    cout << "Corrected document saved in " << correctedFileName << endl;
}
/***************************
Interactive routines for the spellcheck program
When compiled with GRADING flag set, these are replaced by
noninteractive code for auto-testing purposes.
****************************/
// Ask what should be done about a misspelled word and return the answer as
// one of the strings "replace", "replace every time", "ignore",
// "ignore every time" or "quit".
// In a normal build a menu is printed and single characters are read from
// cin until one of r, R, i, I, Q is entered.  In a GRADING build nothing is
// read and "replace every time" is always returned.
string promptForAction (const WordOccurrence& misspelled)
{
#ifndef GRADING
cout << "\n" << misspelled.getLexeme() << ":\n"
<< " (r)eplace this word, just this once\n"
<< " (R)eplace this word every time\n"
<< " (i)gnore this word, just this once\n"
<< " (I)gnore this word every time\n"
<< " (Q)uit\n"
<< ">" << flush;
char response;
bool OK = false;  // NOTE(review): never read after this initialization
// NOTE(review): if cin fails (e.g. end of input) `response` is never set to a
// menu character, so this loop has no way to terminate.
while (1)
{
cin >> response;
switch (response) {
case 'r': return "replace";
case 'R': return "replace every time";
case 'i': return "ignore";
case 'I': return "ignore every time";
case 'Q': return "quit";
}
// Any other printable character: repeat the prompt.
if (response > ' ')
cout << "Please respond with one of: rRiIQ\n>" << flush;
}
#else
return "replace every time";
#endif
}
// Obtain the replacement spelling for `word`.
// In a normal build the user is prompted on cout and the replacement is read
// from cin (one whitespace-delimited string), repeating until it is
// non-empty.  In a GRADING build the word wrapped in square brackets is
// returned without any interaction.  The `dictionary` parameter is not used
// by the current code.
string get_replacement_spelling(const string& word,
const WordSet& dictionary)
{
#ifndef GRADING
cout << "\n" << word << ":" << endl;
// Get a list of potential replacement strings
// (not yet implemented)
string* replacements = 0;
int nReplacements = 0;
// nReplacements is always 0 here, so the else branch below is the one taken.
// NOTE(review): the if branch has no return statement; that must be added
// when the suggestion list is implemented.
if (nReplacements > 0)
{
// display the various possible replacements, let user pick from
// among them (not yet implemented)
}
else
{
// Prompt user directly for corrected spelling
string corrected;
// NOTE(review): if cin fails, `corrected` stays empty and this loop does not
// terminate.
while (corrected == "")
{
cout << "\nEnter correct spelling: " << flush;
cin >> corrected;
}
return corrected;
}
#else
return "[" + word + "]";
#endif
}
// Return the name of the file that will receive the corrected document.
// In a normal build the user is prompted and lines are read from cin until
// a non-empty one is entered.  In a GRADING build the global saveFileName
// is returned without any interaction.
string getSaveFileName()
{
#ifndef GRADING
string fileName;
cout << "Enter name of file to contain the corrected document, or blank\n"
<< "to discard all corrections:\n"
<< "> " << flush;
// NOTE(review): the prompt offers a blank answer to discard the corrections,
// but this loop keeps reading until the line is non-empty, so a blank line is
// never returned; it also does not terminate if cin fails.
while (fileName == "")
{
getline(cin, fileName);
}
return fileName;
#else
return saveFileName;
#endif
}
The constraint is that I cannot make any changes to code that lies between an #ifndef GRADING ... #endif pair.
I was given the last file as a vector-and-list implementation. I have attempted to adapt it to use std::set, but I am getting a compile error for:
globalReplacementSet.push_back (Replacement(mword, r));
I know that std::set has no push_back member function, so what is the std::set equivalent of push_back?