// Oldham, Jeffrey D. // 2000 Jan 25 // CS1321 // Modified by: Massingill, Berna L. // 2000 Jan 27 // CS1321 Homework 3: Search Engine Code #include #include // has EXIT_SUCCESS #include "types.h" #include #include // We require a very strict (too strict) format for the database file: // one line of keywords separated by _one_ space end ending with one // space followed by a newline character // one line per document beginning with the document's name and // followed by as many doubles as there are keywords. These doubles // are separated by spaces, and the last double is followed by a space. // ==== FUNCTION DECLARATIONS ==== // Read the document names and vectors stored in the specified file. // precondition: "databaseFileName" is the name of a file containing // documents and vector contents. // postconditions: "vdv" is filled with the documents and vectors // specified in "databaseFileName". // "km" is filled with the keywords // specified in "databaseFileName". // returns true only if succeeds (otherwise exits or // returns false). bool obtainDatabase(const char databaseFileName[], vector & vdv, KeywordMapping & km); // Read the keyword mapping stored in the specified file. // precondition: "databaseFileStream" has been successfully opened. // postconditions: "km" is filled with the keywords. // returns true if succeeds, false otherwise. bool obtainKM(ifstream & databaseFileStream, KeywordMapping & km); // Read the document names and vectors stored in the specified file. // preconditions: "databaseFileStream" has been successfully opened. // sz is the number of vector entries. // postconditions: "vdv" is filled with the documents and vectors. // returns true if succeeds, false otherwise. bool obtainVDV(ifstream & databaseFileStream, const std::vector::size_type & sz, vector & vdv); // Read one document name and its vector stored on one line of the specified file. // preconditions: "databaseFileStream" has been successfully opened. // sz is the number of vector entries. // postconditions: "dv" is filled with a document and a vector. // returns true if succeeds, false otherwise. bool obtainDV(ifstream & databaseFileStream, const std::vector::size_type & sz, DocVec & dv); // Ask the user for a list of keywords and the number of desired documents. // preconditions: "km" has been filled using a call to obtainDatabase(). // postconditions: "searchVector" is filled with values corresponding to // what the user wants to search for. // "nuDesiredDocs" is the number of desired documents // the user wants. For example, she may want the // top 10 documents. // returns true if succeeds, false otherwise. bool obtainSearchWords(const KeywordMapping & km, vector & searchVector, int& nuDesiredDocs); // Prompt for and obtain a list of words. // preconditions: "prompt" contains text for the prompt. // postconditions: user is prompted for a list of words, all on one line. // "words" contains words user enters in response. // returns true if succeeds, false otherwise. bool obtainWords(const string & prompt, vector & words); // Prompt for and obtain an integer. // preconditions: "prompt" contains text for the prompt. // postconditions: user is prompted for an integer. // "i" contains integer user enters in response. // returns true if succeeds, false otherwise. bool obtainInteger(const string & prompt, int & i); // Convert keywords to search vector. // preconditions: "km" has been filled using a call to obtainDatabase(). // "words" contains words to convert. // postconditions: "searchVector" contains result of converting "words" // and has unit length. // Words not appearing in km are omitted from searchVector. void convertWordsToSV(const KeywordMapping & km, const vector & words, vector & searchVector); // 2000Feb04 JDO adds declaration: // Convert to a unit vector. // preconditions: "v" has nonzero length. // type T can be divided by a double and multiplied with itself. // postcondition: "v" has been normalized to have unit length. // template void unitVector(vector & v); // Feel free to ADD function declarations here. ============================ // ==== MAIN PROGRAM ==== // Command-line argument should be: // 1. the filename of the database of documents and vectors int main(int argc, char * argv[]) { KeywordMapping km; // mapping from keywords to vector components // no need to look inside this code vector vdv; // collection of documents and their vectors int nuDesiredDocs; // Each search should produce this // many documents, e.g., the top 10 documents. if (argc != 2) { cerr << argv[0] << ": name-of-database-file\n"; return EXIT_FAILURE; } // ADD code here ========================================================== return EXIT_SUCCESS; } // ==== FUNCTION DEFINITIONS ==== bool obtainDatabase(const char databaseFileName[], vector & vdv, KeywordMapping & km) { ifstream dbfile; dbfile.open(databaseFileName); if (dbfile.fail()) { cerr << "File " << databaseFileName << " not found.\n"; exit(EXIT_FAILURE); } bool returnValue = obtainKM(dbfile, km) && obtainVDV(dbfile, km.size(), vdv); dbfile.close(); return returnValue; } // ----------------- bool obtainKM(ifstream & databaseFileStream, KeywordMapping & km) { string s; for (std::vector::size_type index = 0; databaseFileStream.peek() != '\n' && getline(databaseFileStream,s,' '); ++index) km[s] = index; return databaseFileStream; } // ----------------- bool obtainVDV(ifstream & databaseFileStream, const std::vector::size_type & sz, vector & vdv) { DocVec dv; while (obtainDV(databaseFileStream, sz, dv)) vdv.push_back(dv); return databaseFileStream.eof(); } // ----------------- bool obtainDV(ifstream & databaseFileStream, const std::vector::size_type & sz, DocVec & dv) { dv.second.resize(sz); if (databaseFileStream >> dv.first) for (std::vector::size_type index = 0; index < sz && databaseFileStream >> dv.second[index]; ++index) ; return databaseFileStream; } // ----------------- bool obtainSearchWords(const KeywordMapping & km, vector & searchVector, int& nuDesiredDocs) { vector keyWords; bool returnValue = obtainWords("keywords to search for", keyWords) && obtainInteger("number of results you want to see", nuDesiredDocs); convertWordsToSV(km, keyWords, searchVector); return returnValue; } // ---------------- bool obtainWords(const string & prompt, vector & words) { string line, word; cout << "Please enter " << prompt << ", all on one line:\n"; if (!getline(cin, line)) { cerr << "failed to enter the search words\n"; return false; } istrstream linestream(line.c_str()); while (linestream >> word) words.push_back(word); return true; } // ---------------- bool obtainInteger(const string & prompt, int & i) { cout << "Please enter " << prompt << ": "; return (cin >> i); } // ---------------- void convertWordsToSV(const KeywordMapping & km, const vector & words, vector & searchVector) { KeywordMapping::const_iterator kmpos; searchVector.resize(km.size()); for (vector::const_iterator pos = words.begin(); pos != words.end(); ++pos) if ((kmpos = km.find(*pos)) != km.end()) // clearest code I ever ++searchVector[(*kmpos).second]; // wrote, JDO :) unitVector(searchVector); return; } // ---------------- void unitVector(vector & v) { // ADD code here return; } // ADD code here ============================================================