X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=fastamap.cpp;h=bf55493faece21cbd6c0043ac89149826c83c166;hp=1cc9abf08da58bad5497079d46185413e0c23ff4;hb=d1c97b8c04bb75faca1e76ffad60b37a4d789d3d;hpb=74c78f9abd9e733f0c2f812efec97a76632fcbf8 diff --git a/fastamap.cpp b/fastamap.cpp index 1cc9abf..bf55493 100644 --- a/fastamap.cpp +++ b/fastamap.cpp @@ -8,182 +8,190 @@ */ #include "fastamap.h" +#include "sequence.hpp" /*******************************************************************************/ -void FastaMap::readFastaFile(ifstream& in) { + +void FastaMap::readFastaFile(string inFileName) { try { + ifstream in; + m->openInputFile(inFileName, in); string name, sequence, line; sequence = ""; - int c; string temp; + map::iterator itName; - //read through file - while ((c = in.get()) != EOF) { - name = ""; sequence = ""; - //is this a name - if (c == '>') { - name = readName(in); - sequence = readSequence(in); - }else { cout << "Error fasta in your file. Please correct." << endl; } - - //store info in map - //input sequence info into map - seqmap[name] = sequence; - it = data.find(sequence); - if (it == data.end()) { //it's unique. - data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. - data[sequence].groupnumber = 1; - data[sequence].names = name; - }else { // its a duplicate. - data[sequence].names += "," + name; - data[sequence].groupnumber++; - } + while(!in.eof()){ + if (m->control_pressed) { break; } + + Sequence currSeq(in); + name = currSeq.getName(); - gobble(in); + if (name != "") { + if(currSeq.getIsAligned()) { sequence = currSeq.getAligned(); } + else { sequence = currSeq.getUnaligned(); } + + itName = seqmap.find(name); + if (itName == seqmap.end()) { seqmap[name] = sequence; } + else { m->mothurOut("You already have a sequence named " + name + ", sequence names must be unique, please correct."); m->mothurOutEndLine(); } + + map::iterator it = data.find(sequence); + if (it == data.end()) { //it's unique. + data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. + // data[sequence].groupnumber = 1; + data[sequence].names = name; + }else { // its a duplicate. + data[sequence].names += "," + name; + // data[sequence].groupnumber++; + } + } + m->gobble(in); } - + in.close(); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the FastaMap class function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "FastaMap", "readFastaFile"); exit(1); } } + /*******************************************************************************/ -string FastaMap::readName(ifstream& in) { - try{ - string name = ""; - int c; - string temp; + +void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //prints data + + ifstream oldNameFile; + m->openInputFile(oldNameFileName, oldNameFile); + + map oldNameMap; + map::iterator itName; + string name, list; + while(!oldNameFile.eof()){ + if (m->control_pressed) { break; } - while ((c = in.get()) != EOF) { - //if c is not a line return - if (c != 10) { - name += c; - }else { break; } - } - - return name; - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readName. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); + oldNameFile >> name; m->gobble(oldNameFile); + oldNameFile >> list; + oldNameMap[name] = list; + m->gobble(oldNameFile); } - catch(...) { - cout << "An unknown error has occurred in the FastaMap class function readName. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} - -/*******************************************************************************/ -string FastaMap::readSequence(ifstream& in) { - try{ - string sequence = ""; - string line; - int pos, c; + oldNameFile.close(); + + ifstream inFASTA; + m->openInputFile(inFastaFile, inFASTA); + string sequence; + while(!inFASTA.eof()){ + if (m->control_pressed) { break; } - while (!in.eof()) { - //save position in file in case next line is a new name. - pos = in.tellg(); - line = ""; - in >> line; - //if you are at a new name - if (line[0] == '>') { - //put file pointer back since you are now at a new name - in.seekg(pos, ios::beg); - c = in.get(); //because you put it back to a newline char - break; - }else { sequence += line; } - } + Sequence currSeq(inFASTA); + name = currSeq.getName(); + + if (name != "") { + if(currSeq.getIsAligned()) { sequence = currSeq.getAligned(); } + else { sequence = currSeq.getUnaligned(); } - return sequence; - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readSequence. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the FastaMap class function readSequence. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); + itName = seqmap.find(name); + if (itName == seqmap.end()) { seqmap[name] = sequence; } + else { m->mothurOut("You already have a sequence named " + name + ", sequence names must be unique, please correct."); m->mothurOutEndLine(); } + + seqmap[name] = sequence; + map::iterator it = data.find(sequence); + if (it == data.end()) { //it's unique. + data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. + // data[sequence].groupnumber = 1; + data[sequence].names = oldNameMap[name]; + }else { // its a duplicate. + data[sequence].names += "," + oldNameMap[name]; + // data[sequence].groupnumber++; + } + } + m->gobble(inFASTA); } + + + inFASTA.close(); } + /*******************************************************************************/ + string FastaMap::getGroupName(string seq) { //pass a sequence name get its group return data[seq].groupname; } + /*******************************************************************************/ + string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s. return data[seq].names; } + /*******************************************************************************/ -int FastaMap::getGroupNumber(string seq) { //pass a sequence get the number of identical sequences. - return data[seq].groupnumber; -} -/*******************************************************************************/ + string FastaMap::getSequence(string name) { - it2 = seqmap.find(name); - if (it2 == seqmap.end()) { //it's not found - return "not found"; - }else { // found it - return it2->second; - } + + map::iterator it = seqmap.find(name); + if (it == seqmap.end()) { return "not found"; } + else { return it->second; } + } + /*******************************************************************************/ + void FastaMap::push_back(string name, string seq) { - it = data.find(seq); + + map::iterator it = data.find(seq); if (it == data.end()) { //it's unique. data[seq].groupname = name; //group name will be the name of the first duplicate sequence found. - data[seq].groupnumber = 1; data[seq].names = name; }else { // its a duplicate. data[seq].names += "," + name; - data[seq].groupnumber++; } - seqmap[name] = seq; } + /*******************************************************************************/ + int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences return data.size(); } + /*******************************************************************************/ -void FastaMap::printNamesFile(ostream& out){ //prints data + +void FastaMap::printNamesFile(string outFileName){ //prints data try { + ofstream outFile; + m->openOutputFile(outFileName, outFile); + // two column file created with groupname and them list of identical sequence names - for (it = data.begin(); it != data.end(); it++) { - out << it->second.groupname << '\t' << it->second.names << endl; + for (map::iterator it = data.begin(); it != data.end(); it++) { + if (m->control_pressed) { break; } + outFile << it->second.groupname << '\t' << it->second.names << endl; } + outFile.close(); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "FastaMap", "printNamesFile"); exit(1); } } + /*******************************************************************************/ -void FastaMap::printCondensedFasta(ostream& out){ //prints data + +void FastaMap::printCondensedFasta(string outFileName){ //prints data try { + ofstream out; + m->openOutputFile(outFileName, out); //creates a fasta file - for (it = data.begin(); it != data.end(); it++) { + for (map::iterator it = data.begin(); it != data.end(); it++) { + if (m->control_pressed) { break; } out << ">" << it->second.groupname << endl; out << it->first << endl; } + out.close(); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "FastaMap", "printCondensedFasta"); exit(1); } } + /*******************************************************************************/