X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=fastamap.cpp;h=a0652d996fa648a7130928e93c7f3c6b0fae583a;hb=df82ee669d7eb9ae9a1a334339dfab7961cb16c6;hp=ebfecedb860bafb6385ca2f75a3752702354498b;hpb=5b7ac70116137b52dd7884b76c5bca660a5fea38;p=mothur.git diff --git a/fastamap.cpp b/fastamap.cpp index ebfeced..a0652d9 100644 --- a/fastamap.cpp +++ b/fastamap.cpp @@ -8,43 +8,47 @@ */ #include "fastamap.h" +#include "sequence.hpp" /*******************************************************************************/ + void FastaMap::readFastaFile(ifstream& in) { try { string name, sequence, line; sequence = ""; - - in >> line; - name = line.substr(1, line.length()); //rips off '>' - +// int c; + string temp; + + //read through file - while (!in.eof()) { - in >> line; - if (line != "") { - if (isalnum(line.at(0))) { //if it's a sequence line - sequence += line; - } - else{ - //input sequence info into map - seqmap[name] = sequence; - it = data.find(sequence); - if (it == data.end()) { //it's unique. - data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. - data[sequence].groupnumber = 1; - data[sequence].names = name; - }else { // its a duplicate. - data[sequence].names += "," + name; - data[sequence].groupnumber++; - } - name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>' - sequence = ""; - } - } +// while ((c = in.get()) != EOF) { +// name = ""; sequence = ""; +// //is this a name +// if (c == '>') { +// name = readName(in); +// sequence = readSequence(in); +// }else { cout << "Error fasta in your file. Please correct." << endl; } + + //store info in map + //input sequence info into map + while(!in.eof()){ + Sequence currSeq(in); + name = currSeq.getName(); + sequence = currSeq.getUnaligned(); + seqmap[name] = sequence; + it = data.find(sequence); + if (it == data.end()) { //it's unique. + data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. + data[sequence].groupnumber = 1; + data[sequence].names = name; + }else { // its a duplicate. + data[sequence].names += "," + name; + data[sequence].groupnumber++; + } + gobble(in); } - - + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -55,19 +59,27 @@ void FastaMap::readFastaFile(ifstream& in) { exit(1); } } + /*******************************************************************************/ + string FastaMap::getGroupName(string seq) { //pass a sequence name get its group return data[seq].groupname; } + /*******************************************************************************/ + string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s. return data[seq].names; } + /*******************************************************************************/ + int FastaMap::getGroupNumber(string seq) { //pass a sequence get the number of identical sequences. return data[seq].groupnumber; } + /*******************************************************************************/ + string FastaMap::getSequence(string name) { it2 = seqmap.find(name); if (it2 == seqmap.end()) { //it's not found @@ -76,7 +88,9 @@ string FastaMap::getSequence(string name) { return it2->second; } } + /*******************************************************************************/ + void FastaMap::push_back(string name, string seq) { it = data.find(seq); if (it == data.end()) { //it's unique. @@ -90,11 +104,15 @@ void FastaMap::push_back(string name, string seq) { seqmap[name] = seq; } + /*******************************************************************************/ + int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences return data.size(); } + /*******************************************************************************/ + void FastaMap::printNamesFile(ostream& out){ //prints data try { // two column file created with groupname and them list of identical sequence names @@ -111,10 +129,12 @@ void FastaMap::printNamesFile(ostream& out){ //prints data exit(1); } } + /*******************************************************************************/ + void FastaMap::printCondensedFasta(ostream& out){ //prints data try { - // two column file created with groupname and them list of identical sequence names + //creates a fasta file for (it = data.begin(); it != data.end(); it++) { out << ">" << it->second.groupname << endl; out << it->first << endl; @@ -129,5 +149,6 @@ void FastaMap::printCondensedFasta(ostream& out){ //prints data exit(1); } } + /*******************************************************************************/