*/
#include "fastamap.h"
+#include "sequence.hpp"
/*******************************************************************************/
-void FastaMap::readFastaFile(ifstream& in) {
+
// Reads the fasta file named inFileName and fills the two lookup tables of this object:
//   seqmap : sequence name   -> sequence string
//   data   : sequence string -> group (name of the first sequence seen, plus a
//            comma-separated list of every name that shares the sequence)
// inFileName - path handed to openInputFile().
// A std::exception raised in the body is reported through errorOut() and the
// program then exits with status 1.
+void FastaMap::readFastaFile(string inFileName) {
try {
+ ifstream in;
+ openInputFile(inFileName, in);
string name, sequence, line;
sequence = "";
-
- in >> line;
- name = line.substr(1, line.length()); //rips off '>'
-
- //read through file
- while (!in.eof()) {
- in >> line;
-
- if (line[0] != '>') { //if it's a sequence line
- sequence += line;
- }
- else{
- //input sequence info into map
- seqmap[name] = sequence;
-
- it = data.find(sequence);
- if (it == data.end()) { //it's unique.
- data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
- data[sequence].groupnumber = 1;
- data[sequence].names = name;
- }else { // its a duplicate.
- data[sequence].names += "," + name;
- data[sequence].groupnumber++;
- }
- name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
- sequence = "";
- }
// NOTE(review): temp is not referenced anywhere else in this function.
+ string temp;
+
// NOTE(review): the loop is driven by eof(), which is only set after a read has hit
// the end of the stream; presumably an empty file still enters the body once - confirm
// how Sequence behaves on an exhausted stream.
+ while(!in.eof()){
// Sequence is a project class (sequence.hpp); constructing it from the stream
// presumably consumes one fasta record - TODO confirm.
+ Sequence currSeq(in);
+ name = currSeq.getName();
+
// take the aligned string when the record reports itself as aligned, otherwise the unaligned one
+ if(currSeq.getIsAligned()) { sequence = currSeq.getAligned(); }
+ else { sequence = currSeq.getUnaligned(); }
+
// a name that occurs twice overwrites its earlier seqmap entry
+ seqmap[name] = sequence;
+ map<string,group>::iterator it = data.find(sequence);
+ if (it == data.end()) { //it's unique.
+ data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
+// data[sequence].groupnumber = 1;
+ data[sequence].names = name;
+ }else { // its a duplicate.
+ data[sequence].names += "," + name;
+// data[sequence].groupnumber++;
+ }
// gobble() is a project helper; presumably it skips trailing whitespace so that
// eof() becomes true after the last record - TODO confirm.
gobble(in);
}
- it = data.find(sequence);
- if (it == data.end()) { //it's unique.
- data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
- data[sequence].groupnumber = 1;
- data[sequence].names = name;
- }else { // its a duplicate.
- data[sequence].names += "," + name;
- data[sequence].groupnumber++;
- }
-
-
+ in.close();
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ errorOut(e, "FastaMap", "readFastaFile");
exit(1);
}
-catch(...) {
- cout << "An unknown error has occurred in the FastaMap class function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
+}
+
+/*******************************************************************************/
+
// Reads a fasta file together with an existing names file and fills seqmap and data.
// inFastaFile     - fasta file, read one Sequence object at a time.
// oldNameFileName - file read as whitespace-separated (name, list) token pairs; the
//                   list stored for a name is what gets recorded in data[...].names,
//                   in place of the bare sequence name.
// NOTE(review): unlike the one-argument overload, this body is not wrapped in a
// try/catch that calls errorOut().
+void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //reads a fasta file plus a names file
+
+ ifstream oldNameFile;
+ openInputFile(oldNameFileName, oldNameFile);
+
+ map<string,string> oldNameMap;
+ string name, list;
// load the names file: each pair of tokens is stored as name -> list
+ while(!oldNameFile.eof()){
+ oldNameFile >> name >> list;
+ oldNameMap[name] = list;
+ gobble(oldNameFile);
+ }
+ oldNameFile.close();
+
+ ifstream inFASTA;
+ openInputFile(inFastaFile, inFASTA);
+ string sequence;
+ while(!inFASTA.eof()){
+ Sequence currSeq(inFASTA);
+ name = currSeq.getName();
+
// take the aligned string when the record reports itself as aligned, otherwise the unaligned one
+ if(currSeq.getIsAligned()) { sequence = currSeq.getAligned(); }
+ else { sequence = currSeq.getUnaligned(); }
+
+ seqmap[name] = sequence;
+ map<string,group>::iterator it = data.find(sequence);
+ if (it == data.end()) { //it's unique.
+ data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
+// data[sequence].groupnumber = 1;
// NOTE(review): oldNameMap[name] default-inserts an empty string when name was not
// present in the names file, so names can end up empty here.
+ data[sequence].names = oldNameMap[name];
+ }else { // its a duplicate.
+ data[sequence].names += "," + oldNameMap[name];
+// data[sequence].groupnumber++;
+ }
+
+ gobble(inFASTA);
}
+
+
+ inFASTA.close();
}
+
/*******************************************************************************/
+
// Returns the group name recorded for the sequence string seq. data is keyed by the
// sequence itself, so seq must be the sequence text rather than a sequence name.
// Returns an empty string when seq has no entry.
// The lookup goes through find() instead of operator[]: operator[] would insert an
// empty group for every unknown sequence, and that phantom entry would then be
// counted by sizeUnique() and written out by the print functions.
string FastaMap::getGroupName(string seq) {
	map<string,group>::iterator it = data.find(seq);
	if (it == data.end()) { return string(); }
	return it->second.groupname;
}
+
/*******************************************************************************/
+
// Returns the comma-separated list of names recorded for the sequence string seq.
// Returns an empty string when seq has no entry.
// The lookup goes through find() instead of operator[]: operator[] would insert an
// empty group for every unknown sequence, and that phantom entry would then be
// counted by sizeUnique() and written out by the print functions.
string FastaMap::getNames(string seq) {
	map<string,group>::iterator it = data.find(seq);
	if (it == data.end()) { return string(); }
	return it->second.names;
}
+
/*******************************************************************************/
-int FastaMap::getGroupNumber(string seq) { //pass a sequence get the number of identical sequences.
- return data[seq].groupnumber;
-}
-/*******************************************************************************/
+
// Looks up the sequence stored in seqmap under name.
// Returns the stored sequence, or the literal string "not found" when name has no
// entry. The lookup uses find(), so a miss does not add anything to seqmap.
string FastaMap::getSequence(string name) {
- it2 = seqmap.find(name);
- if (it2 == seqmap.end()) { //it's not found
- return "not found";
- }else { // found it
- return it2->second;
- }
+
+ map<string,string>::iterator it = seqmap.find(name);
+ if (it == seqmap.end()) { return "not found"; }
+ else { return it->second; }
+
}
+
/*******************************************************************************/
+
// Registers one (name, seq) pair.
// If seq has not been seen before, a new group is created in data with name as both
// its group name and its names list; otherwise name is appended to the existing
// names list after a comma. In both cases seqmap[name] is set to seq.
void FastaMap::push_back(string name, string seq) {
- it = data.find(seq);
+
+ map<string,group>::iterator it = data.find(seq);
if (it == data.end()) { //it's unique.
data[seq].groupname = name; //group name will be the name of the first duplicate sequence found.
- data[seq].groupnumber = 1;
data[seq].names = name;
}else { // its a duplicate.
data[seq].names += "," + name;
- data[seq].groupnumber++;
}
-
// a name that was registered before has its stored sequence overwritten
seqmap[name] = seq;
}
+
/*******************************************************************************/
+
// Number of entries currently held in data, i.e. how many distinct sequence
// strings have been recorded.
int FastaMap::sizeUnique(){
	const int uniqueCount = static_cast<int>(data.size());
	return uniqueCount;
}
+
/*******************************************************************************/
-void FastaMap::printNamesFile(ostream& out){ //prints data
+
// Writes the names file: one line per entry in data, holding the group name, a tab,
// and the comma-separated list of names in that group.
// outFileName - path handed to openOutputFile().
// A std::exception raised in the body is reported through errorOut() and the
// program then exits with status 1.
+void FastaMap::printNamesFile(string outFileName){ //prints data
try {
+ ofstream outFile;
+ openOutputFile(outFileName, outFile);
+
// two column file created with groupname and then the list of identical sequence names
- for (it = data.begin(); it != data.end(); it++) {
- out << it->second.groupname << '\t' << it->second.names << endl;
+ for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
+ outFile << it->second.groupname << '\t' << it->second.names << endl;
}
+ outFile.close();
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ errorOut(e, "FastaMap", "printNamesFile");
exit(1);
}
}
+
/*******************************************************************************/
-void FastaMap::printCondensedFasta(ostream& out){ //prints data
+
// Writes a fasta file holding one record per entry in data: a ">" header line with
// the group name, followed by a line with the sequence string (the map key).
// outFileName - path handed to openOutputFile().
// A std::exception raised in the body is reported through errorOut() and the
// program then exits with status 1.
+void FastaMap::printCondensedFasta(string outFileName){ //prints data
try {
- // two column file created with groupname and them list of identical sequence names
- for (it = data.begin(); it != data.end(); it++) {
+ ofstream out;
+ openOutputFile(outFileName, out);
+ //creates a fasta file
+ for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
out << ">" << it->second.groupname << endl;
out << it->first << endl;
}
+ out.close();
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ errorOut(e, "FastaMap", "printCondensedFasta");
exit(1);
}
}
+
/*******************************************************************************/