2 * getrepseqscommand.cpp
5 * Created by Sarah Westcott on 5/19/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "getrepseqscommand.h"
12 //**********************************************************************************************************************
13 GetRepSeqsCommand::GetRepSeqsCommand(){
15 globaldata = GlobalData::getInstance();
16 fastafile = globaldata->getFastaFile();
17 namesfile = globaldata->getNameFile();
18 openInputFile(fastafile, in);
20 fasta = new FastaMap();
22 //read in group map info.
23 groupMap = new GroupMap(globaldata->getGroupFile());
26 //fill filehandles with neccessary ofstreams
30 for (i=0; i<groupMap->getNumGroups(); i++) {
32 filehandles[groupMap->namesOfGroups[i]] = temp;
38 filehandles[s] = temp;
42 cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
46 cout << "An unknown error has occurred in the GetRepSeqsCommand class function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
51 //**********************************************************************************************************************
53 GetRepSeqsCommand::~GetRepSeqsCommand(){
60 //**********************************************************************************************************************
62 int GetRepSeqsCommand::execute(){
65 string binnames, name, sequence;
68 fasta->readFastaFile(in);
70 //set format to list so input can get listvector
71 globaldata->setFormat("list");
73 //if user gave a namesfile then use it
74 if (namesfile != "") {
79 read = new ReadOTUFile(globaldata->getListFile());
80 read->read(&*globaldata);
82 input = globaldata->ginput;
83 list = globaldata->gListVector;
87 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
89 cout << list->getLabel() << '\t' << count << endl;
91 //open output list files
92 for (int i=0; i<groupMap->getNumGroups(); i++) {//opens an output file for each group
93 openOutputFile(fastafile + groupMap->namesOfGroups[i] + list->getLabel() + ".fasta", *(filehandles[groupMap->namesOfGroups[i]]));
94 used[groupMap->namesOfGroups[i]] = false;
97 openOutputFile(fastafile + s + list->getLabel() + ".fasta", *(filehandles[s]));
101 //for each bin in the list vector
102 for (int i = 0; i < list->size(); i++) {
104 //uses this to determine if the bin is unique to one group or if it is shared
105 map<string, string> groups;
107 //determine if this otu is unique to one group or not
108 binnames = list->get(i);
109 while (binnames.find_first_of(',') != -1) {
110 //parse out each name in bin
111 name = binnames.substr(0,binnames.find_first_of(','));
112 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
114 //do work for that name
115 sequence = fasta->getSequence(name);
116 if (sequence != "not found") {
117 string group = groupMap->getGroup(name);
118 if (group != "not found") { groups[group] = group; } //add group to list of groups in this bin
120 cout << "error sequence " << name << " is not assigned a group in your groupfile. Please correct." << endl;
121 removeFiles(list->getLabel());
124 name = ">" + name + "|" + toString(i+1);
125 seq[name] = sequence;
127 cout << name << " is missing from your fasta or name file. Please correct. " << endl;
128 removeFiles(list->getLabel());
135 sequence = fasta->getSequence(binnames);
136 if (sequence != "not found") {
137 string group = groupMap->getGroup(binnames);
138 if (group != "not found") { groups[group] = group; } //add group to list of groups in this bin
140 cout << "error sequence " << binnames << " is not assigned a group in your groupfile. Please correct." << endl;
141 removeFiles(list->getLabel());
144 binnames = ">" + binnames + "|" + toString(i+1); //attach bin number to name
145 seq[binnames] = sequence;
147 cout << binnames << " is missing from your fasta or name file. Please correct. " << endl;
148 removeFiles(list->getLabel());
152 //output each bin to files
153 //what file does this bin need to be outputted to
154 if (groups.size() == 1) { //this bin is unique to one group
155 it3 = groups.begin();
156 string uniqueGroup = it3->first;
157 used[uniqueGroup] = true;
158 //print out sequences from that bin to shared file
159 for (it3 = seq.begin(); it3 != seq.end(); it3++){
160 *(filehandles[uniqueGroup]) << it3->first << endl;
161 *(filehandles[uniqueGroup]) << it3->second << endl;
163 }else {//this bin has sequences from multiple groups in it
165 //print out sequences from that bin to shared file
166 for (it3 = seq.begin(); it3 != seq.end(); it3++){
167 *(filehandles[s]) << it3->first << endl;
168 *(filehandles[s]) << it3->second << endl;
173 //close ostreams and remove unused files
174 for (it = filehandles.begin(); it != filehandles.end(); it++) {
176 if (used[it->first] == false) { string filename = fastafile + it->first + list->getLabel() + ".fasta"; remove(filename.c_str()); }
182 list = input->getListVector();
188 catch(exception& e) {
189 cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
193 cout << "An unknown error has occurred in the GetRepSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
198 //**********************************************************************************************************************
199 void GetRepSeqsCommand::readNamesFile() {
201 vector<string> dupNames;
202 openInputFile(namesfile, inNames);
204 string name, names, sequence;
207 inNames >> name; //read from first column A
208 inNames >> names; //read from second column A,B,C,D
212 //parse names into vector
213 splitAtComma(names, dupNames);
215 //store names in fasta map
216 sequence = fasta->getSequence(name);
217 for (int i = 0; i < dupNames.size(); i++) {
218 fasta->push_back(dupNames[i], sequence);
226 catch(exception& e) {
227 cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
231 cout << "An unknown error has occurred in the GetRepSeqsCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
235 //**********************************************************************************************************************
236 void GetRepSeqsCommand::removeFiles(string label) {
239 for (it = filehandles.begin(); it != filehandles.end(); it++) {
243 //remove output files because there was an error
244 for (int i=0; i<groupMap->getNumGroups(); i++) {
245 string outputFileName = fastafile + groupMap->namesOfGroups[i] + label + ".fasta";
246 remove(outputFileName.c_str());
248 string outputFileName = fastafile + "shared"+ label + ".fasta";
249 remove(outputFileName.c_str());
252 catch(exception& e) {
253 cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
257 cout << "An unknown error has occurred in the GetRepSeqsCommand class function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
262 //**********************************************************************************************************************