]> git.donarmstrong.com Git - mothur.git/blob - binsequencecommand.cpp
modified bin.seqs and get.oturep commands to include use of a groupfile if provided...
[mothur.git] / binsequencecommand.cpp
1 /*
2  *  binsequencecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/3/09.
 *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "binsequencecommand.h"
11
12 //**********************************************************************************************************************
13 BinSeqCommand::BinSeqCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 fastafile = globaldata->getFastaFile();
17                 namesfile = globaldata->getNameFile();
18                 groupfile = globaldata->getGroupFile();
19                 openInputFile(fastafile, in);
20                 
21                 if (groupfile != "") {
22                         //read in group map info.
23                         groupMap = new GroupMap(groupfile);
24                         groupMap->readMap();
25                 }
26                 
27                 fasta = new FastaMap();
28         }
29         catch(exception& e) {
30                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
31                 exit(1);
32         }
33         catch(...) {
34                 cout << "An unknown error has occurred in the BinSeqCommand class function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
35                 exit(1);
36         }       
37 }
38
39 //**********************************************************************************************************************
40
41 BinSeqCommand::~BinSeqCommand(){
42         delete input;
43         delete read;
44         delete fasta;
45         delete list;
46         if (groupfile != "") {
47                 delete groupMap;
48         }
49 }
50
51 //**********************************************************************************************************************
52
53 int BinSeqCommand::execute(){
54         try {
55                 int count = 1;
56                 string binnames, name, sequence;
57                 
58                 //read fastafile
59                 fasta->readFastaFile(in);
60                 
61                 //set format to list so input can get listvector
62                 globaldata->setFormat("list");
63                 
64                 //if user gave a namesfile then use it
65                 if (namesfile != "") {
66                         readNamesFile();
67                 }
68                 
69                 //read list file
70                 read = new ReadOTUFile(globaldata->getListFile());      
71                 read->read(&*globaldata); 
72                 
73                 input = globaldata->ginput;
74                 list = globaldata->gListVector;
75                                 
76                 while(list != NULL){
77                         
78                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
79                                 
80                                 string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta";
81                                 openOutputFile(outputFileName, out);
82
83                                 cout << list->getLabel() << '\t' << count << endl;
84                                 
85                                 //for each bin in the list vector
86                                 for (int i = 0; i < list->size(); i++) {
87
88                                         binnames = list->get(i);
89                                         while (binnames.find_first_of(',') != -1) { 
90                                                 name = binnames.substr(0,binnames.find_first_of(','));
91                                                 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
92                                                 
93                                                 //do work for that name
94                                                 sequence = fasta->getSequence(name);
95                                                 if (sequence != "not found") {
96                                                         //if you don't have groups
97                                                         if (groupfile == "") {
98                                                                 name = name + "|" + toString(i+1);
99                                                                 out << ">" << name << endl;
100                                                                 out << sequence << endl;
101                                                         }else {//if you do have groups
102                                                                 string group = groupMap->getGroup(name);
103                                                                 if (group == "not found") {  
104                                                                         cout << name << " is missing from your group file. Please correct. " << endl;
105                                                                         remove(outputFileName.c_str());
106                                                                         return 0;
107                                                                 }else{
108                                                                         name = name + "|" + group + "|" + toString(i+1);
109                                                                         out << ">" << name << endl;
110                                                                         out << sequence << endl;
111                                                                 }
112                                                         }
113                                                 }else { 
114                                                         cout << name << " is missing from your fasta or name file. Please correct. " << endl; 
115                                                         remove(outputFileName.c_str());
116                                                         return 0;
117                                                 }
118                                                 
119                                         }
120                                         
121                                         //get last name
122                                         sequence = fasta->getSequence(binnames);
123                                         if (sequence != "not found") {
124                                                 //if you don't have groups
125                                                 if (groupfile == "") {
126                                                         binnames = binnames + "|" + toString(i+1);
127                                                         out << ">" << binnames << endl;
128                                                         out << sequence << endl;
129                                                 }else {//if you do have groups
130                                                         string group = groupMap->getGroup(binnames);
131                                                         if (group == "not found") {  
132                                                                 cout << binnames << " is missing from your group file. Please correct. " << endl;
133                                                                 remove(outputFileName.c_str());
134                                                                 return 0;
135                                                         }else{
136                                                                 binnames = binnames + "|" + group + "|" + toString(i+1);
137                                                                 out << ">" << binnames << endl;
138                                                                 out << sequence << endl;
139                                                         }
140                                                 }
141                                         }else { 
142                                                 cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; 
143                                                 remove(outputFileName.c_str());
144                                                 return 0;
145                                         }
146                                         
147                                 }
148                                 out.close();
149                         }
150                         
151                         delete list;
152                         list = input->getListVector();
153                         count++;
154                 }
155                 
156                 return 0;
157         }
158         catch(exception& e) {
159                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
160                 exit(1);
161         }
162         catch(...) {
163                 cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
164                 exit(1);
165         }       
166 }
167
168 //**********************************************************************************************************************
169 void BinSeqCommand::readNamesFile() {
170         try {
171                 vector<string> dupNames;
172                 openInputFile(namesfile, inNames);
173                 
174                 string name, names, sequence;
175         
176                 while(inNames){
177                         inNames >> name;                        //read from first column  A
178                         inNames >> names;               //read from second column  A,B,C,D
179                         
180                         dupNames.clear();
181                         
182                         //parse names into vector
183                         splitAtComma(names, dupNames);
184                         
185                         //store names in fasta map
186                         sequence = fasta->getSequence(name);
187                         for (int i = 0; i < dupNames.size(); i++) {
188                                 fasta->push_back(dupNames[i], sequence);
189                         }
190                 
191                         gobble(inNames);
192                 }
193                 inNames.close();
194
195         }
196         catch(exception& e) {
197                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
198                 exit(1);
199         }
200         catch(...) {
201                 cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
202                 exit(1);
203         }       
204 }
205 //**********************************************************************************************************************
206
207
208