]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.cpp
added trim.seqs command
[mothur.git] / screenseqscommand.cpp
1 /*
2  *  screenseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/3/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
12
13 //***************************************************************************************************************
14
15 ScreenSeqsCommand::ScreenSeqsCommand(){
16         try {
17                 globaldata = GlobalData::getInstance();
18                 if(globaldata->getFastaFile() == "")            {       cout << "you must provide a fasta formatted file" << endl;      }
19         }
20         catch(exception& e) {
21                 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
22                 exit(1);
23         }
24         catch(...) {
25                 cout << "An unknown error has occurred in the ScreenSeqsCommand class function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
26                 exit(1);
27         }       
28 }
29
30 //***************************************************************************************************************
31
32 ScreenSeqsCommand::~ScreenSeqsCommand(){        /*      do nothing      */      }
33
34 //***************************************************************************************************************
35
36 int ScreenSeqsCommand::execute(){
37         try{
38                 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
39                 convert(globaldata->getStartPos(), startPos);
40                 convert(globaldata->getEndPos(), endPos);
41                 convert(globaldata->getMaxAmbig(), maxAmbig);
42                 convert(globaldata->getMaxHomoPolymer(), maxHomoP);
43                 convert(globaldata->getMinLength(), minLength);
44                 convert(globaldata->getMaxLength(), maxLength);
45                 
46                 ifstream inFASTA;
47                 openInputFile(globaldata->getFastaFile(), inFASTA);
48                 
49                 set<string> badSeqNames;
50                 
51                 string goodSeqFile = getRootName(globaldata->getFastaFile()) + "good" + getExtension(globaldata->getFastaFile());
52                 string badSeqFile = getRootName(globaldata->getFastaFile()) + "bad" + getExtension(globaldata->getFastaFile());
53                 
54                 ofstream goodSeqOut;    openOutputFile(goodSeqFile, goodSeqOut);
55                 ofstream badSeqOut;             openOutputFile(badSeqFile, badSeqOut);          
56                 
57                 while(!inFASTA.eof()){
58                         Sequence currSeq(inFASTA);
59                         bool goodSeq = 1;               //      innocent until proven guilty
60                         if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
61                         if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
62                         if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
63                         if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
64                         if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
65                         if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
66                         
67                         if(goodSeq == 1){
68                                 currSeq.printSequence(goodSeqOut);      
69                         }
70                         else{
71                                 currSeq.printSequence(badSeqOut);       
72                                 badSeqNames.insert(currSeq.getName());
73                         }
74                         gobble(inFASTA);
75                 }       
76                 if(globaldata->getNameFile() != ""){
77                         screenNameGroupFile(badSeqNames);
78                 }
79                 else if(globaldata->getGroupFile() != ""){
80                         screenGroupFile(badSeqNames);
81                 }
82                 
83                 return 0;
84         }
85         catch(exception& e) {
86                 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
87                 exit(1);
88         }
89         catch(...) {
90                 cout << "An unknown error has occurred in the ScreenSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
91                 exit(1);
92         }
93         
94 }
95
96 //***************************************************************************************************************
97
98 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
99
100         ifstream inputNames;
101         openInputFile(globaldata->getNameFile(), inputNames);
102         set<string> badSeqGroups;
103         string seqName, seqList, group;
104         set<string>::iterator it;
105
106         string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile());
107         string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile());
108         
109         ofstream goodNameOut;   openOutputFile(goodNameFile, goodNameOut);
110         ofstream badNameOut;    openOutputFile(badNameFile, badNameOut);                
111         
112         while(!inputNames.eof()){
113                 inputNames >> seqName >> seqList;
114                 it = badSeqNames.find(seqName);
115                 
116                 if(it != badSeqNames.end()){
117                         badSeqNames.erase(it);
118                         badNameOut << seqName << '\t' << seqList << endl;
119                         if(globaldata->getNameFile() != ""){
120                                 int start = 0;
121                                 for(int i=0;i<seqList.length();i++){
122                                         if(seqList[i] == ','){
123                                                 badSeqGroups.insert(seqList.substr(start,i-start));
124                                                 start = i+1;
125                                         }                                       
126                                 }
127                                 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
128                         }
129                 }
130                 else{
131                         goodNameOut << seqName << '\t' << seqList << endl;
132                 }
133                 gobble(inputNames);
134         }
135         inputNames.close();
136         goodNameOut.close();
137         badNameOut.close();
138         
139         if(globaldata->getGroupFile() != ""){
140                 
141                 ifstream inputGroups;
142                 openInputFile(globaldata->getGroupFile(), inputGroups);
143
144                 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
145                 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
146                 
147                 ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
148                 ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
149                 
150                 while(!inputGroups.eof()){
151                         inputGroups >> seqName >> group;
152
153                         it = badSeqGroups.find(seqName);
154                         
155                         if(it != badSeqGroups.end()){
156                                 badSeqGroups.erase(it);
157                                 badGroupOut << seqName << '\t' << group << endl;
158                         }
159                         else{
160                                 goodGroupOut << seqName << '\t' << group << endl;
161                         }
162                         gobble(inputGroups);
163                 }
164                 inputGroups.close();
165                 goodGroupOut.close();
166                 badGroupOut.close();
167         }
168 }
169
170 //***************************************************************************************************************
171
172 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
173
174         ifstream inputGroups;
175         openInputFile(globaldata->getGroupFile(), inputGroups);
176         string seqName, group;
177         set<string>::iterator it;
178         
179         string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
180         string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
181         
182         ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
183         ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
184         
185         while(!inputGroups.eof()){
186                 inputGroups >> seqName >> group;
187                 it = badSeqNames.find(seqName);
188                 
189                 if(it != badSeqNames.end()){
190                         badSeqNames.erase(it);
191                         badGroupOut << seqName << '\t' << group << endl;
192                 }
193                 else{
194                         goodGroupOut << seqName << '\t' << group << endl;
195                 }
196                 gobble(inputGroups);
197         }
198         inputGroups.close();
199         goodGroupOut.close();
200         badGroupOut.close();
201         
202 }
203
204 //***************************************************************************************************************
205
206