]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.cpp
removed readseqs, readfasta, readnexus, readclustal, readseqsphylip and updated seque...
[mothur.git] / screenseqscommand.cpp
1 /*
2  *  screenseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/3/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
12
13 //***************************************************************************************************************
14
15 ScreenSeqsCommand::ScreenSeqsCommand(){
16         try {
17                 globaldata = GlobalData::getInstance();
18                 if(globaldata->getFastaFile() == "")            {       cout << "you must provide a fasta formatted file" << endl;      }
19         }
20         catch(exception& e) {
21                 cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
22                 exit(1);
23         }
24         catch(...) {
25                 cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
26                 exit(1);
27         }       
28 }
29
30 //***************************************************************************************************************
31
32 ScreenSeqsCommand::~ScreenSeqsCommand(){        /*      do nothing      */      }
33
34 //***************************************************************************************************************
35
36 int ScreenSeqsCommand::execute(){
37         try{
38                 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
39                 convert(globaldata->getStartPos(), startPos);
40                 convert(globaldata->getEndPos(), endPos);
41                 convert(globaldata->getMaxAmbig(), maxAmbig);
42                 convert(globaldata->getMaxHomoPolymer(), maxHomoP);
43                 convert(globaldata->getMinLength(), minLength);
44                 convert(globaldata->getMaxLength(), maxLength);
45                 
46                 ifstream inFASTA;
47                 openInputFile(globaldata->getFastaFile(), inFASTA);
48                 
49                 set<string> badSeqNames;
50                 
51                 string goodSeqFile = getRootName(globaldata->inputFileName) + "good" + getExtension(globaldata->inputFileName);
52                 string badSeqFile = getRootName(globaldata->inputFileName) + "bad" + getExtension(globaldata->inputFileName);
53                 
54                 ofstream goodSeqOut;    openOutputFile(goodSeqFile, goodSeqOut);
55                 ofstream badSeqOut;             openOutputFile(badSeqFile, badSeqOut);          
56                 
57                 while(!inFASTA.eof()){
58                         Sequence currSeq(inFASTA);
59                         bool goodSeq = 1;               //      innocent until proven guilty
60                         if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
61                         if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
62                         if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
63                         if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
64                         if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
65                         if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
66                         
67                         if(goodSeq == 1){
68                                 currSeq.printSequence(goodSeqOut);      
69                         }
70                         else{
71                                 currSeq.printSequence(badSeqOut);       
72                                 badSeqNames.insert(currSeq.getName());
73                         }
74                         gobble(inFASTA);
75                 }       
76                 
77                 return 0;
78         }
79         catch(exception& e) {
80                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
81                 exit(1);
82         }
83         catch(...) {
84                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
85                 exit(1);
86         }
87         
88 }
89
90 //***************************************************************************************************************
91
92 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
93
94         ifstream inputNames;
95         openInputFile(globaldata->getNameFile(), inputNames);
96         set<string> badSeqGroups;
97         string seqName, seqList, group;
98         set<string>::iterator it;
99
100         string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile());
101         string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile());
102         
103         ofstream goodNameOut;   openOutputFile(goodNameFile, goodNameOut);
104         ofstream badNameOut;    openOutputFile(badNameFile, badNameOut);                
105         
106         while(!inputNames.eof()){
107                 inputNames >> seqName >> seqList;
108                 it = badSeqNames.find(seqName);
109                 
110                 if(it != badSeqNames.end()){
111                         badSeqNames.erase(it);
112                         badNameOut << seqName << '\t' << seqList << endl;
113                         if(globaldata->getNameFile() != ""){
114                                 int start = 0;
115                                 for(int i=0;i<seqList.length();i++){
116                                         if(seqList[i] == ','){
117                                                 badSeqGroups.insert(seqList.substr(start,i-start));
118                                                 start = i+1;
119                                         }                                       
120                                 }
121                                 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
122                         }
123                 }
124                 else{
125                         goodNameOut << seqName << '\t' << seqList << endl;
126                 }
127                 gobble(inputNames);
128         }
129         inputNames.close();
130         goodNameOut.close();
131         badNameOut.close();
132         
133         if(globaldata->getGroupFile() != ""){
134                 
135                 ifstream inputGroups;
136                 openInputFile(globaldata->getGroupFile(), inputGroups);
137
138                 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
139                 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
140                 
141                 ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
142                 ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
143                 
144                 while(!inputGroups.eof()){
145                         inputGroups >> seqName >> group;
146
147                         it = badSeqGroups.find(seqName);
148                         
149                         if(it != badSeqGroups.end()){
150                                 badSeqGroups.erase(it);
151                                 badGroupOut << seqName << '\t' << group << endl;
152                         }
153                         else{
154                                 goodGroupOut << seqName << '\t' << group << endl;
155                         }
156                         gobble(inputGroups);
157                 }
158                 inputGroups.close();
159                 goodGroupOut.close();
160                 badGroupOut.close();
161         }
162 }
163
164 //***************************************************************************************************************
165
166 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
167
168         ifstream inputGroups;
169         openInputFile(globaldata->getGroupFile(), inputGroups);
170         string seqName, group;
171         set<string>::iterator it;
172         
173         string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
174         string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
175         
176         ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
177         ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
178         
179         while(!inputGroups.eof()){
180                 inputGroups >> seqName >> group;
181                 it = badSeqNames.find(seqName);
182                 
183                 if(it != badSeqNames.end()){
184                         badSeqNames.erase(it);
185                         badGroupOut << seqName << '\t' << group << endl;
186                 }
187                 else{
188                         goodGroupOut << seqName << '\t' << group << endl;
189                 }
190                 gobble(inputGroups);
191         }
192         inputGroups.close();
193         goodGroupOut.close();
194         badGroupOut.close();
195         
196 }
197
198 //***************************************************************************************************************
199
200