]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.cpp
pat's edits of screen.seqs and summary.seqs
[mothur.git] / screenseqscommand.cpp
1 /*
2  *  screenseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/3/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "screenseqscommand.h"
11
12 //***************************************************************************************************************
13
14 ScreenSeqsCommand::ScreenSeqsCommand(){
15         try {
16                 globaldata = GlobalData::getInstance();
17                 if(globaldata->getFastaFile() == "")            {       cout << "you must provide a fasta formatted file" << endl;      }
18         }
19         catch(exception& e) {
20                 cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
21                 exit(1);
22         }
23         catch(...) {
24                 cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
25                 exit(1);
26         }       
27 }
28
29 //***************************************************************************************************************
30
31 ScreenSeqsCommand::~ScreenSeqsCommand(){        /*      do nothing      */      }
32
33 //***************************************************************************************************************
34
35 int ScreenSeqsCommand::execute(){
36         try{
37                 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
38                 convert(globaldata->getStartPos(), startPos);
39                 convert(globaldata->getEndPos(), endPos);
40                 convert(globaldata->getMaxAmbig(), maxAmbig);
41                 convert(globaldata->getMaxHomoPolymer(), maxHomoP);
42                 convert(globaldata->getMinLength(), minLength);
43                 convert(globaldata->getMaxLength(), maxLength);
44                 
45                 ifstream inFASTA;
46                 openInputFile(globaldata->getFastaFile(), inFASTA);
47                 
48                 set<string> badSeqNames;
49                 
50                 string goodSeqFile = getRootName(globaldata->inputFileName) + "good" + getExtension(globaldata->inputFileName);
51                 string badSeqFile = getRootName(globaldata->inputFileName) + "bad" + getExtension(globaldata->inputFileName);
52                 
53                 ofstream goodSeqOut;    openOutputFile(goodSeqFile, goodSeqOut);
54                 ofstream badSeqOut;             openOutputFile(badSeqFile, badSeqOut);          
55                 
56                 while(!inFASTA.eof()){
57                         Sequence currSeq(inFASTA);
58                         bool goodSeq = 1;               //      innocent until proven guilty
59                         if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
60                         if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
61                         if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
62                         if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
63                         if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
64                         if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
65                         
66                         if(goodSeq == 1){
67                                 currSeq.printSequence(goodSeqOut);      
68                         }
69                         else{
70                                 currSeq.printSequence(badSeqOut);       
71                                 badSeqNames.insert(currSeq.getName());
72                         }
73                         gobble(inFASTA);
74                 }       
75                 
76                 return 0;
77         }
78         catch(exception& e) {
79                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
80                 exit(1);
81         }
82         catch(...) {
83                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
84                 exit(1);
85         }
86         
87 }
88
89 //***************************************************************************************************************
90
91 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
92
93         ifstream inputNames;
94         openInputFile(globaldata->getNameFile(), inputNames);
95         set<string> badSeqGroups;
96         string seqName, seqList, group;
97         set<string>::iterator it;
98
99         string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile());
100         string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile());
101         
102         ofstream goodNameOut;   openOutputFile(goodNameFile, goodNameOut);
103         ofstream badNameOut;    openOutputFile(badNameFile, badNameOut);                
104         
105         while(!inputNames.eof()){
106                 inputNames >> seqName >> seqList;
107                 it = badSeqNames.find(seqName);
108                 
109                 if(it != badSeqNames.end()){
110                         badSeqNames.erase(it);
111                         badNameOut << seqName << '\t' << seqList << endl;
112                         if(globaldata->getNameFile() != ""){
113                                 int start = 0;
114                                 for(int i=0;i<seqList.length();i++){
115                                         if(seqList[i] == ','){
116                                                 badSeqGroups.insert(seqList.substr(start,i-start));
117                                                 start = i+1;
118                                         }                                       
119                                 }
120                                 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
121                         }
122                 }
123                 else{
124                         goodNameOut << seqName << '\t' << seqList << endl;
125                 }
126                 gobble(inputNames);
127         }
128         inputNames.close();
129         goodNameOut.close();
130         badNameOut.close();
131         
132         if(globaldata->getGroupFile() != ""){
133                 
134                 ifstream inputGroups;
135                 openInputFile(globaldata->getGroupFile(), inputGroups);
136
137                 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
138                 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
139                 
140                 ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
141                 ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
142                 
143                 while(!inputGroups.eof()){
144                         inputGroups >> seqName >> group;
145
146                         it = badSeqGroups.find(seqName);
147                         
148                         if(it != badSeqGroups.end()){
149                                 badSeqGroups.erase(it);
150                                 badGroupOut << seqName << '\t' << group << endl;
151                         }
152                         else{
153                                 goodGroupOut << seqName << '\t' << group << endl;
154                         }
155                         gobble(inputGroups);
156                 }
157                 inputGroups.close();
158                 goodGroupOut.close();
159                 badGroupOut.close();
160         }
161 }
162
163 //***************************************************************************************************************
164
165 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
166
167         ifstream inputGroups;
168         openInputFile(globaldata->getGroupFile(), inputGroups);
169         string seqName, group;
170         set<string>::iterator it;
171         
172         string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
173         string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
174         
175         ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
176         ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
177         
178         while(!inputGroups.eof()){
179                 inputGroups >> seqName >> group;
180                 it = badSeqNames.find(seqName);
181                 
182                 if(it != badSeqNames.end()){
183                         badSeqNames.erase(it);
184                         badGroupOut << seqName << '\t' << group << endl;
185                 }
186                 else{
187                         goodGroupOut << seqName << '\t' << group << endl;
188                 }
189                 gobble(inputGroups);
190         }
191         inputGroups.close();
192         goodGroupOut.close();
193         badGroupOut.close();
194         
195 }
196
197 //***************************************************************************************************************
198
199