]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.cpp
added screen.seqs command - pds
[mothur.git] / screenseqscommand.cpp
1 /*
2  *  screenseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/3/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "screenseqscommand.h"
11
12 //***************************************************************************************************************
13
14 ScreenSeqsCommand::ScreenSeqsCommand(){
15         try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 if(globaldata->getFastaFile() != "")            {       readSeqs = new ReadFasta(globaldata->inputFileName);    }
19                 else if(globaldata->getNexusFile() != "")       {       readSeqs = new ReadNexus(globaldata->inputFileName);    }
20                 else if(globaldata->getClustalFile() != "") {   readSeqs = new ReadClustal(globaldata->inputFileName);  }
21                 else if(globaldata->getPhylipFile() != "")      {       readSeqs = new ReadPhylip(globaldata->inputFileName);   }
22                 
23                 readSeqs->read();
24                 db = readSeqs->getDB();
25                 numSeqs = db->size();
26         }
27         catch(exception& e) {
28                 cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
29                 exit(1);
30         }
31         catch(...) {
32                 cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
33                 exit(1);
34         }       
35 }
36
37 //***************************************************************************************************************
38
39 ScreenSeqsCommand::~ScreenSeqsCommand(){
40         delete readSeqs;
41 }
42
43 //***************************************************************************************************************
44
45 int ScreenSeqsCommand::execute(){
46         try{
47                 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
48                 convert(globaldata->getStartPos(), startPos);
49                 convert(globaldata->getEndPos(), endPos);
50                 convert(globaldata->getMaxAmbig(), maxAmbig);
51                 convert(globaldata->getMaxHomoPolymer(), maxHomoP);
52                 convert(globaldata->getMinLength(), minLength);
53                 convert(globaldata->getMaxLength(), maxLength);
54                 
55                 set<string> badSeqNames;
56                 
57                 string goodSeqFile = getRootName(globaldata->inputFileName) + "good" + getExtension(globaldata->inputFileName);
58                 string badSeqFile = getRootName(globaldata->inputFileName) + "bad" + getExtension(globaldata->inputFileName);
59                 
60                 ofstream goodSeqOut;    openOutputFile(goodSeqFile, goodSeqOut);
61                 ofstream badSeqOut;             openOutputFile(badSeqFile, badSeqOut);          
62
63                 for(int i=0;i<numSeqs;i++){
64                         Sequence currSeq = db->get(i);
65                         bool goodSeq = 1;               //      innocent until proven guilty
66                         if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())          {       goodSeq = 0;    }
67                         if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                        {       goodSeq = 0;    }
68                         if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())        {       goodSeq = 0;    }
69                         if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()){  goodSeq = 0;    }
70                         if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())        {       goodSeq = 0;    }
71                         if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())        {       goodSeq = 0;    }
72                         
73                         if(goodSeq == 1){
74                                 currSeq.printSequence(goodSeqOut);      
75                         }
76                         else{
77                                 currSeq.printSequence(badSeqOut);       
78                                 badSeqNames.insert(currSeq.getName());
79                         }
80                 }
81                 
82                 if(globaldata->getNameFile() != ""){
83                         screenNameGroupFile(badSeqNames);
84                         
85                 }
86                 
87                 return 0;
88         }
89         catch(exception& e) {
90                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
91                 exit(1);
92         }
93         catch(...) {
94                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
95                 exit(1);
96         }
97         
98 }
99
100 //***************************************************************************************************************
101
102 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
103
104         ifstream inputNames;
105         openInputFile(globaldata->getNameFile(), inputNames);
106         set<string> badSeqGroups;
107         string seqName, seqList, group;
108         set<string>::iterator it;
109
110         string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile());
111         string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile());
112         
113         ofstream goodNameOut;   openOutputFile(goodNameFile, goodNameOut);
114         ofstream badNameOut;    openOutputFile(badNameFile, badNameOut);                
115         
116         while(!inputNames.eof()){
117                 inputNames >> seqName >> seqList;
118                 it = badSeqNames.find(seqName);
119                 
120                 if(it != badSeqNames.end()){
121                         badSeqNames.erase(it);
122                         badNameOut << seqName << '\t' << seqList << endl;
123                         if(globaldata->getNameFile() != ""){
124                                 int start = 0;
125                                 for(int i=0;i<seqList.length();i++){
126                                         if(seqList[i] == ','){
127                                                 badSeqGroups.insert(seqList.substr(start,i-start));
128                                                 start = i+1;
129                                         }                                       
130                                 }
131                                 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
132                         }
133                 }
134                 else{
135                         goodNameOut << seqName << '\t' << seqList << endl;
136                 }
137                 gobble(inputNames);
138         }
139         inputNames.close();
140         goodNameOut.close();
141         badNameOut.close();
142         
143         if(globaldata->getGroupFile() != ""){
144                 
145                 ifstream inputGroups;
146                 openInputFile(globaldata->getGroupFile(), inputGroups);
147
148                 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
149                 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
150                 
151                 ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
152                 ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
153                 
154                 while(!inputGroups.eof()){
155                         inputGroups >> seqName >> group;
156
157                         it = badSeqGroups.find(seqName);
158                         
159                         if(it != badSeqGroups.end()){
160                                 badSeqGroups.erase(it);
161                                 badGroupOut << seqName << '\t' << group << endl;
162                         }
163                         else{
164                                 goodGroupOut << seqName << '\t' << group << endl;
165                         }
166                         gobble(inputGroups);
167                 }
168                 inputGroups.close();
169                 goodGroupOut.close();
170                 badGroupOut.close();
171         }
172 }
173
174 //***************************************************************************************************************
175
176