2 * screenseqscommand.cpp
5 * Created by Pat Schloss on 6/3/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "screenseqscommand.h"
12 //***************************************************************************************************************
14 ScreenSeqsCommand::ScreenSeqsCommand(){
16 globaldata = GlobalData::getInstance();
18 if(globaldata->getFastaFile() != "") { readSeqs = new ReadFasta(globaldata->inputFileName); }
19 else if(globaldata->getNexusFile() != "") { readSeqs = new ReadNexus(globaldata->inputFileName); }
20 else if(globaldata->getClustalFile() != "") { readSeqs = new ReadClustal(globaldata->inputFileName); }
21 else if(globaldata->getPhylipFile() != "") { readSeqs = new ReadPhylip(globaldata->inputFileName); }
24 db = readSeqs->getDB();
28 cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
32 cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
37 //***************************************************************************************************************
39 ScreenSeqsCommand::~ScreenSeqsCommand(){
43 //***************************************************************************************************************
45 int ScreenSeqsCommand::execute(){
47 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
48 convert(globaldata->getStartPos(), startPos);
49 convert(globaldata->getEndPos(), endPos);
50 convert(globaldata->getMaxAmbig(), maxAmbig);
51 convert(globaldata->getMaxHomoPolymer(), maxHomoP);
52 convert(globaldata->getMinLength(), minLength);
53 convert(globaldata->getMaxLength(), maxLength);
55 set<string> badSeqNames;
57 string goodSeqFile = getRootName(globaldata->inputFileName) + "good" + getExtension(globaldata->inputFileName);
58 string badSeqFile = getRootName(globaldata->inputFileName) + "bad" + getExtension(globaldata->inputFileName);
60 ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut);
61 ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut);
63 for(int i=0;i<numSeqs;i++){
64 Sequence currSeq = db->get(i);
65 bool goodSeq = 1; // innocent until proven guilty
66 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
67 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
68 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
69 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()){ goodSeq = 0; }
70 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
71 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
74 currSeq.printSequence(goodSeqOut);
77 currSeq.printSequence(badSeqOut);
78 badSeqNames.insert(currSeq.getName());
82 if(globaldata->getNameFile() != ""){
83 screenNameGroupFile(badSeqNames);
90 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
94 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
100 //***************************************************************************************************************
102 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
105 openInputFile(globaldata->getNameFile(), inputNames);
106 set<string> badSeqGroups;
107 string seqName, seqList, group;
108 set<string>::iterator it;
110 string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile());
111 string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile());
113 ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut);
114 ofstream badNameOut; openOutputFile(badNameFile, badNameOut);
116 while(!inputNames.eof()){
117 inputNames >> seqName >> seqList;
118 it = badSeqNames.find(seqName);
120 if(it != badSeqNames.end()){
121 badSeqNames.erase(it);
122 badNameOut << seqName << '\t' << seqList << endl;
123 if(globaldata->getNameFile() != ""){
125 for(int i=0;i<seqList.length();i++){
126 if(seqList[i] == ','){
127 badSeqGroups.insert(seqList.substr(start,i-start));
131 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
135 goodNameOut << seqName << '\t' << seqList << endl;
143 if(globaldata->getGroupFile() != ""){
145 ifstream inputGroups;
146 openInputFile(globaldata->getGroupFile(), inputGroups);
148 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
149 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
151 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
152 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
154 while(!inputGroups.eof()){
155 inputGroups >> seqName >> group;
157 it = badSeqGroups.find(seqName);
159 if(it != badSeqGroups.end()){
160 badSeqGroups.erase(it);
161 badGroupOut << seqName << '\t' << group << endl;
164 goodGroupOut << seqName << '\t' << group << endl;
169 goodGroupOut.close();
174 //***************************************************************************************************************