5 // Created by Sarah Westcott on 2/3/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "sortseqscommand.h"
10 #include "sequence.hpp"
11 #include "qualityscores.h"
13 //**********************************************************************************************************************
// Declares every parameter sort.seqs understands and appends each one to `parameters`:
// the "InputTypes" entries (fasta, flow, name, count, group, taxonomy, qfile, accnos),
// the "Boolean" entry large, and the "String" entries inputdir and outputdir.
// The names of all registered parameters are then copied into myArray.
// NOTE(review): name/count share the tag "NameCount" and count/group share "CountGroup";
// presumably these mark inputs that exclude each other (the option constructor rejects
// name+count and group+count) - confirm against CommandParameter.
// NOTE(review): the return statement is not among the visible lines; presumably myArray
// is the value returned - confirm.
14 vector<string> SortSeqsCommand::setParameters(){
16 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
17 CommandParameter pflow("flow", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pflow);
18 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
19 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
20 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
21 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
23 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
24 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// gather just the names of the parameters registered above
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// error path: forwards e, tagged with the class and function name, to m->errorOut
33 m->errorOut(e, "SortSeqsCommand", "setParameters");
37 //**********************************************************************************************************************
// Assembles the help text for sort.seqs in helpString: what the command does, the list of
// its parameters, the role of the accnos and large parameters, and a usage example.
// NOTE(review): two typos live in the user-facing text and are left untouched here:
// "accnos fasta" lacks a comma after "accnos", and "The large parameters is" should read
// "The large parameter is".
// NOTE(review): the return statement is not among the visible lines; presumably helpString
// is returned - confirm.
38 string SortSeqsCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The sort.seqs command puts the sequences in the same order for the following file types: accnos fasta, name, group, count, taxonomy, flow or quality file.\n";
42 helpString += "The sort.seqs command parameters are accnos, fasta, name, group, count, taxonomy, flow, qfile and large.\n";
43 helpString += "The accnos file allows you to specify the order you want the files in. If none is provided, mothur will use the order of the first file it reads.\n";
44 helpString += "The large parameters is used to indicate your files are too large to fit in RAM.\n";
45 helpString += "The sort.seqs command should be in the following format: sort.seqs(fasta=yourFasta).\n";
46 helpString += "Example sort.seqs(fasta=amazon.fasta).\n";
47 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
// error path: forwards e, tagged with the class and function name, to m->errorOut
51 m->errorOut(e, "SortSeqsCommand", "getHelpString");
56 //**********************************************************************************************************************
// Returns the tag that is appended to an input file's root name to form the output file name.
//   type      - output type being requested (fasta, taxonomy, name, count, group, flow, qfile)
//   inputName - input file name; its extension, as given by m->getExtension, is reused
// For every type listed above the tag is "sorted" followed by m->getExtension(inputName).
// A type missing from outputTypes is reported with an [ERROR] message; a type with no branch
// below is reported as well, sets m->control_pressed, and leaves the returned tag empty.
// NOTE(review): all seven branches build the same string, so the chain could be collapsed.
57 string SortSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
59 string outputFileName = "";
60 map<string, vector<string> >::iterator it;
62 //is this a type this command creates?
63 it = outputTypes.find(type);
64 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
66 if (type == "fasta") { outputFileName = "sorted" + m->getExtension(inputName); }
67 else if (type == "taxonomy") { outputFileName = "sorted" + m->getExtension(inputName); }
68 else if (type == "name") { outputFileName = "sorted" + m->getExtension(inputName); }
69 else if (type == "count") { outputFileName = "sorted" + m->getExtension(inputName); }
70 else if (type == "group") { outputFileName = "sorted" + m->getExtension(inputName); }
71 else if (type == "flow") { outputFileName = "sorted" + m->getExtension(inputName); }
72 else if (type == "qfile") { outputFileName = "sorted" + m->getExtension(inputName); }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
75 return outputFileName;
// error path: forwards e, tagged with the class and function name, to m->errorOut
78 m->errorOut(e, "SortSeqsCommand", "getOutputFileNameTag");
83 //**********************************************************************************************************************
// Default constructor: flags the object as aborted with help called (abort and calledHelp
// are both set to true) and registers each output type this command can produce with an
// empty list of file names.
84 SortSeqsCommand::SortSeqsCommand(){
86 abort = true; calledHelp = true;
// one empty entry per output type: fasta, taxonomy, name, count, group, qfile, flow
88 vector<string> tempOutNames;
89 outputTypes["fasta"] = tempOutNames;
90 outputTypes["taxonomy"] = tempOutNames;
91 outputTypes["name"] = tempOutNames;
92 outputTypes["count"] = tempOutNames;
93 outputTypes["group"] = tempOutNames;
94 outputTypes["qfile"] = tempOutNames;
95 outputTypes["flow"] = tempOutNames;
// error path: forwards e, tagged with the class and function name, to m->errorOut
98 m->errorOut(e, "SortSeqsCommand", "SortSeqsCommand");
102 //**********************************************************************************************************************
// Builds the command from the user's option string.
//   option - "help", "citation", or the text handed to OptionParser
// abort is set to true when help or citation is requested, when a parameter fails
// validation, when an input file comes back as "not open", when name+count or group+count
// are combined, or when none of fasta/name/count/group/taxonomy/flow/qfile is supplied.
// File names that validate are stored in the matching member and passed to the matching
// m->set...File call.
103 SortSeqsCommand::SortSeqsCommand(string option) {
105 abort = false; calledHelp = false;
107 //allow user to run help
108 if(option == "help") { help(); abort = true; calledHelp = true; }
109 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// names of the parameters this command accepts
112 vector<string> myArray = setParameters();
// NOTE(review): this local map has the same name as the `parameters` container that
// setParameters() fills; presumably it shadows a member - confirm in the header.
114 OptionParser parser(option);
115 map<string,string> parameters = parser.getParameters();
117 ValidParameters validParameter;
118 map<string,string>::iterator it;
120 //check to make sure all parameters are valid for command
121 for (it = parameters.begin(); it != parameters.end(); it++) {
122 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
125 //initialize outputTypes
126 vector<string> tempOutNames;
127 outputTypes["fasta"] = tempOutNames;
128 outputTypes["taxonomy"] = tempOutNames;
129 outputTypes["name"] = tempOutNames;
130 outputTypes["group"] = tempOutNames;
131 outputTypes["qfile"] = tempOutNames;
132 outputTypes["flow"] = tempOutNames;
133 outputTypes["count"] = tempOutNames;
135 //if the user changes the output directory command factory will send this info to us in the output parameter
136 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
138 //a user-chosen input directory arrives in the inputdir parameter; it is empty when none was given
139 string inputDir = validParameter.validFile(parameters, "inputdir", false);
140 if (inputDir == "not found"){ inputDir = ""; }
// for each file parameter below: when the supplied value has no path of its own
// (m->hasPath returns ""), inputDir is put in front of it
143 it = parameters.find("fasta");
144 //user supplied a fasta file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["fasta"] = inputDir + it->second; }
151 it = parameters.find("name");
152 //user supplied a name file
153 if(it != parameters.end()){
154 path = m->hasPath(it->second);
155 //if the user has not given a path then, add inputdir. else leave path alone.
156 if (path == "") { parameters["name"] = inputDir + it->second; }
159 it = parameters.find("group");
160 //user supplied a group file
161 if(it != parameters.end()){
162 path = m->hasPath(it->second);
163 //if the user has not given a path then, add inputdir. else leave path alone.
164 if (path == "") { parameters["group"] = inputDir + it->second; }
167 it = parameters.find("taxonomy");
168 //user supplied a taxonomy file
169 if(it != parameters.end()){
170 path = m->hasPath(it->second);
171 //if the user has not given a path then, add inputdir. else leave path alone.
172 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
175 it = parameters.find("qfile");
176 //user supplied a quality file
177 if(it != parameters.end()){
178 path = m->hasPath(it->second);
179 //if the user has not given a path then, add inputdir. else leave path alone.
180 if (path == "") { parameters["qfile"] = inputDir + it->second; }
183 it = parameters.find("accnos");
184 //user supplied an accnos file
185 if(it != parameters.end()){
186 path = m->hasPath(it->second);
187 //if the user has not given a path then, add inputdir. else leave path alone.
188 if (path == "") { parameters["accnos"] = inputDir + it->second; }
191 it = parameters.find("flow");
192 //user supplied a flow file
193 if(it != parameters.end()){
194 path = m->hasPath(it->second);
195 //if the user has not given a path then, add inputdir. else leave path alone.
196 if (path == "") { parameters["flow"] = inputDir + it->second; }
199 it = parameters.find("count");
200 //user supplied a count file
201 if(it != parameters.end()){
202 path = m->hasPath(it->second);
203 //if the user has not given a path then, add inputdir. else leave path alone.
204 if (path == "") { parameters["count"] = inputDir + it->second; }
209 //check for parameters
// pattern for every file below: "not open" aborts, "not found" means the parameter was
// not given (member becomes ""), anything else is handed to the matching m->set...File
210 accnosfile = validParameter.validFile(parameters, "accnos", true);
211 if (accnosfile == "not open") { accnosfile = ""; abort = true; }
212 else if (accnosfile == "not found") { accnosfile = ""; }
213 else { m->setAccnosFile(accnosfile); }
215 fastafile = validParameter.validFile(parameters, "fasta", true);
216 if (fastafile == "not open") { fastafile = ""; abort = true; }
217 else if (fastafile == "not found") { fastafile = ""; }
218 else { m->setFastaFile(fastafile); }
220 flowfile = validParameter.validFile(parameters, "flow", true);
221 if (flowfile == "not open") { flowfile = ""; abort = true; }
222 else if (flowfile == "not found") { flowfile = ""; }
223 else { m->setFlowFile(flowfile); }
225 namefile = validParameter.validFile(parameters, "name", true);
226 if (namefile == "not open") { namefile = ""; abort = true; }
227 else if (namefile == "not found") { namefile = ""; }
228 else { m->setNameFile(namefile); }
// NOTE(review): for group, taxonomy and qfile the "not open" branch does not reset the
// member to "", unlike the inputs above, so the member keeps the literal text "not open"
// and the != "" checks further down treat it as a supplied file. abort is still set.
230 groupfile = validParameter.validFile(parameters, "group", true);
231 if (groupfile == "not open") { abort = true; }
232 else if (groupfile == "not found") { groupfile = ""; }
233 else { m->setGroupFile(groupfile); }
235 taxfile = validParameter.validFile(parameters, "taxonomy", true);
236 if (taxfile == "not open") { abort = true; }
237 else if (taxfile == "not found") { taxfile = ""; }
238 else { m->setTaxonomyFile(taxfile); }
240 qualfile = validParameter.validFile(parameters, "qfile", true);
241 if (qualfile == "not open") { abort = true; }
242 else if (qualfile == "not found") { qualfile = ""; }
243 else { m->setQualFile(qualfile); }
245 countfile = validParameter.validFile(parameters, "count", true);
246 if (countfile == "not open") { countfile = ""; abort = true; }
247 else if (countfile == "not found") { countfile = ""; }
248 else { m->setCountTableFile(countfile); }
// a count file cannot be combined with a name file or with a group file
250 if ((namefile != "") && (countfile != "")) {
251 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
254 if ((groupfile != "") && (countfile != "")) {
255 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
// large falls back to "f" when the user did not set it
258 string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "f"; }
259 large = m->isTrue(temp);
// at least one file to sort is required
261 if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "") && (taxfile == "") && (flowfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, count, taxonomy, flow or quality."); m->mothurOutEndLine(); abort = true; }
// fasta given without a name file or count file: pass the fasta file to parser.getNameFile
// NOTE(review): presumably this looks for a matching name file; its result is not used in
// the visible lines - confirm against OptionParser.
263 if (countfile == "") {
264 if ((fastafile != "") && (namefile == "")) {
265 vector<string> files; files.push_back(fastafile);
266 parser.getNameFile(files);
272 catch(exception& e) {
273 m->errorOut(e, "SortSeqsCommand", "SortSeqsCommand");
277 //**********************************************************************************************************************
// Runs the command.
// Returns 0 when the command was aborted because help was called, 2 for any other abort,
// and 0 (after removing every file in outputNames) when m->control_pressed is set.
// When an accnos file was given, its entries define the order: names maps each entry to
// its index. The supplied files are then processed in this fixed sequence: fasta, flow,
// qfile, name, group, count, taxonomy. Finally the output file names are printed and the
// first output of each type is handed to the matching m->set...File call.
279 int SortSeqsCommand::execute(){
282 if (abort == true) { if (calledHelp) { return 0; } return 2; }
284 //an accnos file, when present, fixes the order before any other file is read
285 if (accnosfile != "") {
287 m->readAccnos(accnosfile, temp);
// position in the accnos file becomes the output position for that name
288 for (int i = 0; i < temp.size(); i++) { names[temp[i]] = i; }
289 m->mothurOut("\nUsing " + accnosfile + " to determine the order. It contains " + toString(temp.size()) + " representative sequences.\n");
292 if (fastafile != "") { readFasta(); }
293 if (flowfile != "") { readFlow(); }
294 if (qualfile != "") { readQual(); }
295 if (namefile != "") { readName(); }
296 if (groupfile != "") { readGroup(); }
297 if (countfile != "") { readCount(); }
298 if (taxfile != "") { readTax(); }
// interrupted: delete everything written so far
300 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
302 if (outputNames.size() != 0) {
303 m->mothurOutEndLine();
304 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
305 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
306 m->mothurOutEndLine();
308 //set fasta file as new current fastafile
310 itTypes = outputTypes.find("fasta");
311 if (itTypes != outputTypes.end()) {
312 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
// same for the name file
315 itTypes = outputTypes.find("name");
316 if (itTypes != outputTypes.end()) {
317 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
// same for the group file
320 itTypes = outputTypes.find("group");
321 if (itTypes != outputTypes.end()) {
322 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
// same for the taxonomy file
326 itTypes = outputTypes.find("taxonomy");
327 if (itTypes != outputTypes.end()) {
328 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
// same for the quality file
331 itTypes = outputTypes.find("qfile");
332 if (itTypes != outputTypes.end()) {
333 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
// same for the flow file
336 itTypes = outputTypes.find("flow");
337 if (itTypes != outputTypes.end()) {
338 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }
// same for the count table
341 itTypes = outputTypes.find("count");
342 if (itTypes != outputTypes.end()) {
343 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
350 catch(exception& e) {
351 m->errorOut(e, "SortSeqsCommand", "execute");
356 //**********************************************************************************************************************
// Writes the sequences of fastafile to a new "sorted" fasta file and registers that file
// in outputTypes["fasta"] and outputNames.
//  - names not empty: an order already exists (names maps a sequence name to its output
//    position). Sequences are written in that order; a sequence whose name is unknown is
//    given the next position at the end. With large set, at most 1000 sequences are kept
//    in memory at once and the input file is reopened for each batch.
//  - names empty: this is the first file read; sequences are written in file order and the
//    log reports that this file determines the order.
// Returns 0 after removing the output file whenever m->control_pressed is set.
// NOTE(review): the user-facing text "was not in the contained the file which determined
// the order" is garbled; it is runtime output and therefore left unchanged here.
357 int SortSeqsCommand::readFasta(){
// output goes next to the input unless the user chose an output directory
359 string thisOutputDir = outputDir;
360 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
361 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
362 outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName);
365 m->openOutputFile(outputFileName, out);
368 m->openInputFile(fastafile, in);
371 if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
373 if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming.
374 //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000.
375 //this way we only store 1000 seqs in memory at a time.
// numNames: next free position for a name that is not yet in names
// numNamesInFile: reported below as the number of sequences ordered; presumably counted
// during the first pass over the file - confirm
377 int numNames = names.size();
378 int numNamesInFile = 0;
380 //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names
382 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
384 Sequence currSeq(in);
385 name = currSeq.getName();
389 map<string, int>::iterator it = names.find(name);
390 if (it == names.end()) {
391 names[name] = numNames; numNames++;
392 m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
// numLeft: positions still to be written, capped at the number of sequences in this file
400 int numLeft = names.size();
401 if (numNamesInFile < numLeft) { numLeft = numNamesInFile; }
403 int size = 1000; //assume that user can hold 1000 seqs in memory
404 if (numLeft < size) { size = numLeft; }
407 vector<Sequence> seqs; seqs.resize(size);
408 for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage
// one iteration per batch: reopen the input and collect the sequences of this batch
410 while (numLeft > 0) {
413 m->openInputFile(fastafile, in2);
415 if (m->control_pressed) { in2.close(); m->mothurRemove(outputFileName); return 0; }
// the last batch may hold fewer than size sequences
418 int needToFind = size;
419 if (numLeft < size) { needToFind = numLeft; }
422 if (m->control_pressed) { in2.close(); m->mothurRemove(outputFileName); return 0; }
424 //stop reading if we already found the seqs we are looking for
425 if (found >= needToFind) { break; }
427 Sequence currSeq(in2);
428 name = currSeq.getName();
431 map<string, int>::iterator it = names.find(name);
432 if (it != names.end()) { //we found it, so put it in the vector in the right place.
433 //is it in the set of seqs we are looking for this time around
// shift the global position by times*size to get an index within the current batch
// (times is presumably the number of batches already written - confirm)
434 int thisSeqsPlace = it->second;
435 thisSeqsPlace -= (times * size);
436 if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) {
437 seqs[thisSeqsPlace] = currSeq;
// presumably the else of the names lookup above: every name was added to names in the
// first pass, so a miss here is treated as an internal error
440 }else { m->mothurOut("[ERROR]: in logic of readFasta function.\n"); m->control_pressed = true; }
// append this batch to the output file
447 m->openOutputFileAppend(outputFileName, out2);
449 int output = seqs.size();
450 if (numLeft < seqs.size()) { output = numLeft; }
// slots whose name is still "" were never filled and are skipped
452 for (int i = 0; i < output; i++) {
453 if (seqs[i].getName() != "") { seqs[i].printSequence(out2); }
461 m->mothurOut("Ordered " + toString(numNamesInFile) + " sequences from " + fastafile + ".\n");
// in-memory variant: one slot per known name, indexed by the stored position
464 vector<Sequence> seqs; seqs.resize(names.size());
465 for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage
468 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
470 Sequence currSeq(in);
471 name = currSeq.getName();
474 map<string, int>::iterator it = names.find(name);
475 if (it != names.end()) { //we found it, so put it in the vector in the right place.
476 seqs[it->second] = currSeq;
477 }else { //if we cant find it then add it to the end
478 names[name] = seqs.size();
479 seqs.push_back(currSeq);
480 m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
// write the filled slots in position order; count tallies what was written
488 for (int i = 0; i < seqs.size(); i++) {
489 if (seqs[i].getName() != "") {
490 seqs[i].printSequence(out); count++;
495 m->mothurOut("Ordered " + toString(count) + " sequences from " + fastafile + ".\n");
498 }else { //read in file to fill names
502 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
504 Sequence currSeq(in);
505 name = currSeq.getName();
508 //first file read: each sequence is written straight through in file order
511 currSeq.printSequence(out);
518 m->mothurOut("\nUsing " + fastafile + " to determine the order. It contains " + toString(count) + " sequences.\n");
524 catch(exception& e) {
525 m->errorOut(e, "SortSeqsCommand", "readFasta");
529 //**********************************************************************************************************************
// Writes the records of flowfile to a new "sorted" flow file and registers that file in
// outputTypes["flow"] and outputNames. The file starts with a value read into numFlows;
// each later record is handled as a name plus the rest of its line.
//  - names not empty: records are written in the order stored in names (name -> output
//    position); a record whose name is unknown is given the next position at the end.
//    With large set, at most 1000 records are kept in memory at once and the input file is
//    reopened for each batch.
//  - names empty: this is the first file read; records are written in file order and the
//    log reports that this file determines the order.
// Returns 0 after removing the output file whenever m->control_pressed is set.
// NOTE(review): the user-facing text "was not in the contained the file which determined
// the order" is garbled; it is runtime output and therefore left unchanged here.
530 int SortSeqsCommand::readFlow(){
// output goes next to the input unless the user chose an output directory
532 string thisOutputDir = outputDir;
533 if (outputDir == "") { thisOutputDir += m->hasPath(flowfile); }
534 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowfile)) + getOutputFileNameTag("flow", flowfile);
535 outputTypes["flow"].push_back(outputFileName); outputNames.push_back(outputFileName);
538 m->openOutputFile(outputFileName, out);
541 m->openInputFile(flowfile, in);
// leading value of the flow file
545 in >> numFlows; m->gobble(in);
547 if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
549 if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming.
550 //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000.
551 //this way we only store 1000 seqs in memory at a time.
// numNames: next free position for a name that is not yet in names
// numNamesInFile: reported below as the number of flows ordered; presumably counted
// during the first pass over the file - confirm
553 int numNames = names.size();
554 int numNamesInFile = 0;
556 //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names
558 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
561 string rest = m->getline(in);
565 map<string, int>::iterator it = names.find(name);
566 if (it == names.end()) {
567 names[name] = numNames; numNames++;
568 m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
// numLeft: positions still to be written, capped at the number of records in this file
576 int numLeft = names.size();
577 if (numNamesInFile < numLeft) { numLeft = numNamesInFile; }
579 int size = 1000; //assume that user can hold 1000 seqs in memory
580 if (numLeft < size) { size = numLeft; }
// one slot per record of the batch; "" marks a slot that has not been filled
583 vector<string> seqs; seqs.resize(size, "");
// one iteration per batch: reopen the input, skip the leading value, collect this batch
585 while (numLeft > 0) {
588 m->openInputFile(flowfile, in2); in2 >> numFlows; m->gobble(in2);
590 if (m->control_pressed) { in2.close(); m->mothurRemove(outputFileName); return 0; }
// the last batch may hold fewer than size records
593 int needToFind = size;
594 if (numLeft < size) { needToFind = numLeft; }
597 if (m->control_pressed) { in2.close(); m->mothurRemove(outputFileName); return 0; }
599 //stop reading if we already found the seqs we are looking for
600 if (found >= needToFind) { break; }
603 string rest = m->getline(in2);
606 map<string, int>::iterator it = names.find(name);
607 if (it != names.end()) { //we found it, so put it in the vector in the right place.
608 //is it in the set of seqs we are looking for this time around
// shift the global position by times*size to get an index within the current batch
// (times is presumably the number of batches already written - confirm)
609 int thisSeqsPlace = it->second;
610 thisSeqsPlace -= (times * size);
611 if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) {
612 seqs[thisSeqsPlace] = (name +'\t' + rest);
// presumably the else of the names lookup above: every name was added to names in the
// first pass, so a miss here is treated as an internal error
615 }else { m->mothurOut("[ERROR]: in logic of readFlow function.\n"); m->control_pressed = true; }
// append this batch to the output file
622 m->openOutputFileAppend(outputFileName, out2);
624 int output = seqs.size();
625 if (numLeft < seqs.size()) { output = numLeft; }
627 for (int i = 0; i < output; i++) {
629 out2 << seqs[i] << endl;
638 m->mothurOut("Ordered " + toString(numNamesInFile) + " flows from " + flowfile + ".\n");
// in-memory variant: one slot per known name, indexed by the stored position
641 vector<string> seqs; seqs.resize(names.size(), "");
644 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
647 string rest = m->getline(in);
650 map<string, int>::iterator it = names.find(name);
651 if (it != names.end()) { //we found it, so put it in the vector in the right place.
652 seqs[it->second] = (name + '\t' + rest);
653 }else { //if we cant find it then add it to the end
654 names[name] = seqs.size();
655 seqs.push_back((name + '\t' + rest));
656 m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
// write the slots in position order
664 for (int i = 0; i < seqs.size(); i++) {
666 out << seqs[i] << endl;
672 m->mothurOut("Ordered " + toString(count) + " flows from " + flowfile + ".\n");
675 }else { //read in file to fill names
679 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
682 string rest = m->getline(in);
685 //first file read: each record is written straight through in file order
688 out << name << '\t' << rest << endl;
695 m->mothurOut("\nUsing " + flowfile + " to determine the order. It contains " + toString(count) + " flows.\n");
701 catch(exception& e) {
702 m->errorOut(e, "SortSeqsCommand", "readFlow");
707 //**********************************************************************************************************************
// Writes the quality records of qualfile to a new "sorted" quality file and registers that
// file in outputTypes["qfile"] and outputNames.
//  - names not empty: records are written in the order stored in names (name -> output
//    position); a record whose name is unknown is given the next position at the end.
//    With large set, at most 1000 records are kept in memory at once and the input file is
//    reopened for each batch.
//  - names empty: this is the first file read; each record's position is stored in names,
//    the record is written in file order, and the log reports that this file determines
//    the order.
// Returns 0 after removing the output file whenever m->control_pressed is set.
// NOTE(review): the user-facing text "was not in the contained the file which determined
// the order" is garbled; it is runtime output and therefore left unchanged here.
708 int SortSeqsCommand::readQual(){
// output goes next to the input unless the user chose an output directory
710 string thisOutputDir = outputDir;
711 if (outputDir == "") { thisOutputDir += m->hasPath(qualfile); }
712 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
713 outputTypes["qfile"].push_back(outputFileName); outputNames.push_back(outputFileName);
716 m->openOutputFile(outputFileName, out);
719 m->openInputFile(qualfile, in);
722 if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
724 if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming.
725 //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000.
726 //this way we only store 1000 seqs in memory at a time.
// numNames: next free position for a name that is not yet in names
// numNamesInFile: reported below as the number of sequences ordered; presumably counted
// during the first pass over the file - confirm
728 int numNames = names.size();
729 int numNamesInFile = 0;
731 //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names
733 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
735 QualityScores currQual;
736 currQual = QualityScores(in);
737 name = currQual.getName();
741 map<string, int>::iterator it = names.find(name);
742 if (it == names.end()) {
743 names[name] = numNames; numNames++;
744 m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
// numLeft: positions still to be written, capped at the number of records in this file
752 int numLeft = names.size();
753 if (numNamesInFile < numLeft) { numLeft = numNamesInFile; }
755 int size = 1000; //assume that user can hold 1000 seqs in memory
756 if (numLeft < size) { size = numLeft; }
760 vector<QualityScores> seqs; seqs.resize(size);
761 for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage
// one iteration per batch: reopen the input and collect the records of this batch
763 while (numLeft > 0) {
766 m->openInputFile(qualfile, in2);
768 if (m->control_pressed) { in2.close(); m->mothurRemove(outputFileName); return 0; }
// the last batch may hold fewer than size records
771 int needToFind = size;
772 if (numLeft < size) { needToFind = numLeft; }
775 if (m->control_pressed) { in2.close(); m->mothurRemove(outputFileName); return 0; }
777 //stop reading if we already found the seqs we are looking for
778 if (found >= needToFind) { break; }
780 QualityScores currQual;
781 currQual = QualityScores(in2);
782 name = currQual.getName();
785 map<string, int>::iterator it = names.find(name);
786 if (it != names.end()) { //we found it, so put it in the vector in the right place.
787 //is it in the set of seqs we are looking for this time around
// shift the global position by times*size to get an index within the current batch
// (times is presumably the number of batches already written - confirm)
788 int thisSeqsPlace = it->second;
789 thisSeqsPlace -= (times * size);
790 if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) {
791 seqs[thisSeqsPlace] = currQual;
// presumably the else of the names lookup above: every name was added to names in the
// first pass, so a miss here is treated as an internal error
794 }else { m->mothurOut("[ERROR]: in logic of readQual function.\n"); m->control_pressed = true; }
// append this batch to the output file
801 m->openOutputFileAppend(outputFileName, out2);
803 int output = seqs.size();
804 if (numLeft < seqs.size()) { output = numLeft; }
// slots whose name is still "" were never filled and are skipped
806 for (int i = 0; i < output; i++) {
807 if (seqs[i].getName() != "") {
808 seqs[i].printQScores(out2);
817 m->mothurOut("Ordered " + toString(numNamesInFile) + " sequences from " + qualfile + ".\n");
// in-memory variant: one slot per known name, indexed by the stored position
821 vector<QualityScores> seqs; seqs.resize(names.size());
822 for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage
825 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
827 QualityScores currQual;
828 currQual = QualityScores(in);
829 name = currQual.getName();
832 map<string, int>::iterator it = names.find(name);
833 if (it != names.end()) { //we found it, so put it in the vector in the right place.
834 seqs[it->second] = currQual;
835 }else { //if we cant find it then add it to the end
836 names[name] = seqs.size();
837 seqs.push_back(currQual);
838 m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
// write the filled slots in position order; count tallies what was written
846 for (int i = 0; i < seqs.size(); i++) {
847 if (seqs[i].getName() != "") { seqs[i].printQScores(out); count++; }
851 m->mothurOut("Ordered " + toString(count) + " sequences from " + qualfile + ".\n");
854 }else { //read in file to fill names
858 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
860 QualityScores currQual;
861 currQual = QualityScores(in);
865 if (currQual.getName() != "") {
866 //remember the position of this name so files read later can follow the same order
867 names[currQual.getName()] = count;
869 currQual.printQScores(out);
876 m->mothurOut("\nUsing " + qualfile + " to determine the order. It contains " + toString(count) + " sequences.\n");
882 catch(exception& e) {
883 m->errorOut(e, "SortSeqsCommand", "readQual");
887 //**********************************************************************************************************************
// Writes the rows of namefile to a new "sorted" name file and registers that file in
// outputTypes["name"] and outputNames. Each row is read as two whitespace-separated
// columns; the first column is the key used for ordering.
//  - names not empty: rows are placed at the position names holds for their first column;
//    a row whose first column is unknown is appended and given the next position.
//  - names empty: this is the first file read; each first column's position is stored in
//    names and rows are written in file order.
// Returns 0 after removing the output file whenever m->control_pressed is set.
// NOTE(review): the user-facing text "was not in the contained the file which determined
// the order" is garbled; it is runtime output and therefore left unchanged here.
888 int SortSeqsCommand::readName(){
// output goes next to the input unless the user chose an output directory
890 string thisOutputDir = outputDir;
891 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
892 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
893 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
896 m->openOutputFile(outputFileName, out);
899 m->openInputFile(namefile, in);
900 string name, firstCol, secondCol;
902 if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
// one slot per known name; "" marks a slot that has not been filled
904 vector<string> seqs; seqs.resize(names.size(), "");
907 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
909 in >> firstCol; m->gobble(in);
910 in >> secondCol; m->gobble(in);
912 if (firstCol != "") {
913 map<string, int>::iterator it = names.find(firstCol);
914 if (it != names.end()) { //we found it, so put it in the vector in the right place.
915 seqs[it->second] = firstCol + '\t' + secondCol;
916 }else { //if we cant find it then add it to the end
917 names[firstCol] = seqs.size();
918 seqs.push_back((firstCol + '\t' + secondCol));
919 m->mothurOut(firstCol + " was not in the contained the file which determined the order, adding it to the end.\n");
// write the filled slots in position order; count tallies what was written
926 for (int i = 0; i < seqs.size(); i++) {
927 if (seqs[i] != "") { out << seqs[i] << endl; count++; }
931 m->mothurOut("Ordered " + toString(count) + " sequences from " + namefile + ".\n");
933 }else { //read in file to fill names
937 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
939 in >> firstCol; m->gobble(in);
940 in >> secondCol; m->gobble(in);
942 if (firstCol != "") {
943 //remember the position of this name so files read later can follow the same order
944 names[firstCol] = count;
946 out << firstCol << '\t' << secondCol << endl;
953 m->mothurOut("\nUsing " + namefile + " to determine the order. It contains " + toString(count) + " representative sequences.\n");
958 catch(exception& e) {
959 m->errorOut(e, "SortSeqsCommand", "readName");
963 //**********************************************************************************************************************
// Writes the rows of countfile to a new "sorted" count file and registers that file in
// outputTypes["count"] and outputNames. The first line of the input is read as the header
// line and written first; every later row is read as a first column (the ordering key)
// plus the rest of the line.
//  - names not empty: rows are placed at the position names holds for their first column;
//    a row whose first column is unknown is appended and given the next position.
//  - names empty: this is the first file read; each first column's position is stored in
//    names and rows are written in file order.
// Returns 0 after removing the output file whenever m->control_pressed is set.
// NOTE(review): the user-facing text "was not in the contained the file which determined
// the order" is garbled; it is runtime output and therefore left unchanged here.
964 int SortSeqsCommand::readCount(){
// output goes next to the input unless the user chose an output directory
966 string thisOutputDir = outputDir;
967 if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
968 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
969 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
972 m->openOutputFile(outputFileName, out);
975 m->openInputFile(countfile, in);
976 string firstCol, rest;
978 if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
// one slot per known name; "" marks a slot that has not been filled
980 vector<string> seqs; seqs.resize(names.size(), "");
// keep the header line so it can be written ahead of the sorted rows
982 string headers = m->getline(in); m->gobble(in);
985 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
987 in >> firstCol; m->gobble(in);
988 rest = m->getline(in); m->gobble(in);
990 if (firstCol != "") {
991 map<string, int>::iterator it = names.find(firstCol);
992 if (it != names.end()) { //we found it, so put it in the vector in the right place.
993 seqs[it->second] = firstCol + '\t' + rest;
994 }else { //if we cant find it then add it to the end
995 names[firstCol] = seqs.size();
996 seqs.push_back((firstCol + '\t' + rest));
997 m->mothurOut(firstCol + " was not in the contained the file which determined the order, adding it to the end.\n");
// header first, then the filled slots in position order; count tallies the rows written
1004 out << headers << endl;
1005 for (int i = 0; i < seqs.size(); i++) {
1006 if (seqs[i] != "") { out << seqs[i] << endl; count++; }
1010 m->mothurOut("Ordered " + toString(count) + " sequences from " + countfile + ".\n");
1012 }else { //read in file to fill names
// copy the header line through unchanged
1015 string headers = m->getline(in); m->gobble(in);
1016 out << headers << endl;
1019 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1021 in >> firstCol; m->gobble(in);
1022 rest = m->getline(in); m->gobble(in);
1024 if (firstCol != "") {
1025 //remember the position of this name so files read later can follow the same order
1026 names[firstCol] = count;
1028 out << firstCol << '\t' << rest << endl;
1035 m->mothurOut("\nUsing " + countfile + " to determine the order. It contains " + toString(count) + " representative sequences.\n");
1040 catch(exception& e) {
1041 m->errorOut(e, "SortSeqsCommand", "readCount");
1045 //**********************************************************************************************************************
1046 int SortSeqsCommand::readGroup(){
1048 string thisOutputDir = outputDir;
1049 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
1050 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
1051 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1054 m->openOutputFile(outputFileName, out);
1057 m->openInputFile(groupfile, in);
1060 if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
1062 vector<string> seqs; seqs.resize(names.size(), "");
1065 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1067 in >> name; m->gobble(in);
1068 in >> group; m->gobble(in);
1071 map<string, int>::iterator it = names.find(name);
1072 if (it != names.end()) { //we found it, so put it in the vector in the right place.
1073 seqs[it->second] = name + '\t' + group;
1074 }else { //if we cant find it then add it to the end
1075 names[name] = seqs.size();
1076 seqs.push_back((name + '\t' + group));
1077 m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
1084 for (int i = 0; i < seqs.size(); i++) {
1085 if (seqs[i] != "") { out << seqs[i] << endl; count++; }
1089 m->mothurOut("Ordered " + toString(count) + " sequences from " + groupfile + ".\n");
1091 }else { //read in file to fill names
1095 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1097 in >> name; m->gobble(in);
1098 in >> group; m->gobble(in);
1101 //if this name is in the accnos file
1102 names[name] = count;
1104 out << name << '\t' << group << endl;
1111 m->mothurOut("\nUsing " + groupfile + " to determine the order. It contains " + toString(count) + " sequences.\n");
1116 catch(exception& e) {
1117 m->errorOut(e, "SortSeqsCommand", "readGroup");
1121 //**********************************************************************************************************************
1122 int SortSeqsCommand::readTax(){
1124 string thisOutputDir = outputDir;
1125 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
1126 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
1127 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1130 m->openOutputFile(outputFileName, out);
1133 m->openInputFile(taxfile, in);
1136 if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
1138 vector<string> seqs; seqs.resize(names.size(), "");
1141 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1143 in >> name; m->gobble(in);
1144 in >> tax; m->gobble(in);
1147 map<string, int>::iterator it = names.find(name);
1148 if (it != names.end()) { //we found it, so put it in the vector in the right place.
1149 seqs[it->second] = name + '\t' + tax;
1150 }else { //if we cant find it then add it to the end
1151 names[name] = seqs.size();
1152 seqs.push_back((name + '\t' + tax));
1153 m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
1160 for (int i = 0; i < seqs.size(); i++) {
1161 if (seqs[i] != "") { out << seqs[i] << endl; count++; }
1165 m->mothurOut("Ordered " + toString(count) + " sequences from " + taxfile + ".\n");
1167 }else { //read in file to fill names
1171 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1173 in >> name; m->gobble(in);
1174 in >> tax; m->gobble(in);
1177 //if this name is in the accnos file
1178 names[name] = count;
1180 out << name << '\t' << tax << endl;
1187 m->mothurOut("\nUsing " + taxfile + " to determine the order. It contains " + toString(count) + " sequences.\n");
1193 catch(exception& e) {
1194 m->errorOut(e, "SortSeqsCommand", "readTax");
1198 //**********************************************************************************************************************