]> git.donarmstrong.com Git - mothur.git/blob - sortseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / sortseqscommand.cpp
1 //
2 //  sortseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 2/3/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "sortseqscommand.h"
10 #include "sequence.hpp"
11 #include "qualityscores.h"
12
13 //**********************************************************************************************************************
14 vector<string> SortSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
19                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
21                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
22                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
23         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SortSeqsCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SortSeqsCommand::getHelpString(){        
37         try {
38                 string helpString = "";
39                 helpString += "The sort.seqs command puts the sequences in the same order for the following file types: accnos fasta, name, group, taxonomy or quality file.\n";
40         helpString += "The sort.seqs command parameters are accnos, fasta, name, group, taxonomy, qfile and large.\n";
41         helpString += "The accnos file allows you to specify the order you want the files in.  If none is provided, mothur will use the order of the first file it reads.\n";
42         helpString += "The large parameters is used to indicate your files are too large to fit in RAM.\n";
43                 helpString += "The sort.seqs command should be in the following format: sort.seqs(fasta=yourFasta).\n";
44                 helpString += "Example sort.seqs(fasta=amazon.fasta).\n";
45                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
46                 return helpString;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "SortSeqsCommand", "getHelpString");
50                 exit(1);
51         }
52 }
53
54
55 //**********************************************************************************************************************
56 SortSeqsCommand::SortSeqsCommand(){     
57         try {
58                 abort = true; calledHelp = true; 
59                 setParameters();
60                 vector<string> tempOutNames;
61                 outputTypes["fasta"] = tempOutNames;
62                 outputTypes["taxonomy"] = tempOutNames;
63                 outputTypes["name"] = tempOutNames;
64                 outputTypes["group"] = tempOutNames;
65                 outputTypes["qfile"] = tempOutNames;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "SortSeqsCommand", "SortSeqsCommand");
69                 exit(1);
70         }
71 }
72 //**********************************************************************************************************************
73 SortSeqsCommand::SortSeqsCommand(string option)  {
74         try {
75                 abort = false; calledHelp = false;   
76                 
77                 //allow user to run help
78                 if(option == "help") { help(); abort = true; calledHelp = true; }
79                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
80                 
81                 else {
82                         vector<string> myArray = setParameters();
83                         
84                         OptionParser parser(option);
85                         map<string,string> parameters = parser.getParameters();
86                         
87                         ValidParameters validParameter;
88                         map<string,string>::iterator it;
89                         
90                         //check to make sure all parameters are valid for command
91                         for (it = parameters.begin(); it != parameters.end(); it++) { 
92                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
93                         }
94                         
95                         //initialize outputTypes
96                         vector<string> tempOutNames;
97                         outputTypes["fasta"] = tempOutNames;
98                         outputTypes["taxonomy"] = tempOutNames;
99                         outputTypes["name"] = tempOutNames;
100                         outputTypes["group"] = tempOutNames;
101                         outputTypes["qfile"] = tempOutNames;
102                         
103                         //if the user changes the output directory command factory will send this info to us in the output parameter 
104                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("fasta");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
117                                 }
118                                 
119                                 it = parameters.find("name");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
125                                 }
126                                 
127                                 it = parameters.find("group");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
133                                 }
134                                 
135                                 it = parameters.find("taxonomy");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
141                                 }
142                                 
143                                 it = parameters.find("qfile");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
149                                 }
150                 
151                 it = parameters.find("accnos");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
157                                 }
158                         }
159             
160                         
161                         //check for parameters
162             accnosfile = validParameter.validFile(parameters, "accnos", true);
163                         if (accnosfile == "not open") { accnosfile = ""; abort = true; }
164                         else if (accnosfile == "not found") {  accnosfile = "";  }      
165                         else { m->setAccnosFile(accnosfile); }
166             
167                         fastafile = validParameter.validFile(parameters, "fasta", true);
168                         if (fastafile == "not open") { fastafile = ""; abort = true; }
169                         else if (fastafile == "not found") {  fastafile = "";  }        
170                         else { m->setFastaFile(fastafile); }
171             
172                         namefile = validParameter.validFile(parameters, "name", true);
173                         if (namefile == "not open") { namefile = ""; abort = true; }
174                         else if (namefile == "not found") {  namefile = "";  }  
175                         else { m->setNameFile(namefile); } 
176             
177                         groupfile = validParameter.validFile(parameters, "group", true);
178                         if (groupfile == "not open") { abort = true; }
179                         else if (groupfile == "not found") {  groupfile = "";  }
180                         else { m->setGroupFile(groupfile); }
181                         
182                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
183                         if (taxfile == "not open") { abort = true; }
184                         else if (taxfile == "not found") {  taxfile = "";  }
185                         else { m->setTaxonomyFile(taxfile); }
186                         
187                         qualfile = validParameter.validFile(parameters, "qfile", true);
188                         if (qualfile == "not open") { abort = true; }
189                         else if (qualfile == "not found") {  qualfile = "";  }                  
190                         else { m->setQualFile(qualfile); }
191                         
192             string temp = validParameter.validFile(parameters, "large", false);         if (temp == "not found") { temp = "f"; }
193                         large = m->isTrue(temp);
194             
195                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy or quality."); m->mothurOutEndLine(); abort = true; }
196                         
197                         if ((fastafile != "") && (namefile == "")) {
198                                 vector<string> files; files.push_back(fastafile);
199                                 parser.getNameFile(files);
200                         }
201                 }
202         
203         }
204         catch(exception& e) {
205                 m->errorOut(e, "SortSeqsCommand", "SortSeqsCommand");
206                 exit(1);
207         }
208 }
209 //**********************************************************************************************************************
210
211 int SortSeqsCommand::execute(){
212         try {
213                 
214                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
215                 
216                 //read through the correct file and output lines you want to keep
217         if (accnosfile != "")           {               readAccnos();   }
218                 if (fastafile != "")            {               readFasta();    }
219         if (qualfile != "")                     {               readQual();             }
220         if (namefile != "")                     {               readName();             }
221                 if (groupfile != "")            {               readGroup();    }
222         if (taxfile != "")                      {               readTax();              }
223                 
224                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
225         
226                 if (outputNames.size() != 0) {
227                         m->mothurOutEndLine();
228                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
229                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
230                         m->mothurOutEndLine();
231                         
232                         //set fasta file as new current fastafile
233                         string current = "";
234                         itTypes = outputTypes.find("fasta");
235                         if (itTypes != outputTypes.end()) {
236                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
237                         }
238                         
239                         itTypes = outputTypes.find("name");
240                         if (itTypes != outputTypes.end()) {
241                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
242                         }
243                         
244                         itTypes = outputTypes.find("group");
245                         if (itTypes != outputTypes.end()) {
246                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
247                         }
248                         
249                         
250                         itTypes = outputTypes.find("taxonomy");
251                         if (itTypes != outputTypes.end()) {
252                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
253                         }
254                         
255                         itTypes = outputTypes.find("qfile");
256                         if (itTypes != outputTypes.end()) {
257                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
258                         }                       
259                 }
260                 
261                 return 0;               
262         }
263     
264         catch(exception& e) {
265                 m->errorOut(e, "SortSeqsCommand", "execute");
266                 exit(1);
267         }
268 }
269
270 //**********************************************************************************************************************
271 int SortSeqsCommand::readFasta(){
272         try {
273                 string thisOutputDir = outputDir;
274                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
275                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "sorted" + m->getExtension(fastafile);
276                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
277         
278                 ofstream out;
279                 m->openOutputFile(outputFileName, out);
280                 
281                 ifstream in;
282                 m->openInputFile(fastafile, in);
283                 string name;
284                 
285         if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
286             
287             if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming.
288                 //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000.
289                 //this way we only store 1000 seqs in memory at a time.
290                 
291                 int numNames = names.size();
292                 int numNamesInFile = 0;
293                 
294                 //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names
295                 while(!in.eof()){
296                     if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
297                     
298                     Sequence currSeq(in);
299                     name = currSeq.getName();
300                     
301                     if (name != "") {
302                         numNamesInFile++;
303                         map<string, int>::iterator it = names.find(name);
304                         if (it == names.end()) { 
305                             names[name] = numNames; numNames++;
306                             m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
307                         }
308                     }
309                     m->gobble(in);
310                 }
311                 in.close();
312                 out.close();
313                 
314                 int numLeft = names.size();
315                 if (numNamesInFile < numLeft) { numLeft = numNamesInFile; }
316                 
317                 int size = 1000; //assume that user can hold 1000 seqs in memory
318                 if (numLeft < size) { size = numLeft; }
319                 int times = 0;
320                 
321                 vector<Sequence> seqs; seqs.resize(size);
322                 
323                 while (numLeft > 0) {
324                     
325                     ifstream in2;
326                     m->openInputFile(fastafile, in2);
327                     
328                     if (m->control_pressed) { in2.close();  m->mothurRemove(outputFileName);  return 0; }
329                     
330                     int found = 0;
331                     int needToFind = size;
332                     if (numLeft < size) { needToFind = numLeft; }
333                     
334                     while(!in2.eof()){
335                         if (m->control_pressed) { in2.close();   m->mothurRemove(outputFileName);  return 0; }
336                         
337                         //stop reading if we already found the seqs we are looking for
338                         if (found >= needToFind) { break; }
339                         
340                         Sequence currSeq(in2);
341                         name = currSeq.getName();
342                         
343                         if (name != "") {
344                             map<string, int>::iterator it = names.find(name);
345                             if (it != names.end()) { //we found it, so put it in the vector in the right place.
346                                 //is it in the set of seqs we are looking for this time around
347                                 int thisSeqsPlace = it->second;
348                                 thisSeqsPlace -= (times * size);
349                                 if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) {
350                                     seqs[thisSeqsPlace] = currSeq; 
351                                     found++;
352                                 }
353                             }else { m->mothurOut("[ERROR]: in logic of readFasta function.\n"); m->control_pressed = true; }
354                         }
355                         m->gobble(in2);
356                     }
357                     in2.close();        
358
359                     ofstream out2;
360                     m->openOutputFileAppend(outputFileName, out2);
361                     
362                     int output = seqs.size();
363                     if (numLeft < seqs.size()) { output = numLeft; }
364                         
365                     for (int i = 0; i < output; i++) {
366                         seqs[i].printSequence(out2);
367                     }
368                     out2.close();
369                     
370                     times++;
371                     numLeft -= output;
372                 }
373                 
374                 m->mothurOut("Ordered " + toString(numNamesInFile) + " sequences from " + fastafile + ".\n");
375             }else {
376                 
377                 vector<Sequence> seqs; seqs.resize(names.size());
378                 
379                 while(!in.eof()){
380                     if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
381                     
382                     Sequence currSeq(in);
383                     name = currSeq.getName();
384                     
385                     if (name != "") {
386                         map<string, int>::iterator it = names.find(name);
387                         if (it != names.end()) { //we found it, so put it in the vector in the right place.
388                             seqs[it->second] = currSeq;  
389                         }else { //if we cant find it then add it to the end
390                             names[name] = seqs.size();
391                             seqs.push_back(currSeq);
392                             m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
393                         }
394                     }
395                     m->gobble(in);
396                 }
397                 in.close();     
398                 
399                 for (int i = 0; i < seqs.size(); i++) {
400                     seqs[i].printSequence(out);
401                 }
402                 out.close();
403                 
404                 m->mothurOut("Ordered " + toString(seqs.size()) + " sequences from " + fastafile + ".\n");
405             }
406                         
407         }else { //read in file to fill names
408             int count = 0;
409             
410             while(!in.eof()){
411                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
412                 
413                 Sequence currSeq(in);
414                 name = currSeq.getName();
415                 
416                 if (name != "") {
417                     //if this name is in the accnos file
418                     names[name] = count;
419                     count++;
420                     currSeq.printSequence(out);
421                 }
422                 m->gobble(in);
423             }
424             in.close(); 
425             out.close();
426             
427             m->mothurOut("\nUsing " + fastafile + " to determine the order. It contains " + toString(count) + " sequences.\n");
428         }
429                                 
430                 return 0;
431                 
432         }
433         catch(exception& e) {
434                 m->errorOut(e, "SortSeqsCommand", "readFasta");
435                 exit(1);
436         }
437 }
438 //**********************************************************************************************************************
439 int SortSeqsCommand::readQual(){
440         try {
441                 string thisOutputDir = outputDir;
442                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
443                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "sorted" +  m->getExtension(qualfile);
444         outputTypes["qfile"].push_back(outputFileName);  outputNames.push_back(outputFileName);
445         
446                 ofstream out;
447                 m->openOutputFile(outputFileName, out);
448                 
449                 ifstream in;
450                 m->openInputFile(qualfile, in);
451                 string name;
452                 
453         if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
454             
455             if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming.
456                 //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000.
457                 //this way we only store 1000 seqs in memory at a time.
458                 
459                 int numNames = names.size();
460                 int numNamesInFile = 0;
461                 
462                 //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names
463                 while(!in.eof()){
464                     if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
465                     
466                     QualityScores currQual;
467                     currQual = QualityScores(in); 
468                     name = currQual.getName();
469                     
470                     if (name != "") {
471                         numNamesInFile++;
472                         map<string, int>::iterator it = names.find(name);
473                         if (it == names.end()) { 
474                             names[name] = numNames; numNames++;
475                             m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
476                         }
477                     }
478                     m->gobble(in);
479                 }
480                 in.close();
481                 out.close();
482                 
483                 int numLeft = names.size();
484                 if (numNamesInFile < numLeft) { numLeft = numNamesInFile; }
485                 
486                 int size = 1000; //assume that user can hold 1000 seqs in memory
487                 if (numLeft < size) { size = numLeft; }
488                 int times = 0;
489
490                 
491                 vector<QualityScores> seqs; seqs.resize(size);
492                 
493                 while (numLeft > 0) {
494                     
495                     ifstream in2;
496                     m->openInputFile(qualfile, in2);
497                     
498                     if (m->control_pressed) { in2.close();  m->mothurRemove(outputFileName);  return 0; }
499                     
500                     int found = 0;
501                     int needToFind = size;
502                     if (numLeft < size) { needToFind = numLeft; }
503                     
504                     while(!in2.eof()){
505                         if (m->control_pressed) { in2.close();   m->mothurRemove(outputFileName);  return 0; }
506                         
507                         //stop reading if we already found the seqs we are looking for
508                         if (found >= needToFind) { break; }
509                         
510                         QualityScores currQual;
511                         currQual = QualityScores(in2); 
512                         name = currQual.getName();
513                         
514                         if (name != "") {
515                             map<string, int>::iterator it = names.find(name);
516                             if (it != names.end()) { //we found it, so put it in the vector in the right place.
517                                 //is it in the set of seqs we are looking for this time around
518                                 int thisSeqsPlace = it->second;
519                                 thisSeqsPlace -= (times * size);
520                                 if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) {
521                                     seqs[thisSeqsPlace] = currQual; 
522                                     found++;
523                                 }
524                             }else { m->mothurOut("[ERROR]: in logic of readQual function.\n"); m->control_pressed = true; }
525                         }
526                         m->gobble(in2);
527                     }
528                     in2.close();        
529                     
530                     ofstream out2;
531                     m->openOutputFileAppend(outputFileName, out2);
532                     
533                     int output = seqs.size();
534                     if (numLeft < seqs.size()) { output = numLeft; }
535                     
536                     for (int i = 0; i < output; i++) {
537                         seqs[i].printQScores(out2);
538                     }
539                     out2.close();
540                     
541                     times++;
542                     numLeft -= output;
543                 }
544                 
545                  m->mothurOut("Ordered " + toString(numNamesInFile) + " sequences from " + qualfile + ".\n");
546                 
547             }else {
548                 
549                 vector<QualityScores> seqs; seqs.resize(names.size());
550                 
551                 while(!in.eof()){
552                     if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
553                     
554                     QualityScores currQual;
555                     currQual = QualityScores(in); 
556                     name = currQual.getName();
557                     
558                     if (name != "") {
559                         map<string, int>::iterator it = names.find(name);
560                         if (it != names.end()) { //we found it, so put it in the vector in the right place.
561                             seqs[it->second] = currQual;  
562                         }else { //if we cant find it then add it to the end
563                             names[name] = seqs.size();
564                             seqs.push_back(currQual);
565                             m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
566                         }
567                     }
568                     m->gobble(in);
569                 }
570                 in.close();     
571                 
572                 for (int i = 0; i < seqs.size(); i++) {
573                     seqs[i].printQScores(out);
574                 }
575                 out.close();
576                 
577                 m->mothurOut("Ordered " + toString(seqs.size()) + " sequences from " + qualfile + ".\n");
578             }
579             
580         }else { //read in file to fill names
581             int count = 0;
582             
583             while(!in.eof()){
584                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
585                 
586                 QualityScores currQual;
587                 currQual = QualityScores(in);  
588                                
589                 m->gobble(in);
590                 
591                 if (currQual.getName() != "") {
592                     //if this name is in the accnos file
593                     names[currQual.getName()] = count;
594                     count++;
595                     currQual.printQScores(out);
596                 }
597                 m->gobble(in);
598             }
599             in.close(); 
600             out.close();
601             
602             m->mothurOut("\nUsing " + qualfile + " to determine the order. It contains " + toString(count) + " sequences.\n");
603         }
604                 
605                 return 0;
606                 
607         }
608         catch(exception& e) {
609                 m->errorOut(e, "SortSeqsCommand", "readQual");
610                 exit(1);
611         }
612 }
613 //**********************************************************************************************************************
614 int SortSeqsCommand::readName(){
615         try {
616                 string thisOutputDir = outputDir;
617                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
618                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "sorted" + m->getExtension(namefile);
619         outputTypes["name"].push_back(outputFileName);  outputNames.push_back(outputFileName);
620         
621                 ofstream out;
622                 m->openOutputFile(outputFileName, out);
623         
624                 ifstream in;
625                 m->openInputFile(namefile, in);
626                 string name, firstCol, secondCol;
627                 
628         if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
629         
630                 vector<string> seqs; seqs.resize(names.size());
631                 
632                 while(!in.eof()){
633                     if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
634                     
635                     in >> firstCol;             m->gobble(in);          
636                     in >> secondCol;    m->gobble(in);
637                     
638                     if (firstCol != "") {
639                         map<string, int>::iterator it = names.find(firstCol);
640                         if (it != names.end()) { //we found it, so put it in the vector in the right place.
641                             seqs[it->second] = firstCol + '\t' + secondCol;  
642                         }else { //if we cant find it then add it to the end
643                             names[firstCol] = seqs.size();
644                             seqs.push_back((firstCol + '\t' + secondCol));
645                             m->mothurOut(firstCol + " was not in the contained the file which determined the order, adding it to the end.\n");
646                         }
647                     }
648                 }
649                 in.close();     
650                 
651                 for (int i = 0; i < seqs.size(); i++) {
652                     out << seqs[i] << endl;
653                 }
654                 out.close();
655                 
656                 m->mothurOut("Ordered " + toString(seqs.size()) + " sequences from " + namefile + ".\n");
657             
658         }else { //read in file to fill names
659             int count = 0;
660             
661             while(!in.eof()){
662                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
663                 
664                 in >> firstCol;         m->gobble(in);          
665                 in >> secondCol;    m->gobble(in);
666                 
667                 if (firstCol != "") {
668                     //if this name is in the accnos file
669                     names[firstCol] = count;
670                     count++;
671                     out << firstCol << '\t' << secondCol << endl;
672                 }
673                 m->gobble(in);
674             }
675             in.close(); 
676             out.close();
677             
678             m->mothurOut("\nUsing " + namefile + " to determine the order. It contains " + toString(count) + " representative sequences.\n");
679         }
680                                 
681                 return 0;
682         }
683         catch(exception& e) {
684                 m->errorOut(e, "SortSeqsCommand", "readName");
685                 exit(1);
686         }
687 }
688
689 //**********************************************************************************************************************
690 int SortSeqsCommand::readGroup(){
691         try {
692                 string thisOutputDir = outputDir;
693                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
694                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
695                 outputTypes["group"].push_back(outputFileName);  outputNames.push_back(outputFileName);
696         
697                 ofstream out;
698                 m->openOutputFile(outputFileName, out);
699         
700                 ifstream in;
701                 m->openInputFile(groupfile, in);
702                 string name, group;
703                 
704                 if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
705             
706             vector<string> seqs; seqs.resize(names.size());
707             
708             while(!in.eof()){
709                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
710                 
711                 in >> name;             m->gobble(in);          
712                 in >> group;    m->gobble(in);
713                 
714                 if (name != "") {
715                     map<string, int>::iterator it = names.find(name);
716                     if (it != names.end()) { //we found it, so put it in the vector in the right place.
717                         seqs[it->second] = name + '\t' + group;  
718                     }else { //if we cant find it then add it to the end
719                         names[name] = seqs.size();
720                         seqs.push_back((name + '\t' + group));
721                         m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
722                     }
723                 }
724             }
725             in.close(); 
726             
727             for (int i = 0; i < seqs.size(); i++) {
728                 out << seqs[i] << endl;
729             }
730             out.close();
731             
732             m->mothurOut("Ordered " + toString(seqs.size()) + " sequences from " + groupfile + ".\n");
733             
734         }else { //read in file to fill names
735             int count = 0;
736             
737             while(!in.eof()){
738                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
739                 
740                 in >> name;             m->gobble(in);          
741                 in >> group;    m->gobble(in);
742                 
743                 if (name != "") {
744                     //if this name is in the accnos file
745                     names[name] = count;
746                     count++;
747                     out << name << '\t' << group << endl;
748                 }
749                 m->gobble(in);
750             }
751             in.close(); 
752             out.close();
753             
754             m->mothurOut("\nUsing " + groupfile + " to determine the order. It contains " + toString(count) + " sequences.\n");
755         }
756         
757                 return 0;
758         }
759         catch(exception& e) {
760                 m->errorOut(e, "SortSeqsCommand", "readGroup");
761                 exit(1);
762         }
763 }
764 //**********************************************************************************************************************
765 int SortSeqsCommand::readTax(){
766         try {
767                 string thisOutputDir = outputDir;
768                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
769                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
770         outputTypes["taxonomy"].push_back(outputFileName);  outputNames.push_back(outputFileName);
771         
772                 ofstream out;
773                 m->openOutputFile(outputFileName, out);
774         
775                 ifstream in;
776                 m->openInputFile(taxfile, in);
777                 string name, tax;
778                 
779                 if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
780             
781             vector<string> seqs; seqs.resize(names.size());
782             
783             while(!in.eof()){
784                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
785                 
786                 in >> name;             m->gobble(in);          
787                 in >> tax;    m->gobble(in);
788                 
789                 if (name != "") {
790                     map<string, int>::iterator it = names.find(name);
791                     if (it != names.end()) { //we found it, so put it in the vector in the right place.
792                         seqs[it->second] = name + '\t' + tax;  
793                     }else { //if we cant find it then add it to the end
794                         names[name] = seqs.size();
795                         seqs.push_back((name + '\t' + tax));
796                         m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
797                     }
798                 }
799             }
800             in.close(); 
801             
802             for (int i = 0; i < seqs.size(); i++) {
803                 out << seqs[i] << endl;
804             }
805             out.close();
806             
807             m->mothurOut("Ordered " + toString(seqs.size()) + " sequences from " + taxfile + ".\n");
808             
809         }else { //read in file to fill names
810             int count = 0;
811             
812             while(!in.eof()){
813                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
814                 
815                 in >> name;             m->gobble(in);          
816                 in >> tax;    m->gobble(in);
817                 
818                 if (name != "") {
819                     //if this name is in the accnos file
820                     names[name] = count;
821                     count++;
822                     out << name << '\t' << tax << endl;
823                 }
824                 m->gobble(in);
825             }
826             in.close(); 
827             out.close();
828             
829             m->mothurOut("\nUsing " + taxfile + " to determine the order. It contains " + toString(count) + " sequences.\n");
830         }
831         
832                 return 0;
833                 return 0;
834         }
835         catch(exception& e) {
836                 m->errorOut(e, "SortSeqsCommand", "readTax");
837                 exit(1);
838         }
839 }
840 //**********************************************************************************************************************
841 int SortSeqsCommand::readAccnos(){
842         try {
843                 
844                 ifstream in;
845                 m->openInputFile(accnosfile, in);
846                 string name;
847         int count = 0;
848                 
849                 while(!in.eof()){
850             
851             if (m->control_pressed) { break; }
852             
853                         in >> name; m->gobble(in);
854             
855             if (name != "") {
856                 names[name] = count;
857                 count++;
858             }
859                 }
860                 in.close();             
861         
862         m->mothurOut("\nUsing " + accnosfile + " to determine the order. It contains " + toString(count) + " representative sequences.\n");
863         
864         return 0;
865         }
866         catch(exception& e) {
867                 m->errorOut(e, "SortSeqsCommand", "readAccnos");
868                 exit(1);
869         }
870 }
871
872 //**********************************************************************************************************************
873
874
875
876
877