]> git.donarmstrong.com Git - mothur.git/blob - rarefactsharedcommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / rarefactsharedcommand.cpp
1 /*
2  *  rarefactsharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactsharedcommand.h"
11 #include "sharedsobs.h"
12 #include "sharednseqs.h"
13 #include "sharedutilities.h"
14
15 //**********************************************************************************************************************
16 vector<string> RareFactSharedCommand::setParameters(){  
17         try {
18                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19         CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign);
20                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
21                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
24                 CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26         CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
27                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
28         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30                 
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "RareFactSharedCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string RareFactSharedCommand::getHelpString(){  
42         try {
43                 string helpString = "";
44                 ValidCalculators validCalculator;
45                 helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble, groupmode and calc.  shared is required if there is no current sharedfile. \n";
46         helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
47         helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48                 helpString += "The rarefaction command should be in the following format: \n";
49                 helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
50                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
51                 helpString += "Example rarefaction.shared(label=unique-0.01-0.03,  iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
52                 helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
53                 helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
54                 helpString += validCalculator.printCalc("sharedrarefaction");
55                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
56                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "RareFactSharedCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66 string RareFactSharedCommand::getOutputFileNameTag(string type, string inputName=""){   
67         try {
68         string outputFileName = "";
69                 map<string, vector<string> >::iterator it;
70         
71         //is this a type this command creates
72         it = outputTypes.find(type);
73         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
74         else {
75             if (type == "sharedrarefaction") {  outputFileName =  "shared.rarefaction"; }
76             else if (type == "sharedr_nseqs") {  outputFileName =  "shared.r_nseqs"; }
77             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
78         }
79         return outputFileName;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "RareFactSharedCommand", "getOutputFileNameTag");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 RareFactSharedCommand::RareFactSharedCommand(){ 
88         try {
89                 abort = true; calledHelp = true; 
90                 setParameters();
91                 vector<string> tempOutNames;
92                 outputTypes["sharedrarefaction"] = tempOutNames;
93                 outputTypes["sharedr_nseqs"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101
102 RareFactSharedCommand::RareFactSharedCommand(string option)  {
103         try {
104                 abort = false; calledHelp = false;   
105                 allLines = 1;
106                                 
107                 //allow user to run help
108                 if(option == "help") {  help(); abort = true; calledHelp = true; }
109                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110                 
111                 else {
112                         vector<string> myArray = setParameters();
113                         
114                         OptionParser parser(option);
115                         map<string,string> parameters = parser.getParameters();
116                         map<string,string>::iterator it;
117                         
118                         ValidParameters validParameter;
119                         
120                         //check to make sure all parameters are valid for command
121                         for (it = parameters.begin(); it != parameters.end(); it++) { 
122                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
123                         }
124                         
125                         //initialize outputTypes
126                         vector<string> tempOutNames;
127                         outputTypes["sharedrarefaction"] = tempOutNames;
128                         outputTypes["sharedr_nseqs"] = tempOutNames;
129                         
130                         //if the user changes the input directory command factory will send this info to us in the output parameter 
131                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
132                         if (inputDir == "not found"){   inputDir = "";          }
133                         else {
134                                 string path;
135                                 it = parameters.find("shared");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
141                                 }
142                 
143                 it = parameters.find("design");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
149                                 }
150
151                         }
152                         
153                         //get shared file
154                         sharedfile = validParameter.validFile(parameters, "shared", true);
155                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
156                         else if (sharedfile == "not found") { 
157                                 //if there is a current shared file, use it
158                                 sharedfile = m->getSharedFile(); 
159                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
160                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
161                         }else { m->setSharedFile(sharedfile); }
162             
163             designfile = validParameter.validFile(parameters, "design", true);
164                         if (designfile == "not open") { abort = true; designfile = ""; }
165                         else if (designfile == "not found") {   designfile = "";        }
166                         else { m->setDesignFile(designfile); }
167                         
168                         
169                         //if the user changes the output directory command factory will send this info to us in the output parameter 
170                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
171                         
172                         
173                         //check for optional parameter and set defaults
174                         // ...at some point should added some additional type checking...
175                         label = validParameter.validFile(parameters, "label", false);                   
176                         if (label == "not found") { label = ""; }
177                         else { 
178                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
179                                 else { allLines = 1;  }
180                         }
181                         
182                                 
183                         calc = validParameter.validFile(parameters, "calc", false);                     
184                         if (calc == "not found") { calc = "sharedobserved";  }
185                         else { 
186                                  if (calc == "default")  {  calc = "sharedobserved";  }
187                         }
188                         m->splitAtDash(calc, Estimators);
189                         if (m->inUsersGroups("citation", Estimators)) { 
190                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
191                                 //remove citation from list of calcs
192                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
193                         }
194                         
195                         groups = validParameter.validFile(parameters, "groups", false);                 
196                         if (groups == "not found") { groups = ""; }
197                         else { 
198                                 m->splitAtDash(groups, Groups);
199                         }
200                         m->setGroups(Groups);
201             
202             string sets = validParameter.validFile(parameters, "sets", false);                  
203                         if (sets == "not found") { sets = ""; }
204                         else { 
205                                 m->splitAtDash(sets, Sets);
206                         }
207                         
208                         string temp;
209                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
210                         m->mothurConvert(temp, freq); 
211                         
212                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
213                         m->mothurConvert(temp, nIters); 
214                         
215                         temp = validParameter.validFile(parameters, "jumble", false);                   if (temp == "not found") { temp = "T"; }
216                         if (m->isTrue(temp)) { jumble = true; }
217                         else { jumble = false; }
218                         m->jumble = jumble;
219             
220             temp = validParameter.validFile(parameters, "groupmode", false);            if (temp == "not found") { temp = "T"; }
221                         groupMode = m->isTrue(temp);
222                                 
223                 }
224
225         }
226         catch(exception& e) {
227                 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
228                 exit(1);
229         }
230 }
231 //**********************************************************************************************************************
232
233 int RareFactSharedCommand::execute(){
234         try {
235         
236                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
237         
238         GroupMap designMap;
239         if (designfile == "") { //fake out designMap to run with process
240             process(designMap, "");
241         }else {
242             designMap.readDesignMap(designfile);
243             
244             //fill Sets - checks for "all" and for any typo groups
245                         SharedUtil util;
246                         vector<string> nameSets = designMap.getNamesOfGroups();
247                         util.setGroups(Sets, nameSets);
248             
249             for (int i = 0; i < Sets.size(); i++) {
250                 process(designMap, Sets[i]);
251             }
252             
253             if (groupMode) { outputNames = createGroupFile(outputNames); }
254         }
255                     
256                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
257                 
258                 m->mothurOutEndLine();
259                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
260                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
261                 m->mothurOutEndLine();
262
263                 return 0;
264         }
265         catch(exception& e) {
266                 m->errorOut(e, "RareFactSharedCommand", "execute");
267                 exit(1);
268         }
269 }
270 //**********************************************************************************************************************
271
272 int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
273         try {
274         Rarefact* rCurve;
275         vector<Display*> rDisplays;
276         
277         InputData input(sharedfile, "sharedfile");
278                 lookup = input.getSharedRAbundVectors();
279         if (lookup.size() < 2) { 
280                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
281             for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
282             return 0;
283         }
284         
285         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
286         
287         vector<string> newGroups = m->getGroups();
288         if (thisSet != "") {  //make groups only filled with groups from this set so that's all inputdata will read
289             vector<string> thisSets; thisSets.push_back(thisSet);
290             newGroups = designMap.getNamesSeqs(thisSets);
291             fileNameRoot += thisSet + ".";
292         }
293         
294         vector<SharedRAbundVector*> subset;
295         if (thisSet == "") {  subset.clear(); subset = lookup;  }
296         else {//fill subset with this sets groups
297             subset.clear();
298             for (int i = 0; i < lookup.size(); i++) {
299                 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
300                     subset.push_back(lookup[i]);
301                 }
302             }
303         }
304                 
305                 ValidCalculators validCalculator;
306                 for (int i=0; i<Estimators.size(); i++) {
307                         if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) { 
308                                 if (Estimators[i] == "sharedobserved") { 
309                                         rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+getOutputFileNameTag("sharedrarefaction"), "")));
310                                         outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedrarefaction")); outputTypes["sharedrarefaction"].push_back(fileNameRoot+getOutputFileNameTag("sharedrarefaction"));
311                                 }else if (Estimators[i] == "sharednseqs") { 
312                                         rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+getOutputFileNameTag("sharedr_nseqs"), "")));
313                                         outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedr_nseqs")); outputTypes["sharedr_nseqs"].push_back(fileNameRoot+getOutputFileNameTag("sharedr_nseqs"));
314                                 }
315                         }
316             file2Group[outputNames.size()-1] = thisSet;
317                 }
318                 
319                 //if the users entered no valid calculators don't execute command
320                 if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
321                 
322                 if (m->control_pressed) { 
323                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
324                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
325                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
326                         return 0;
327                 }
328         
329         
330                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
331         string lastLabel = subset[0]->getLabel();
332                 set<string> processedLabels;
333                 set<string> userLabels = labels;
334         
335                 //as long as you are not at the end of the file or done wih the lines you want
336                 while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
337                         if (m->control_pressed) { 
338                                 for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
339                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
340                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
341                                 return 0;
342                         }
343                         
344                         if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
345                                 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
346                                 rCurve = new Rarefact(subset, rDisplays);
347                                 rCurve->getSharedCurve(freq, nIters);
348                                 delete rCurve;
349                 
350                                 processedLabels.insert(subset[0]->getLabel());
351                                 userLabels.erase(subset[0]->getLabel());
352                         }
353                         
354                         if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
355                 string saveLabel = subset[0]->getLabel();
356                 
357                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
358                 lookup = input.getSharedRAbundVectors(lastLabel);
359                 
360                 if (thisSet == "") {  subset.clear(); subset = lookup;  }
361                 else {//fill subset with this sets groups
362                     subset.clear();
363                     for (int i = 0; i < lookup.size(); i++) {
364                         if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
365                             subset.push_back(lookup[i]);
366                         }
367                     }
368                 }
369
370                 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
371                 rCurve = new Rarefact(subset, rDisplays);
372                 rCurve->getSharedCurve(freq, nIters);
373                 delete rCurve;
374                 
375                 processedLabels.insert(subset[0]->getLabel());
376                 userLabels.erase(subset[0]->getLabel());
377                 
378                 //restore real lastlabel to save below
379                 subset[0]->setLabel(saveLabel);
380                         }
381             
382                         
383                         lastLabel = subset[0]->getLabel();
384                         
385                         //get next line to process
386                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
387                         lookup = input.getSharedRAbundVectors();
388             
389             if (lookup[0] != NULL) {
390                 if (thisSet == "") {  subset.clear(); subset = lookup;  }
391                 else {//fill subset with this sets groups
392                     subset.clear();
393                     for (int i = 0; i < lookup.size(); i++) {
394                         if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
395                             subset.push_back(lookup[i]);
396                         }
397                     }
398                 }
399             }else {  subset.clear(); subset.push_back(NULL); }
400
401                 }
402                 
403                 if (m->control_pressed) { 
404                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
405                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
406                         return 0;
407                 }
408                 
409                 //output error messages about any remaining user labels
410                 set<string>::iterator it;
411                 bool needToRun = false;
412                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
413                         m->mothurOut("Your file does not include the label " + *it); 
414                         if (processedLabels.count(lastLabel) != 1) {
415                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
416                                 needToRun = true;
417                         }else {
418                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
419                         }
420                 }
421                 
422                 if (m->control_pressed) { 
423                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
424                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
425                         return 0;
426                 }
427                 
428                 //run last label if you need to
429                 if (needToRun == true)  {
430                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i]; }  } 
431                         lookup = input.getSharedRAbundVectors(lastLabel);
432             
433             if (thisSet == "") {  subset.clear(); subset = lookup;  }
434             else {//fill subset with this sets groups
435                 subset.clear();
436                 for (int i = 0; i < lookup.size(); i++) {
437                     if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
438                         subset.push_back(lookup[i]);
439                     }
440                 }
441             }
442             
443                         m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
444                         rCurve = new Rarefact(subset, rDisplays);
445                         rCurve->getSharedCurve(freq, nIters);
446                         delete rCurve;
447                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
448                 }
449                 
450                 for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
451
452         
453         return 0;
454     }
455         catch(exception& e) {
456                 m->errorOut(e, "RareFactSharedCommand", "process");
457                 exit(1);
458         }
459 }
460 //**********************************************************************************************************************
461 vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
462         try {
463                 
464                 vector<string> newFileNames;
465                 
466                 //find different types of files
467                 map<string, map<string, string> > typesFiles;
468         map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
469         vector<string> groupNames;
470                 for (int i = 0; i < outputNames.size(); i++) {
471             
472                         string extension = m->getExtension(outputNames[i]);
473             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
474                         m->mothurRemove(combineFileName); //remove old file
475             
476                         ifstream in;
477                         m->openInputFile(outputNames[i], in);
478                         
479                         string labels = m->getline(in);
480             
481                         istringstream iss (labels,istringstream::in);
482             string newLabel = ""; vector<string> theseLabels;
483             while(!iss.eof()) {  iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
484             vector< vector<string> > allLabels;
485             vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
486             for (int j = 1; j < theseLabels.size()-1; j++) {
487                 if (theseLabels[j+1] == "lci") {
488                     thisSet.push_back(theseLabels[j]); 
489                     thisSet.push_back(theseLabels[j+1]); 
490                     thisSet.push_back(theseLabels[j+2]);
491                     j++; j++;
492                 }else{ //no lci or hci for this calc.
493                     thisSet.push_back(theseLabels[j]); 
494                 }
495                 allLabels.push_back(thisSet); 
496                 thisSet.clear();
497             }
498             fileLabels[combineFileName] = allLabels;
499             
500             map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
501             if (itfind != typesFiles.end()) {
502                 (itfind->second)[outputNames[i]] = file2Group[i];
503             }else {
504                 map<string, string> temp;  
505                 temp[outputNames[i]] = file2Group[i];
506                 typesFiles[extension] = temp;
507             }
508             if (!(m->inUsersGroups(file2Group[i], groupNames))) {  groupNames.push_back(file2Group[i]); }
509                 }
510                 
511                 //for each type create a combo file
512                 
513                 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
514                         
515                         ofstream out;
516                         string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
517                         m->openOutputFileAppend(combineFileName, out);
518                         newFileNames.push_back(combineFileName);
519                         map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
520             set<int> numSampledSet;
521             
522                         //open each type summary file
523                         map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
524                         int maxLines = 0;
525                         for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
526                 
527                 string thisfilename = itFileNameGroup->first;
528                 string group = itFileNameGroup->second;
529                 
530                                 ifstream temp;
531                                 m->openInputFile(thisfilename, temp);
532                                 
533                                 //read through first line - labels
534                                 m->getline(temp);       m->gobble(temp);
535                                 
536                                 map<int, vector< vector<string> > > thisFilesLines;
537                                 while (!temp.eof()){
538                     int numSampled = 0;
539                     temp >> numSampled; m->gobble(temp);
540                     
541                     vector< vector<string> > theseReads;
542                     vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
543                     for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
544                         vector<string> reads;
545                         string next = "";
546                         for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
547                             temp >> next; m->gobble(temp);
548                             reads.push_back(next);
549                         }
550                         theseReads.push_back(reads);
551                     }
552                     thisFilesLines[numSampled] = theseReads;
553                     m->gobble(temp);
554                     
555                     numSampledSet.insert(numSampled);
556                                 }
557                                 
558                                 files[group] = thisFilesLines;
559                                 
560                                 //save longest file for below
561                                 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
562                                 
563                                 temp.close();
564                                 m->mothurRemove(thisfilename);
565                         }
566                         
567             //output new labels line
568             out << fileLabels[combineFileName][0][0] << '\t';
569             for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
570                 for (int n = 0; n < groupNames.size(); n++) { // for each group
571                     for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
572                         out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
573                     }
574                 }
575             }
576                         out << endl;
577             
578                         //for each label
579                         for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
580                                 
581                 out << (*itNumSampled) << '\t';
582                 
583                 if (m->control_pressed) { break; }
584                 
585                 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
586                                     //grab data for each group
587                     for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
588                         
589                         string group = itFileNameGroup->first;
590                         
591                         map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
592                         if (itLine != files[group].end()) { 
593                             for (int l = 0; l < (itLine->second)[k].size(); l++) { 
594                                 out << (itLine->second)[k][l] << '\t';
595                                 
596                             }                             
597                         }else { 
598                             for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { 
599                                 out << "NA" << '\t';
600                             } 
601                         }
602                     }
603                 }
604                 out << endl;
605                         }       
606                         out.close();
607                 }
608                 
609                 //return combine file name
610                 return newFileNames;
611                 
612         }
613         catch(exception& e) {
614                 m->errorOut(e, "RareFactSharedCommand", "createGroupFile");
615                 exit(1);
616         }
617 }
618 //**********************************************************************************************************************