]> git.donarmstrong.com Git - mothur.git/blobdiff - parselistcommand.cpp
added logfile feature
[mothur.git] / parselistcommand.cpp
index 5379052d6dd4f1bbfe51b3b346874def78a7b841..0fdcf9ad2dd3e76ed04630096895cf9c81385d81 100644 (file)
@@ -15,68 +15,66 @@ ParseListCommand::ParseListCommand(){
                globaldata = GlobalData::getInstance();
                
                //read in group map info.
-               groupMap = new GroupMap(globaldata->getGroupFile());
-               groupMap->readMap();
-                       
+               //groupMap = new GroupMap(globaldata->getGroupFile());
+               //groupMap->readMap();
+               groupMap = globaldata->gGroupmap;
+
                //fill filehandles with neccessary ofstreams
                int i;
                ofstream* temp;
+               SharedListVector* templist;
                for (i=0; i<groupMap->getNumGroups(); i++) {
                        temp = new ofstream;
+                       templist = new SharedListVector();
                        filehandles[groupMap->namesOfGroups[i]] = temp;
+                       mapOfLists[groupMap->namesOfGroups[i]] = templist;
                }
                
                //set fileroot
                fileroot = getRootName(globaldata->getListFile());
                
-               //open output list files
-               for (i=0; i<groupMap->getNumGroups(); i++) {//opens an output file for each group
-                       openOutputFile(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
+               //clears file before we start to write to it below
+               for (int i=0; i<groupMap->getNumGroups(); i++) {
+                       remove((fileroot + groupMap->namesOfGroups[i] + ".list").c_str());
                }
+       
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the ParseListCommand class function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "ParseListCommand", "ParseListCommand");
                exit(1);
        }
+       
 }
 /***********************************************************************/
-void ParseListCommand::parse(int index) {
+void ParseListCommand::parse(int index, SharedListVector* list) {
        try {
-               string prefix, suffix, groupsName;
-               suffix = list->get(index);
-       
-               while (suffix.find_first_of(',') != -1) {//while you still have sequences
-                       prefix = suffix.substr(0,suffix.find_first_of(','));
-                       if ((suffix.find_first_of(',')+1) <= suffix.length()) {  //checks to make sure you don't have comma at end of string
-                               suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
+               string member, bin, groupName;
+               bin = list->get(index);
+               
+               while (bin.find_first_of(',') != -1) {//while you still have sequences
+                       member = bin.substr(0,bin.find_first_of(','));
+                       if ((bin.find_first_of(',')+1) <= bin.length()) {  //checks to make sure you don't have comma at end of string
+                               bin = bin.substr(bin.find_first_of(',')+1, bin.length());
                        }
                        
-                       groupsName = groupMap->getGroup(prefix);
-                       if (groupsName != "not found") {
-                               listGroups[groupsName] = listGroups[groupsName] + "," + prefix; //adds prefix to the correct group.
+                       groupName = groupMap->getGroup(member);
+                       if (groupName != "not found") {
+                               listGroups[groupName] = listGroups[groupName] + "," + member; //adds prefix to the correct group.
                        }else {
-                               cerr << "Error: Sequence '" << prefix << "' was not found in the group file, please correct\n";
+                               mothurOut("Error: Sequence '" + toString(member) + "' was not found in the group file, please correct\n");
                        }
                }
                
                //save last name after comma
-               groupsName = groupMap->getGroup(suffix);
-               if (groupsName != "not found") {
-                       listGroups[groupsName] = listGroups[groupsName] + "," + suffix; //adds prefix to the correct group.
+               groupName = groupMap->getGroup(bin);
+               if (groupName != "not found") {
+                       listGroups[groupName] = listGroups[groupName] + "," + bin; //adds prefix to the correct group.
                }else {
-                       cerr << "Error: Sequence '" << suffix << "' was not found in the group file, please correct\n";
+                       mothurOut("Error: Sequence '" + toString(bin) + "' was not found in the group file, please correct\n");
                }
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the ParseListCommand class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "ParseListCommand", "parse");
                exit(1);
        }
 }
@@ -85,66 +83,128 @@ void ParseListCommand::parse(int index) {
 
 int ParseListCommand::execute(){
        try{
-                       globaldata = GlobalData::getInstance();
+               
+                       int count = 1;
                        
                        //read in listfile
-                       read = new ReadPhilFile(globaldata->inputFileName);     
+                       read = new ReadOTUFile(globaldata->inputFileName);      
                        read->read(&*globaldata); 
                        input = globaldata->ginput;
                        list = globaldata->gSharedList;
-
-                       //read in group map info.
-                       groupMap = new GroupMap(globaldata->getGroupFile());
-                       groupMap->readMap();
-                       
-                       string seq, label;
-                       int i;
-                       //create new list vectors to fill with parsed data
-                       for (i=0; i<groupMap->getNumGroups(); i++) {
-                               groupOfLists[groupMap->namesOfGroups[i]] = new SharedListVector();
-                       }
-                       
+                       string lastLabel = list->getLabel();
+               
+                       //if the user enters label "0.06" and there is no "0.06" in their file, use the next lowest label.
+                       set<string> processedLabels;
+                       set<string> userLabels = globaldata->labels;
+                       set<int> userLines = globaldata->lines;
+               
                        //parses and sets each groups listvector
-                       while(list != NULL){
-                               label = list->getLabel();
-                               for(i=0; i<list->size(); i++) {
-                                       parse(i); //parses data[i] list of sequence names
-                                       for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
-                                               seq = it->second;
-                                               seq = seq.substr(1, seq.length()); //rips off extra comma
-                                               groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
-                                       }
-                                       listGroups.clear();
+                       //as long as you are not at the end of the file or done with the lines you want
+                       while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
+                                                               
+                               if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lastLabel) == 1){
+                                       mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
+                                       process(list);
+                                       
+                                       processedLabels.insert(list->getLabel());
+                                       userLabels.erase(list->getLabel());
+                                       userLines.erase(count);
                                }
-                               //prints each new list file
-                               for (i=0; i<groupMap->getNumGroups(); i++) {
-                                       groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label);
-                                       groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
-                                       groupOfLists[groupMap->namesOfGroups[i]]->clear();
+                               
+                               if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       delete list;
+                                       list = input->getSharedListVector(lastLabel);
+                                       
+                                       mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
+                                       process(list);
+                                       
+                                       processedLabels.insert(list->getLabel());
+                                       userLabels.erase(list->getLabel());
                                }
+
+                               
+                               lastLabel = list->getLabel();                   
+                               
+                               delete list;
                                list = input->getSharedListVector();
+                               count++;
                        }
                        
-                       //set groupmap for .shared commands
-                       globaldata->gGroupmap = groupMap; 
+                       //output error messages about any remaining user labels
+                       set<string>::iterator it;
+                       bool needToRun = false;
+                       for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                               mothurOut("Your file does not include the label " + *it); 
+                               if (processedLabels.count(lastLabel) != 1) {
+                                       mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+                                       needToRun = true;
+                               }else {
+                                       mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
+                               }
+                       }
+               
+                       //run last line if you need to
+                       if (needToRun == true)  {
+                               delete list;
+                               list = input->getSharedListVector(lastLabel);
+                                       
+                               mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
+                               process(list);
+                               delete list;
+                       }
+                       
+                       globaldata->gSharedList = NULL;
+                       //delete list vectors to fill with parsed data
+                       for (it2 = mapOfLists.begin(); it2 != mapOfLists.end(); it2++) {
+                               delete it2->second;
+                       }
+                       for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+                               delete it3->second; //free the ofstream owned by this filehandles entry
+                       }
+                       
+                       delete input;  globaldata->ginput = NULL;
+                       delete read;
+
                        
                        return 0;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "ParseListCommand", "execute");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ParseListCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-
 }
 //**********************************************************************************************************************
 
 ParseListCommand::~ParseListCommand(){
-       delete list;
-       delete input;
-       delete read;    
+
+                       
 }
 //**********************************************************************************************************************
+void ParseListCommand::process(SharedListVector* thisList) {
+       try {
+                       string seq;
+
+                       for(int i=0; i<thisList->size(); i++) {
+                               parse(i, thisList); //parses data[i] list of sequence names
+                               for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
+                                       seq = it->second;
+                                       seq = seq.substr(1, seq.length()); //rips off extra comma
+                                       mapOfLists[it->first]->push_back(seq); //sets new listvector for each group
+                               }
+                               listGroups.clear();
+                       }
+                       //prints each new list file
+                       for (int i=0; i<groupMap->getNumGroups(); i++) {
+                               openOutputFileAppend(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
+                               mapOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel());
+                               mapOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
+                               mapOfLists[groupMap->namesOfGroups[i]]->clear();
+                               (*(filehandles[groupMap->namesOfGroups[i]])).close();
+                       }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "ParseListCommand", "process");
+               exit(1);
+       }
+}