X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classifyseqscommand.cpp;h=7ae2ee5041af97ff462909095aa2af0ec24e5b0d;hb=b447f829850ae054e42560c7c3ed71b14f3f40bb;hp=580dd9b6e056197bfc48cb39abcfb526237e1487;hpb=7a7870ab773b993d8d1fd89703b1df3beb47f8d4;p=mothur.git diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 580dd9b..7ae2ee5 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -226,7 +226,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { } #endif - if (ableToOpen == 1) { m->mothurOut("Unable to match group file with fasta file."); m->mothurOutEndLine(); abort = true; } + if (ableToOpen == 1) { m->mothurOut("Unable to match group file with fasta file, not using " + groupfileNames[i] + "."); m->mothurOutEndLine(); groupfileNames[i] = ""; } } } @@ -413,8 +413,6 @@ int ClassifySeqsCommand::execute(){ //delete inFileName; if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); delete classify; return 0; } - - if(namefile != "") { MPIReadNamesFile(namefileNames[s]); } if (pid == 0) { //you are the root process @@ -465,22 +463,7 @@ int ClassifySeqsCommand::execute(){ MPI_File_close(&outMPITempTax); #else - //read namefile - if(namefile != "") { - nameMap.clear(); //remove old names - - ifstream inNames; - openInputFile(namefileNames[s], inNames); - - string firstCol, secondCol; - while(!inNames.eof()) { - inNames >> firstCol >> secondCol; gobble(inNames); - nameMap[firstCol] = getNumNames(secondCol); //ex. seq1 seq1,seq3,seq5 -> seq1 = 3. - } - inNames.close(); - } - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ ifstream inFASTA; openInputFile(fastaFileNames[s], inFASTA); @@ -543,18 +526,48 @@ int ClassifySeqsCommand::execute(){ #endif #endif + m->mothurOutEndLine(); + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); + start = time(NULL); + + #ifdef USE_MPI if (pid == 0) { //this part does not need to be paralellized + + if(namefile != "") { m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush(); MPIReadNamesFile(namefileNames[s]); m->mothurOut(" Done."); m->mothurOutEndLine(); } + #else + //read namefile + if(namefile != "") { + + m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush(); + + nameMap.clear(); //remove old names + + ifstream inNames; + openInputFile(namefileNames[s], inNames); + + string firstCol, secondCol; + while(!inNames.eof()) { + inNames >> firstCol >> secondCol; gobble(inNames); + + vector temp; + splitAtComma(secondCol, temp); + + nameMap[firstCol] = temp; + } + inNames.close(); + + m->mothurOut(" Done."); m->mothurOutEndLine(); + } #endif - m->mothurOutEndLine(); - m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); - start = time(NULL); + string group = ""; + if (groupfile != "") { group = groupfileNames[s]; } - PhyloSummary taxaSum(taxonomyFileName, groupfileNames[s]); + PhyloSummary taxaSum(taxonomyFileName, group); if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete classify; return 0; } - + if (namefile == "") { taxaSum.summarize(tempTaxonomyFile); } else { ifstream in; @@ -562,6 +575,7 @@ int ClassifySeqsCommand::execute(){ //read in users taxonomy file and add sequences to tree string name, taxon; + while(!in.eof()){ in >> name >> taxon; gobble(in); @@ -570,9 +584,11 @@ int ClassifySeqsCommand::execute(){ if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1); }else{ - for (int i = 0; i < itNames->second; i++) { - taxaSum.addSeqToTree(name, taxon); //add it as many times as there are identical seqs + for (int i = 0; i < itNames->second.size(); i++) { + taxaSum.addSeqToTree(itNames->second[i], taxon); //add it as many times as there are identical seqs } + itNames->second.clear(); + nameMap.erase(itNames->first); } } in.close(); @@ -883,7 +899,11 @@ int ClassifySeqsCommand::MPIReadNamesFile(string nameFilename){ string firstCol, secondCol; while(!iss.eof()) { iss >> firstCol >> secondCol; gobble(iss); - nameMap[firstCol] = getNumNames(secondCol); //ex. seq1 seq1,seq3,seq5 -> seq1 = 3. + + vector temp; + splitAtComma(secondCol, temp); + + nameMap[firstCol] = temp; } MPI_File_close(&inMPI);