X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=sequenceparser.cpp;h=37891eb44a0b42c2b9f0879dfd084da1893c289b;hp=08e5ae8b859280da965566ea9bf4d6dce3b202c7;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=bfc3401db792f7630a5bfe7aea44b4eb5bae6e6f diff --git a/sequenceparser.cpp b/sequenceparser.cpp index 08e5ae8..37891eb 100644 --- a/sequenceparser.cpp +++ b/sequenceparser.cpp @@ -59,7 +59,7 @@ SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFi in.close(); if (error == 1) { m->control_pressed = true; } - + //read name file ifstream inName; m->openInputFile(nameFile, inName); @@ -148,6 +148,78 @@ SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFi } } inName.close(); + + //in case file does not end in white space + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { //save one line + if (m->debug) { m->mothurOut("[DEBUG]: reading names: " + firstCol + '\t' + secondCol + ".\n"); } + vector names; + m->splitAtChar(secondCol, names, ','); + + //get aligned string for these seqs from the fasta file + string alignedString = ""; + map::iterator itAligned = seqName.find(names[0]); + if (itAligned == seqName.end()) { + error = 1; m->mothurOut("[ERROR]: " + names[0] + " is in your name file and not in your fasta file, please correct."); m->mothurOutEndLine(); + }else { + alignedString = itAligned->second; + } + + //separate by group - parse one line in name file + map splitMap; //group -> name1,name2,... + map::iterator it; + for (int i = 0; i < names.size(); i++) { + + string group = groupMap->getGroup(names[i]); + if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + names[i] + " is in your name file and not in your groupfile, please correct."); m->mothurOutEndLine(); } + else { + + it = splitMap.find(group); + if (it != splitMap.end()) { //adding seqs to this group + (it->second) += "," + names[i]; + thisnames1.insert(names[i]); + countName++; + }else { //first sighting of this group + splitMap[group] = names[i]; + countName++; + thisnames1.insert(names[i]); + + //is this seq in the fasta file? + if (i != 0) { //if not then we need to add a duplicate sequence to the seqs for this group so the new "fasta" and "name" files will match + Sequence tempSeq(names[i], alignedString); //get the first guys sequence string since he's in the fasta file. + seqs[group].push_back(tempSeq); + } + } + } + + allSeqsMap[names[i]] = names[0]; + } + + + //fill nameMapPerGroup - holds all lines in namefile separated by group + for (it = splitMap.begin(); it != splitMap.end(); it++) { + //grab first name + string firstName = ""; + for(int i = 0; i < (it->second).length(); i++) { + if (((it->second)[i]) != ',') { + firstName += ((it->second)[i]); + }else { break; } + } + + //group1 -> seq1 -> seq1,seq2,seq3 + nameMapPerGroup[it->first][firstName] = it->second; + } + + pairDone = false; + } + } + } if (error == 1) { m->control_pressed = true; } @@ -238,8 +310,6 @@ vector SequenceParser::getNamesOfGroups(){ return groupMap->getNamesOfGr /************************************************************/ bool SequenceParser::isValidGroup(string g){ return groupMap->isValidGroup(g); } /************************************************************/ -string SequenceParser::getGroup(string g){ return groupMap->getGroup(g); } -/************************************************************/ int SequenceParser::getNumSeqs(string g){ try { map >::iterator it; @@ -330,7 +400,7 @@ int SequenceParser::getSeqs(string g, string filename, bool uchimeFormat=false){ if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; } - out << ">" << nameVector[i].name << "/ab=" << nameVector[i].numIdentical << "/" << endl << nameVector[i].seq << endl; + out << ">" << nameVector[i].name << "/ab=" << nameVector[i].numIdentical << "/" << endl << nameVector[i].seq << endl; // } }else {