From a0e9a8a746faf084e31624f80e69e3c9af9822ad Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 11 Mar 2011 16:55:27 +0000 Subject: [PATCH] added summary outputs for align.check --- screenseqscommand.cpp | 4 +-- secondarystructurecommand.cpp | 53 ++++++++++++++++++++++++++++++++++- seqsummarycommand.cpp | 15 +++++----- 3 files changed, 62 insertions(+), 10 deletions(-) diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 937959c..e6d29c1 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -634,10 +634,10 @@ int ScreenSeqsCommand::getSummary(vector& positions){ for (int i = 0; i < optimize.size(); i++) { if (optimize[i] == "start") { startPos = startPosition[criteriaPercentile]; m->mothurOut("Optimizing start to " + toString(startPos) + "."); m->mothurOutEndLine(); } - else if (optimize[i] == "end") { int endcriteriaPercentile = int(numSeqs * ((100 - criteria) / (float) 100)); endPos = endPosition[endcriteriaPercentile]; m->mothurOut("Optimizing end to " + toString(endPos) + "."); m->mothurOutEndLine();} + else if (optimize[i] == "end") { int endcriteriaPercentile = int(endPosition.size() * ((100 - criteria) / (float) 100)); endPos = endPosition[endcriteriaPercentile]; m->mothurOut("Optimizing end to " + toString(endPos) + "."); m->mothurOutEndLine();} else if (optimize[i] == "maxambig") { maxAmbig = ambigBases[criteriaPercentile]; m->mothurOut("Optimizing maxambig to " + toString(maxAmbig) + "."); m->mothurOutEndLine(); } else if (optimize[i] == "maxhomop") { maxHomoP = longHomoPolymer[criteriaPercentile]; m->mothurOut("Optimizing maxhomop to " + toString(maxHomoP) + "."); m->mothurOutEndLine(); } - else if (optimize[i] == "minlength") { int mincriteriaPercentile = int(numSeqs * ((100 - criteria) / (float) 100)); minLength = seqLength[mincriteriaPercentile]; m->mothurOut("Optimizing minlength to " + toString(minLength) + "."); m->mothurOutEndLine(); } + else if (optimize[i] == "minlength") { int mincriteriaPercentile = int(seqLength.size() * ((100 - criteria) / (float) 100)); minLength = seqLength[mincriteriaPercentile]; m->mothurOut("Optimizing minlength to " + toString(minLength) + "."); m->mothurOutEndLine(); } else if (optimize[i] == "maxlength") { maxLength = seqLength[criteriaPercentile]; m->mothurOut("Optimizing maxlength to " + toString(maxLength) + "."); m->mothurOutEndLine(); } } diff --git a/secondarystructurecommand.cpp b/secondarystructurecommand.cpp index 2b474c1..ece2de0 100644 --- a/secondarystructurecommand.cpp +++ b/secondarystructurecommand.cpp @@ -186,7 +186,15 @@ int AlignCheckCommand::execute(){ out << "name" << '\t' << "pound" << '\t' << "dash" << '\t' << "plus" << '\t' << "equal" << '\t'; out << "loop" << '\t' << "tilde" << '\t' << "total" << '\t' << "numseqs" << endl; + vector pound; + vector dash; + vector plus; + vector equal; + vector loop; + vector tilde; + vector total; + int count = 0; while(!in.eof()){ if (m->control_pressed) { in.close(); out.close(); remove(outfile.c_str()); return 0; } @@ -194,7 +202,7 @@ int AlignCheckCommand::execute(){ if (seq.getName() != "") { statData data = getStats(seq.getAligned()); - if (haderror == 1) { break; } + if (haderror == 1) { m->control_pressed = true; break; } int num = 1; if (namefile != "") { @@ -205,6 +213,18 @@ int AlignCheckCommand::execute(){ else { num = it->second; } } + //for each sequence this sequence represents + for (int i = 0; i < num; i++) { + pound.push_back(data.pound); + dash.push_back(data.dash); + plus.push_back(data.plus); + equal.push_back(data.equal); + loop.push_back(data.loop); + tilde.push_back(data.tilde); + total.push_back(data.total); + } + count++; + out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t'; out << data.loop << '\t' << data.tilde << '\t' << data.total << '\t' << num << endl; } @@ -215,6 +235,37 @@ int AlignCheckCommand::execute(){ if (m->control_pressed) { remove(outfile.c_str()); return 0; } + sort(pound.begin(), pound.end()); + sort(dash.begin(), dash.end()); + sort(plus.begin(), plus.end()); + sort(equal.begin(), equal.end()); + sort(loop.begin(), loop.end()); + sort(tilde.begin(), tilde.end()); + sort(total.begin(), total.end()); + int size = pound.size(); + + int ptile0_25 = int(size * 0.025); + int ptile25 = int(size * 0.250); + int ptile50 = int(size * 0.500); + int ptile75 = int(size * 0.750); + int ptile97_5 = int(size * 0.975); + int ptile100 = size - 1; + + if (m->control_pressed) { remove(outfile.c_str()); return 0; } + + m->mothurOutEndLine(); + m->mothurOut("\t\tPound\tDash\tPlus\tEqual\tLoop\tTilde\tTotal"); m->mothurOutEndLine(); + m->mothurOut("Minimum:\t" + toString(pound[0]) + "\t" + toString(dash[0]) + "\t" + toString(plus[0]) + "\t" + toString(equal[0]) + "\t" + toString(loop[0]) + "\t" + toString(tilde[0]) + "\t" + toString(total[0])); m->mothurOutEndLine(); + m->mothurOut("2.5%-tile:\t" + toString(pound[ptile0_25]) + "\t" + toString(dash[ptile0_25]) + "\t" + toString(plus[ptile0_25]) + "\t" + toString(equal[ptile0_25]) + "\t"+ toString(loop[ptile0_25]) + "\t"+ toString(tilde[ptile0_25]) + "\t"+ toString(total[ptile0_25])); m->mothurOutEndLine(); + m->mothurOut("25%-tile:\t" + toString(pound[ptile25]) + "\t" + toString(dash[ptile25]) + "\t" + toString(plus[ptile25]) + "\t" + toString(equal[ptile25]) + "\t" + toString(loop[ptile25]) + "\t" + toString(tilde[ptile25]) + "\t" + toString(total[ptile25])); m->mothurOutEndLine(); + m->mothurOut("Median: \t" + toString(pound[ptile50]) + "\t" + toString(dash[ptile50]) + "\t" + toString(plus[ptile50]) + "\t" + toString(equal[ptile50]) + "\t" + toString(loop[ptile50]) + "\t" + toString(tilde[ptile50]) + "\t" + toString(total[ptile50])); m->mothurOutEndLine(); + m->mothurOut("75%-tile:\t" + toString(pound[ptile75]) + "\t" + toString(dash[ptile75]) + "\t" + toString(plus[ptile75]) + "\t" + toString(equal[ptile75]) + "\t" + toString(loop[ptile75]) + "\t" + toString(tilde[ptile75]) + "\t" + toString(total[ptile75])); m->mothurOutEndLine(); + m->mothurOut("97.5%-tile:\t" + toString(pound[ptile97_5]) + "\t" + toString(dash[ptile97_5]) + "\t" + toString(plus[ptile97_5]) + "\t" + toString(equal[ptile97_5]) + "\t" + toString(loop[ptile97_5]) + "\t" + toString(tilde[ptile97_5]) + "\t" + toString(total[ptile97_5])); m->mothurOutEndLine(); + m->mothurOut("Maximum:\t" + toString(pound[ptile100]) + "\t" + toString(dash[ptile100]) + "\t" + toString(plus[ptile100]) + "\t" + toString(equal[ptile100]) + "\t" + toString(loop[ptile100]) + "\t" + toString(tilde[ptile100]) + "\t" + toString(total[ptile100])); m->mothurOutEndLine(); + if (namefile == "") { m->mothurOut("# of Seqs:\t" + toString(count)); m->mothurOutEndLine(); } + else { m->mothurOut("# of unique seqs:\t" + toString(count)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(size)); m->mothurOutEndLine(); } + + m->mothurOutEndLine(); m->mothurOut("Output File Name: "); m->mothurOutEndLine(); m->mothurOut(outfile); m->mothurOutEndLine(); outputNames.push_back(outfile); outputTypes["aligncheck"].push_back(outfile); diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp index c0f9196..8c13e7f 100644 --- a/seqsummarycommand.cpp +++ b/seqsummarycommand.cpp @@ -328,13 +328,14 @@ int SeqSummaryCommand::execute(){ sort(seqLength.begin(), seqLength.end()); sort(ambigBases.begin(), ambigBases.end()); sort(longHomoPolymer.begin(), longHomoPolymer.end()); - - int ptile0_25 = int(numSeqs * 0.025); - int ptile25 = int(numSeqs * 0.250); - int ptile50 = int(numSeqs * 0.500); - int ptile75 = int(numSeqs * 0.750); - int ptile97_5 = int(numSeqs * 0.975); - int ptile100 = numSeqs - 1; + int size = startPosition.size(); + + int ptile0_25 = int(size * 0.025); + int ptile25 = int(size * 0.250); + int ptile50 = int(size * 0.500); + int ptile75 = int(size * 0.750); + int ptile97_5 = int(size * 0.975); + int ptile100 = size - 1; //to compensate for blank sequences that would result in startPosition and endPostion equalling -1 if (startPosition[0] == -1) { startPosition[0] = 0; } -- 2.39.2