X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sequencecountparser.h;fp=sequencecountparser.h;h=4889ea6c3f853cbd6bdbdb3fe3f778b10ff29f24;hb=c7e8c2d15bd7cedcfdf18675cb0ea1a0dcd0e3c0;hp=0000000000000000000000000000000000000000;hpb=28bcfc4a41b8b82f66636587e0d4d355d07cbdd1;p=mothur.git diff --git a/sequencecountparser.h b/sequencecountparser.h new file mode 100644 index 0000000..4889ea6 --- /dev/null +++ b/sequencecountparser.h @@ -0,0 +1,59 @@ +#ifndef Mothur_sequencecountparser_h +#define Mothur_sequencecountparser_h + +// +// sequencecountparser.h +// Mothur +// +// Created by Sarah Westcott on 8/7/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + +#include "mothur.h" +#include "mothurout.h" +#include "sequence.hpp" +#include "counttable.h" + +/* This class reads a fasta and count file and parses the data by group. The countfile must contain group information. + + Note: The sum of all the groups unique sequences will be larger than the original number of unique sequences. + This is because when we parse the count file we make a unique for each group instead of 1 unique for all + groups. + + */ + +class SequenceCountParser { + +public: + + SequenceCountParser(string, string); //count, fasta - file mismatches will set m->control_pressed = true + SequenceCountParser(string, CountTable&); //fasta, counttable - file mismatches will set m->control_pressed = true + ~SequenceCountParser(); + + //general operations + int getNumGroups(); + vector getNamesOfGroups(); + + int getNumSeqs(string); //returns the number of unique sequences in a specific group + vector getSeqs(string); //returns unique sequences in a specific group + map getCountTable(string); //returns seqName -> numberOfRedundantSeqs for a specific group - the count file format, but each line is parsed by group. + + int getSeqs(string, string, bool); //prints unique sequences in a specific group to a file - group, filename, uchimeFormat=false + int getCountTable(string, string); //print seqName -> numberRedundantSeqs for a specific group - group, filename + + map getAllSeqsMap(){ return allSeqsMap; } //returns map where the key=sequenceName and the value=representativeSequence - helps us remove duplicates after group by group processing +private: + + CountTable countTable; + MothurOut* m; + + int numSeqs; + map allSeqsMap; + map > seqs; //a vector for each group + map > countTablePerGroup; //countTable for each group + vector namesOfGroups; +}; + + + +#endif