1 #ifndef Mothur_sequencecountparser_h
2 #define Mothur_sequencecountparser_h
5 // sequencecountparser.h
8 // Created by Sarah Westcott on 8/7/12.
9 // Copyright (c) 2012 Schloss Lab. All rights reserved.
13 #include "mothurout.h"
14 #include "sequence.hpp"
15 #include "counttable.h"
17 /* This class reads a fasta and count file and parses the data by group. The count file must contain group information.
19 Note: The sum of all the groups' unique sequences will be larger than the original number of unique sequences.
20 This is because when we parse the count file we make a unique for each group instead of 1 unique for all
25 class SequenceCountParser {
// Constructors and destructor.
// Two-string overload: arguments are (count, fasta), per the trailing note.
// CountTable overload: arguments are (fasta, counttable) -- note that the fasta
// argument comes first here, the reverse of the two-string overload.
// Both trailing notes state that a mismatch between the inputs sets
// m->control_pressed = true; the checks live in the implementation, not in this header.
// NOTE(review): the string arguments are presumably file names -- confirm against callers.
29 SequenceCountParser(string, string); //count, fasta - file mismatches will set m->control_pressed = true
30 SequenceCountParser(string, CountTable&); //fasta, counttable - file mismatches will set m->control_pressed = true
31 ~SequenceCountParser();
// Accessors that hand back parsed data in memory.
// NOTE(review): presumably returns the contents of the namesOfGroups member -- confirm in the implementation.
35 vector<string> getNamesOfGroups();
// For the three accessors below, the string argument selects the group to query
// (see the trailing notes). Behaviour for an unknown group is not visible in this header.
37 int getNumSeqs(string); //returns the number of unique sequences in a specific group
38 vector<Sequence> getSeqs(string); //returns unique sequences in a specific group
39 map<string, int> getCountTable(string); //returns seqName -> numberOfRedundantSeqs for a specific group - the count file format, but each line is parsed by group.
// File-writing overloads: instead of returning the data, these print one group's
// data to a file. Argument order is given in each trailing note.
// The note on getSeqs reads "uchimeFormat=false", but no default argument is
// declared on this line, so callers must pass all three arguments explicitly.
// NOTE(review): the meaning of the int return value is not visible here -- confirm in the implementation.
41 int getSeqs(string, string, bool); //prints unique sequences in a specific group to a file - group, filename, uchimeFormat=false
42 int getCountTable(string, string); //print seqName -> numberRedundantSeqs for a specific group - group, filename
// Returns a copy (by value) of the allSeqsMap member; the function is not declared const.
44 map<string, string> getAllSeqsMap(){ return allSeqsMap; } //returns map where the key=sequenceName and the value=representativeSequence - helps us remove duplicates after group by group processing
// Data members.
// NOTE(review): presumably the count table read by / passed to the constructors -- confirm in the implementation.
47 CountTable countTable;
// Returned by value from getAllSeqsMap(); per that accessor's note, key = sequence name,
// value = representative sequence.
51 map<string, string> allSeqsMap;
// Both maps below are keyed by a string; per the trailing notes there is one entry per group.
52 map<string, vector<Sequence> > seqs; //a vector for each group
53 map<string, map<string, int> > countTablePerGroup; //countTable for each group
// NOTE(review): presumably the list returned by getNamesOfGroups() -- confirm in the implementation.
54 vector<string> namesOfGroups;