1 // ***************************************************************************
2 // SamSequenceDictionary.cpp (c) 2010 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 16 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides methods for operating on a collection of SamSequence entries.
8 // *************************************************************************
10 #include "api/SamSequenceDictionary.h"
11 using namespace BamTools;
/*! \class BamTools::SamSequenceDictionary
    \brief Container of SamSequence entries.

    Provides methods for operating on a collection of SamSequence entries.
*/
22 /*! \fn SamSequenceDictionary::SamSequenceDictionary(void)
25 SamSequenceDictionary::SamSequenceDictionary(void) { }
27 /*! \fn SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other)
28 \brief copy constructor
30 SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other)
31 : m_data(other.m_data)
32 , m_lookupData(other.m_lookupData)
35 /*! \fn SamSequenceDictionary::~SamSequenceDictionary(void)
38 SamSequenceDictionary::~SamSequenceDictionary(void) { }
40 /*! \fn void SamSequenceDictionary::Add(const SamSequence& sequence)
41 \brief Appends a sequence to the dictionary.
43 Duplicate entries are silently discarded.
45 \param[in] sequence entry to be added
47 void SamSequenceDictionary::Add(const SamSequence& sequence) {
48 if ( IsEmpty() || !Contains(sequence) ) {
49 m_data.push_back(sequence);
50 m_lookupData[sequence.Name] = m_data.size() - 1;
54 /*! \fn void SamSequenceDictionary::Add(const std::string& name, const int& length)
55 \brief Appends a sequence to the dictionary.
57 This is an overloaded function.
59 \param[in] name name of sequence entry to be added
60 \param[in] length length of sequence entry to be added
63 void SamSequenceDictionary::Add(const std::string& name, const int& length) {
64 Add( SamSequence(name, length) );
67 /*! \fn void SamSequenceDictionary::Add(const SamSequenceDictionary& sequences)
68 \brief Appends another sequence dictionary to this one
70 This is an overloaded function.
72 \param[in] sequences sequence dictionary to be appended
75 void SamSequenceDictionary::Add(const SamSequenceDictionary& sequences) {
76 SamSequenceConstIterator seqIter = sequences.ConstBegin();
77 SamSequenceConstIterator seqEnd = sequences.ConstEnd();
78 for ( ; seqIter != seqEnd; ++seqIter )
82 /*! \fn void SamSequenceDictionary::Add(const std::vector<SamSequence>& sequences)
83 \brief Appends multiple sequences to the dictionary.
85 This is an overloaded function.
87 \param[in] sequences entries to be added
90 void SamSequenceDictionary::Add(const std::vector<SamSequence>& sequences) {
91 vector<SamSequence>::const_iterator seqIter = sequences.begin();
92 vector<SamSequence>::const_iterator seqEnd = sequences.end();
93 for ( ; seqIter!= seqEnd; ++seqIter )
97 /*! \fn void SamSequenceDictionary::Add(const std::map<std::string, int>& sequenceMap)
98 \brief Appends multiple sequences to the dictionary.
100 This is an overloaded function.
102 \param[in] sequenceMap map of sequence entries (name => length) to be added
105 void SamSequenceDictionary::Add(const std::map<std::string, int>& sequenceMap) {
106 map<string, int>::const_iterator seqIter = sequenceMap.begin();
107 map<string, int>::const_iterator seqEnd = sequenceMap.end();
108 for ( ; seqIter != seqEnd; ++seqIter ) {
109 const string& name = (*seqIter).first;
110 const int& length = (*seqIter).second;
111 Add( SamSequence(name, length) );
115 /*! \fn SamSequenceIterator SamSequenceDictionary::Begin(void)
116 \return an STL iterator pointing to the first sequence
117 \sa ConstBegin(), End()
119 SamSequenceIterator SamSequenceDictionary::Begin(void) {
120 return m_data.begin();
123 /*! \fn SamSequenceConstIterator SamSequenceDictionary::Begin(void) const
124 \return an STL const_iterator pointing to the first sequence
126 This is an overloaded function.
128 \sa ConstBegin(), End()
130 SamSequenceConstIterator SamSequenceDictionary::Begin(void) const {
131 return m_data.begin();
134 /*! \fn void SamSequenceDictionary::Clear(void)
135 \brief Clears all sequence entries.
137 void SamSequenceDictionary::Clear(void) {
139 m_lookupData.clear();
142 /*! \fn SamSequenceConstIterator SamSequenceDictionary::ConstBegin(void) const
143 \return an STL const_iterator pointing to the first sequence
144 \sa Begin(), ConstEnd()
146 SamSequenceConstIterator SamSequenceDictionary::ConstBegin(void) const {
147 return m_data.begin();
150 /*! \fn SamSequenceConstIterator SamSequenceDictionary::ConstEnd(void) const
151 \return an STL const_iterator pointing to the imaginary entry after the last sequence
152 \sa End(), ConstBegin()
154 SamSequenceConstIterator SamSequenceDictionary::ConstEnd(void) const {
158 /*! \fn bool SamSequenceDictionary::Contains(const std::string& sequenceName) const
159 \brief Returns true if dictionary contains sequence.
161 \param[in] sequenceName search for sequence matching this name
162 \return \c true if dictionary contains a sequence with this name
164 bool SamSequenceDictionary::Contains(const std::string& sequenceName) const {
165 return ( m_lookupData.find(sequenceName) != m_lookupData.end() );
168 /*! \fn bool SamSequenceDictionary::Contains(const SamSequence& sequence) const
169 \brief Returns true if dictionary contains sequence (matches on name).
171 This is an overloaded function.
173 \param[in] sequence search for this sequence
174 \return \c true if dictionary contains sequence (matching on name)
176 bool SamSequenceDictionary::Contains(const SamSequence& sequence) const {
177 return Contains(sequence.Name);
180 /*! \fn SamSequenceIterator SamSequenceDictionary::End(void)
181 \return an STL iterator pointing to the imaginary entry after the last sequence
182 \sa Begin(), ConstEnd()
184 SamSequenceIterator SamSequenceDictionary::End(void) {
188 /*! \fn SamSequenceConstIterator SamSequenceDictionary::End(void) const
189 \return an STL const_iterator pointing to the imaginary entry after the last sequence
191 This is an overloaded function.
193 \sa Begin(), ConstEnd()
195 SamSequenceConstIterator SamSequenceDictionary::End(void) const {
199 /*! \fn bool SamSequenceDictionary::IsEmpty(void) const
200 \brief Returns \c true if dictionary contains no sequences
203 bool SamSequenceDictionary::IsEmpty(void) const {
204 return m_data.empty();
207 /*! \fn void SamSequenceDictionary::Remove(const SamSequence& sequence)
208 \brief Removes sequence from dictionary, if found (matches on name).
210 This is an overloaded function.
212 \param[in] sequence SamSequence to remove (matching on name)
214 void SamSequenceDictionary::Remove(const SamSequence& sequence) {
215 Remove(sequence.Name);
218 /*! \fn void SamSequenceDictionary::Remove(const std::string& sequenceName)
219 \brief Removes sequence from dictionary, if found.
221 \param[in] sequenceName name of sequence to remove
224 void SamSequenceDictionary::Remove(const std::string& sequenceName) {
226 // skip if empty dictionary or if name unknown
227 if ( IsEmpty() || !Contains(sequenceName) )
230 // update 'lookup index' for every entry after @sequenceName
231 const size_t indexToRemove = m_lookupData[sequenceName];
232 const size_t numEntries = m_data.size();
233 for ( size_t i = indexToRemove+1; i < numEntries; ++i ) {
234 const SamSequence& sq = m_data.at(i);
235 --m_lookupData[sq.Name];
238 // erase entry from containers
239 m_data.erase( Begin() + indexToRemove );
240 m_lookupData.erase(sequenceName);
243 /*! \fn void SamSequenceDictionary::Remove(const std::vector<SamSequence>& sequences)
244 \brief Removes multiple sequences from dictionary.
246 This is an overloaded function.
248 \param[in] sequences sequences to remove
251 void SamSequenceDictionary::Remove(const std::vector<SamSequence>& sequences) {
252 vector<SamSequence>::const_iterator rgIter = sequences.begin();
253 vector<SamSequence>::const_iterator rgEnd = sequences.end();
254 for ( ; rgIter!= rgEnd; ++rgIter )
258 /*! \fn void SamSequenceDictionary::Remove(const std::vector<std::string>& sequenceNames)
259 \brief Removes multiple sequences from dictionary.
261 This is an overloaded function.
263 \param[in] sequenceNames names of the sequences to remove
266 void SamSequenceDictionary::Remove(const std::vector<std::string>& sequenceNames) {
267 vector<string>::const_iterator rgIter = sequenceNames.begin();
268 vector<string>::const_iterator rgEnd = sequenceNames.end();
269 for ( ; rgIter!= rgEnd; ++rgIter )
273 /*! \fn int SamSequenceDictionary::Size(void) const
274 \brief Returns number of sequences in dictionary.
277 int SamSequenceDictionary::Size(void) const {
278 return m_data.size();
281 /*! \fn SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName)
282 \brief Retrieves the modifiable SamSequence that matches \a sequenceName.
284 \note If the dictionary contains no sequence matching this name, this function inserts
285 a new one with this name (length:0), and returns a reference to it. If you want to avoid
286 this insertion behavior, check the result of Contains() before using this operator.
288 \param[in] sequenceName name of sequence to retrieve
289 \return a modifiable reference to the SamSequence associated with the name
291 SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName) {
293 if ( !Contains(sequenceName) ) {
294 SamSequence seq(sequenceName, 0);
295 m_data.push_back(seq);
296 m_lookupData[sequenceName] = m_data.size() - 1;
299 const size_t index = m_lookupData[sequenceName];
300 return m_data.at(index);