src/SeedMatchEnumerator.h

Go to the documentation of this file.
00001 #ifndef __SeedMatchEnumerator_h__
00002 #define __SeedMatchEnumerator_h__
00003 
00004 #include "libMems/MatchFinder.h"
00005 #include "libMems/RepeatHash.h"
00006 #include "libMems/MemHash.h"
00007 #include "libMems/MatchList.h"
00008 #include "libMems/SortedMerList.h"
00009 #include "libMems/Match.h"
00010 
00014 class SeedMatchEnumerator : public mems::MatchFinder 
00015 {
00016 public:
00017         virtual SeedMatchEnumerator* Clone() const;
00018 
00019         void FindMatches( mems::MatchList& match_list, size_t min_multi = 2, size_t max_multi = 1000, bool direct_repeats_only = false )
00020         {
00021         this->max_multiplicity = max_multi;
00022         this->min_multiplicity = min_multi;
00023         this->only_direct = direct_repeats_only;
00024                 for( size_t seqI = 0; seqI < match_list.seq_table.size(); ++seqI ){
00025                         if( !AddSequence( match_list.sml_table[ seqI ], match_list.seq_table[ seqI ] ) ){
00026                                 genome::ErrorMsg( "Error adding " + match_list.seq_filename[seqI] + "\n");
00027                                 return;
00028                         }
00029                 }
00030                 CreateMatches();
00031                 match_list.clear();
00032                 match_list.insert( match_list.end(), mlist.begin(), mlist.end() );
00033         }
00034 
00035         virtual boolean CreateMatches();
00036 protected:
00037 
00038         virtual boolean EnumerateMatches( mems::IdmerList& match_list );
00039         virtual boolean HashMatch(mems::IdmerList& match_list);
00040         virtual mems::SortedMerList* GetSar(uint32 sarI) const;
00041         mems::MatchList mlist;
00042         void SetDirection(mems::Match& mhe);
00043 private:
00044     //used to store rmin, rmax values
00045     size_t max_multiplicity;
00046     size_t min_multiplicity;
00047         bool only_direct;
00048 };
00049 
00050 SeedMatchEnumerator* SeedMatchEnumerator::Clone() const{
00051         return new SeedMatchEnumerator(*this);
00052 }
00053 
00054 inline
00055 mems::SortedMerList* SeedMatchEnumerator::GetSar(uint32 sarI) const{
00056         return sar_table[0];
00057 }
00058 
00059 boolean SeedMatchEnumerator::CreateMatches(){
00060         if(seq_count == 1){
00061                 MatchFinder::FindMatchSeeds();
00062                 return true;
00063         }
00064         return false;
00065 }
00066 
00067 boolean SeedMatchEnumerator::EnumerateMatches( mems::IdmerList& match_list ){
00068         return HashMatch(match_list);
00069 }
00070 
00071 boolean SeedMatchEnumerator::HashMatch(mems::IdmerList& match_list){
00072         //check that there is at least one forward component
00073         match_list.sort(&mems::idmer_position_lessthan);
00074         // initialize the hash entry
00075         mems::Match mhe = mems::Match( match_list.size() );
00076         mhe.SetLength( GetSar(0)->SeedLength() );
00077         
00078         //Fill in the new Match and set direction parity if needed.
00079         mems::IdmerList::iterator iter = match_list.begin();
00080     
00081         uint32 repeatI = 0;
00082         for(; iter != match_list.end(); iter++)
00083                 mhe.SetStart(repeatI++, iter->position + 1);
00084 
00085         SetDirection( mhe );
00086         bool found_reverse = false;
00087         vector< size_t > component_map;
00088         if(this->only_direct)
00089         {
00090                 for( uint seqI = 0; seqI < mhe.Multiplicity(); seqI++)
00091                 {
00092                         if (mhe.Orientation(seqI) == 0)
00093                                 component_map.push_back(seqI);
00094                         else
00095                                 found_reverse = true;
00096                 }
00097         }
00098         mems::MatchProjectionAdapter mpaa(mhe.Copy(),  component_map);
00099         if(mhe.Multiplicity() < 2){
00100                 std::cerr << "red flag " << mhe << "\n";
00101     }
00102     //use rmin & rmax to discard irrelevant seed matches
00103     else if(mhe.Multiplicity() > this->max_multiplicity || mhe.Multiplicity() < this->min_multiplicity )
00104     {
00105         ;
00106     }
00107         else if(this->only_direct && found_reverse)
00108         {
00109                 if ( mpaa.Multiplicity() > 1)
00110                 {
00111                         mems::Match new_mhe = mems::Match( mpaa.Multiplicity() );
00112                         new_mhe.SetLength( GetSar(0)->SeedLength() );
00113                         for(uint mult = 0; mult < mpaa.Multiplicity(); mult++)
00114                                 new_mhe.SetStart(mult, mpaa.Start(mult));
00115                         mlist.push_back(new_mhe.Copy());
00116                 }
00117         }
00118     else{
00119                 mlist.push_back(mhe.Copy());
00120                 
00121         }
00122         return true;
00123 }
00124 
00125 // evil, evil code duplication.
00126 
00127 void SeedMatchEnumerator::SetDirection(mems::Match& mhe){
00128         //get the reference direction
00129         boolean ref_forward = false;
00130         uint32 seqI=0;
00131         for(; seqI < mhe.SeqCount(); ++seqI)
00132                 if(mhe[seqI] != mems::NO_MATCH){
00133                         ref_forward = !(GetSar(seqI)->GetMer(mhe[seqI] - 1) & 0x1);
00134                         break;
00135                 }
00136         //set directional parity for the rest
00137         for(++seqI; seqI < mhe.SeqCount(); ++seqI)
00138                 if(mhe[seqI] != mems::NO_MATCH)
00139                         if(ref_forward == (GetSar(seqI)->GetMer(mhe[seqI] - 1) & 0x1))
00140                                 mhe.SetStart(seqI, -mhe[seqI]);
00141 }
00142 
00143 
00144 #endif  // __SeedMatchEnumerator_h__

Generated on Mon Aug 19 06:00:40 2013 for mauveAligner by doxygen 1.3.6