00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef __CompactGappedAlignment_h__
00010 #define __CompactGappedAlignment_h__
00011
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015
00016 #include "libGenome/gnDebug.h"
00017 #include "libGenome/gnFilter.h"
00018 #include "libGenome/gnSequence.h"
00019 #include "libMems/SparseAbstractMatch.h"
00020 #include "libMems/HybridAbstractMatch.h"
00021 #include "libMems/AbstractGappedAlignment.h"
00022 #include "libMems/UngappedLocalAlignment.h"
00023
00024 #include <algorithm>
00025
00026 #ifdef WIN32
00027 #include "windows.h"
00028 #endif
00029
00030 namespace mems {
00031
00037 template< class BaseType = AbstractGappedAlignment< HybridAbstractMatch<> > >
00038 class CompactGappedAlignment : public BaseType
00039 {
00040 public:
00041 CompactGappedAlignment() : BaseType(){};
00042 CompactGappedAlignment( uint seq_count, gnSeqI align_length );
00043 CompactGappedAlignment( std::vector< bitset_t >& aln_mat, gnSeqI alignment_length );
00044
00045 template< class MatchType >
00046 CompactGappedAlignment( MatchType& m ) :
00047 BaseType( m.SeqCount(), m.AlignmentLength() ),
00048 bcount( std::vector< std::vector< size_t > >( m.SeqCount() ) )
00049 {
00050 m.GetAlignment(align_matrix);
00051
00052 for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
00053 {
00054 this->SetStart(seqI, m.Start(seqI));
00055 if( m.Start(seqI) != NO_MATCH )
00056 this->SetLength(m.Length(seqI), seqI);
00057 else
00058 this->SetLength(0, seqI);
00059 }
00060
00061 this->create_bitcount();
00062
00063 if( !this->validate() )
00064 std::cerr << "kahnstruct error\n";
00065 }
00066
00067 CompactGappedAlignment* Clone() const { return new CompactGappedAlignment( *this ); }
00068 CompactGappedAlignment* Copy() const;
00069 virtual void Free();
00070
00071 void SetAlignment( const std::vector< std::string >& seq_align );
00072
00073 void SetAlignment( std::vector< bitset_t >& seq_align );
00074
00075
00076 virtual void Invert();
00077 virtual void CropStart(gnSeqI crop_amount);
00078 virtual void CropEnd(gnSeqI crop_amount);
00079
00080 virtual void CropLeft(gnSeqI crop_amount, uint seqI);
00081 virtual void CropRight(gnSeqI crop_amount, uint seqI);
00082
00083 void GetAlignment( std::vector< bitset_t >& align_matrix ) const;
00084
00086 const std::vector< bitset_t >& GetAlignment() const{ return align_matrix; }
00087
00088
00089
00090 void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const;
00091
00093 virtual bool IsGap( uint seq, gnSeqI col ) const;
00095 void translate( CompactGappedAlignment& cga, uint cga_seq, uint my_seq, bool add_bits = true );
00096
00097 bool validate() const;
00098 bool validate_bitcount() const;
00099
00100 void copyRange( CompactGappedAlignment& dest, gnSeqI left_column, gnSeqI length );
00101 gnSeqI SeqPosToColumn( uint seq, int64 pos);
00102
00104 void CondenseGapColumns();
00105
00106 void swap( CompactGappedAlignment& other ){ swap(&other); }
00107
00108 protected:
00109
00110 void swap( CompactGappedAlignment* other ){
00111 std::swap( align_matrix, other->align_matrix );
00112 std::swap( bcount, other->bcount );
00113 BaseType::swap( other );
00114 }
00115
00116 std::vector< bitset_t > align_matrix;
00117 std::vector< std::vector< size_t > > bcount;
00118
00119 void create_bitcount();
00120 gnSeqI SeqPosToColumn( gnSeqI pos, const bitset_t& bvec, const std::vector< size_t >& index ) const;
00121
00122 };
00123
00124 static bool debug_cga = false;
00125
00126 template< class BaseType >
00127 CompactGappedAlignment<BaseType>* CompactGappedAlignment<BaseType>::Copy() const
00128 {
00129 return m_allocateAndCopy( *this );
00130 }
00131
00132 template< class BaseType >
00133 void CompactGappedAlignment<BaseType>::Free()
00134 {
00135 m_free(this);
00136 }
00137
00138 template< class BaseType >
00139 bool CompactGappedAlignment<BaseType>::validate() const
00140 {
00141 if( !debug_cga )
00142 return true;
00143 bool good = true;
00144 for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
00145 {
00146 if( this->AlignmentLength() != align_matrix[seqI].size() )
00147 {
00148 good = false;
00149 std::cerr << "vanishing pig trick\n";
00150 genome::breakHere();
00151 }
00152 gnSeqI count = align_matrix[seqI].count();
00153 if( count > 0 && this->LeftEnd(seqI) == 0 )
00154 {
00155 good = false;
00156 std::cerr << "boner_McHoserknob\n";
00157 genome::breakHere();
00158 }
00159 if( (count == 0 || this->Length(seqI) == 0) && this->LeftEnd(seqI) != 0 )
00160 {
00161 good = false;
00162 std::cerr << "Length(" << seqI << "): " << this->Length(seqI) << std::endl;
00163 std::cerr << "LeftEnd(seqI): " << this->LeftEnd(seqI) << std::endl;
00164 std::cerr << "spumante explosion\n";
00165 genome::breakHere();
00166 }
00167 if( count != this->Length(seqI) )
00168 {
00169 std::cerr << "seqI: " << seqI << " count: " << count << " Length(seqI): " << this->Length(seqI) << std::endl;
00170 std::cerr << "LeftEnd(seqI): " << this->LeftEnd(seqI) << std::endl;
00171 std::cerr << "lendo mismatcho\n";
00172 genome::breakHere();
00173 return false;
00174 }
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184 }
00185 if( good )
00186 {
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200 }
00201 return validate_bitcount() && good;
00202 }
00203
00204
00205 template< class BaseType >
00206 CompactGappedAlignment<BaseType>::CompactGappedAlignment( std::vector< bitset_t >& aln_mat, gnSeqI alignment_length ) :
00207 BaseType( aln_mat.size(), alignment_length ),
00208 align_matrix( aln_mat ),
00209 bcount( std::vector< std::vector< size_t > >( aln_mat.size() ) )
00210 {
00211 this->create_bitcount();
00212 this->validate_bitcount();
00213 }
00214
00215 template< class BaseType >
00216 CompactGappedAlignment<BaseType>::CompactGappedAlignment( uint seq_count, gnSeqI align_length ) :
00217 BaseType( seq_count, align_length )
00218 {}
00219
00220
00221 template< class BaseType >
00222 void CompactGappedAlignment<BaseType>::SetAlignment( const std::vector< std::string >& seq_align ){
00223 if( seq_align.size() == 0 )
00224 {
00225 this->SetAlignmentLength(0);
00226 return;
00227 }
00228 this->SetAlignmentLength(seq_align[0].size());
00229 align_matrix = std::vector< bitset_t >( seq_align.size(), bitset_t( seq_align[0].size(), false ) );
00230 bcount = std::vector< std::vector<size_t> >( seq_align.size() );
00231 for( size_t seqI = 0; seqI < seq_align.size(); seqI++ )
00232 {
00233 bool nonzero = false;
00234 for( size_t charI = 0; charI < seq_align[seqI].size(); charI++ )
00235 if( seq_align[seqI][charI] != '-' )
00236 {
00237 align_matrix[seqI].set(charI);
00238 nonzero = true;
00239 }
00240 }
00241 this->create_bitcount();
00242 }
00243
00244 template< class BaseType >
00245 void CompactGappedAlignment<BaseType>::SetAlignment( std::vector< bitset_t >& seq_align )
00246 {
00247 std::swap( align_matrix, seq_align );
00248 seq_align.clear();
00249 if( align_matrix.size() > 0 )
00250 this->SetAlignmentLength( align_matrix[0].size() );
00251 else
00252 this->SetAlignmentLength(0);
00253 bcount = std::vector< std::vector<size_t> >(align_matrix.size());
00254 this->create_bitcount();
00255 this->validate_bitcount();
00256 }
00257
00258 template< class BaseType >
00259 void CompactGappedAlignment<BaseType>::GetAlignment( std::vector< bitset_t >& align_matrix ) const
00260 {
00261 align_matrix = this->align_matrix;
00262 }
00263
00264 template< class BaseType >
00265 bool CompactGappedAlignment<BaseType>::IsGap( uint seq, gnSeqI col ) const
00266 {
00267 return !align_matrix[seq][col];
00268 }
00269
00270 static const unsigned INDEX_INTERVAL = 512;
00271
00272 template< class BaseType >
00273 void CompactGappedAlignment<BaseType>::create_bitcount()
00274 {
00275 for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
00276 {
00277
00278
00279 bitset_t& bvec = align_matrix[seqI];
00280 bcount[seqI].clear();
00281 bcount[seqI].push_back(0);
00282 for( size_t indie = 0; indie + INDEX_INTERVAL <= bvec.size(); indie += INDEX_INTERVAL )
00283 {
00284 size_t end = indie + INDEX_INTERVAL;
00285 size_t ct = 0;
00286 for( size_t i = indie; i < end; ++i )
00287 ct += bvec.test(i);
00288 bcount[seqI].push_back( ct + bcount[seqI].back() );
00289 }
00290 }
00291 }
00292
00293 template< class BaseType >
00294 bool CompactGappedAlignment<BaseType>::validate_bitcount() const
00295 {
00296 if( !debug_cga )
00297 return true;
00298 bool valid = true;
00299 for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
00300 {
00301 gnSeqI count = align_matrix[seqI].count();
00302 size_t bc_len = align_matrix[seqI].size() / INDEX_INTERVAL;
00303 if( count < INDEX_INTERVAL && bcount[seqI].size() == 0 )
00304 continue;
00305 if( bc_len + 1 != bcount[seqI].size() && (bcount[seqI].back() % INDEX_INTERVAL != 0) )
00306 {
00307 std::cerr << "bitcount problem, bc_len + 1: " << bc_len + 1 << " and bcount[seqI].size(): " << bcount[seqI].size() << std::endl;
00308 std::cerr << "count: " << count << " and bcount[seqI].back(): " << bcount[seqI].back() << std::endl;
00309 valid = false;
00310 }
00311 if( count - bcount[seqI].back() > INDEX_INTERVAL )
00312 {
00313 std::cerr << "bitcount problem, count: " << count << " and bcount[seqI].back(): " << bcount[seqI].back() << std::endl;
00314 valid = false;
00315 }
00316 }
00317 return valid;
00318 }
00319
00320 template< class BaseType >
00321 gnSeqI CompactGappedAlignment<BaseType>::SeqPosToColumn( uint seq, int64 pos )
00322 {
00323 if( this->Orientation(seq) == AbstractMatch::forward )
00324 pos = genome::absolut(pos) - this->LeftEnd(seq) + 1;
00325 else
00326 pos = this->RightEnd(seq)-genome::absolut(pos) + 1;
00327 return SeqPosToColumn(pos, align_matrix[seq], bcount[seq]);
00328 }
00329
00330 template< class BaseType >
00331 gnSeqI CompactGappedAlignment<BaseType>::SeqPosToColumn( gnSeqI pos, const bitset_t& bvec, const std::vector< size_t >& index ) const
00332 {
00333 std::vector<size_t>::const_iterator iter = std::lower_bound(index.begin(), index.end(), pos);
00334 --iter;
00335 size_t cur_pos = *iter;
00336 size_t col = iter - index.begin();
00337 col *= INDEX_INTERVAL;
00338 if( col == 0 )
00339 col = bvec.find_first();
00340 else
00341 col = bvec.find_next(col-1);
00342 for( ++cur_pos; cur_pos < pos; ++cur_pos )
00343 col = bvec.find_next(col);
00344 return col;
00345 }
00346
00347 template< class BaseType >
00348 void CompactGappedAlignment<BaseType>::translate( CompactGappedAlignment& cga, uint cga_seq, uint my_seq, bool add_bits )
00349 {
00350 AbstractMatch::orientation my_orient = this->Orientation(my_seq);
00351
00352 if( cga.Length(cga_seq) > this->Length(my_seq) )
00353 {
00354 std::cerr << "Oh scheisskopf. What are you trying to do to me??\n";
00355 std::cerr << "cga.Length(" << cga_seq << "): " << cga.Length(cga_seq) << std::endl;
00356 std::cerr << "Length(" << my_seq << "): " << this->Length(my_seq) << std::endl;
00357 genome::breakHere();
00358 }
00359
00360 gnSeqI prev_lend = cga.LeftEnd(cga_seq);
00361 gnSeqI prev_len = cga.Length(cga_seq);
00362 gnSeqI my_lend = this->LeftEnd(my_seq);
00363 gnSeqI my_len = this->Length(my_seq);
00364 gnSeqI my_count = 0;
00365 uint seqI = 0;
00366
00367
00368
00369
00370 size_t cur_bit = 0;
00371
00372
00373 size_t left_bit = this->SeqPosToColumn(cga.LeftEnd(cga_seq), align_matrix[my_seq], bcount[my_seq]);
00374
00375 size_t right_bit = this->SeqPosToColumn(cga.RightEnd(cga_seq), align_matrix[my_seq], bcount[my_seq]);
00376 if( right_bit > 4000000000u )
00377 {
00378 std::cerr << "cga doesn't fit\n";
00379 std::cerr << "cga.RightEnd(cga_seq) " << cga.RightEnd(cga_seq) << std::endl;
00380 std::cerr << "RightEnd(my_seq): " << this->RightEnd(my_seq) << std::endl;
00381 std::cerr << "cga.LeftEnd(cga_seq) " << cga.LeftEnd(cga_seq) << std::endl;
00382 std::cerr << "LeftEnd(my_seq): " << this->LeftEnd(my_seq) << std::endl;
00383 std::cerr << "cga.AlignmentLength(): " << cga.AlignmentLength() << std::endl;
00384 std::cerr << "AlignmentLength(): " << this->AlignmentLength() << std::endl;
00385 genome::breakHere();
00386 }
00387 right_bit++;
00388 if( right_bit == 0 )
00389 right_bit = this->AlignmentLength();
00390
00391 cga.SetLeftEnd(cga_seq,left_bit+1);
00392
00393
00394 size_t cga_left = cga.align_matrix[cga_seq].find_first();
00395
00396 size_t somesize = (right_bit - left_bit) - cga.Length(cga_seq) + cga.AlignmentLength();
00397
00398 size_t cga_bit = cga_left;
00399 size_t my_bit = left_bit;
00400 size_t xlat_bit = cga_left;
00401 size_t added_bits = 0;
00402
00403 std::vector< bitset_t > xrated( cga.SeqCount(), bitset_t( somesize, false ) );
00404 for( size_t seqI = 0; seqI < xrated.size(); ++seqI )
00405 for( size_t asdf = cga.align_matrix[seqI].find_first(); asdf < cga_left; asdf = cga.align_matrix[seqI].find_next(asdf) )
00406 xrated[seqI].set(asdf);
00407
00408 while(xlat_bit < somesize)
00409 {
00410
00411 if( !align_matrix[my_seq].test(my_bit) )
00412 {
00413 std::cerr << "ohhhhhhzheiss!\n";
00414 genome::breakHere();
00415 }
00416
00417 for( size_t seqI = 0; seqI < xrated.size(); ++seqI )
00418 xrated[seqI].set( xlat_bit, cga.align_matrix[seqI].test(cga_bit) );
00419
00420 ++cga_bit;
00421 ++xlat_bit;
00422
00423 if( xlat_bit >= somesize )
00424 break;
00425
00426
00427 if( cga.align_matrix[cga_seq].test(cga_bit) )
00428 {
00429 size_t next_bit = align_matrix[my_seq].find_next(my_bit);
00430 if( next_bit > 4000000000u )
00431 genome::breakHere();
00432 size_t diff = next_bit - my_bit;
00433 if( diff > 1 && add_bits )
00434 {
00435 if( xlat_bit + diff - 1 >= somesize )
00436 {
00437 std::cerr << "ERRRORRR porker!!\n";
00438 genome::breakHere();
00439 }
00440 for( size_t i = xlat_bit; i < xlat_bit + diff - 1; ++i )
00441 xrated[cga_seq].set(i);
00442 added_bits += diff-1;
00443 }
00444 my_bit = next_bit;
00445 xlat_bit += diff - 1;
00446 }
00447 }
00448
00449 cga.align_matrix = xrated;
00450 cga.create_bitcount();
00451 cga.SetLength(cga.Length(cga_seq)+added_bits,cga_seq);
00452 cga.SetAlignmentLength(somesize);
00453 if( !cga.validate() )
00454 {
00455 std::cerr << "prev_lend: " << prev_lend << std::endl;
00456 std::cerr << "prev_len: " << prev_len << std::endl;
00457 std::cerr << "translate error\n";
00458 genome::breakHere();
00459 }
00460 }
00461
00462
00463 template< class BaseType >
00464 void CompactGappedAlignment<BaseType>::Invert(){
00465 for(uint seqI = 0; seqI < this->SeqCount(); seqI++)
00466 {
00467 if( this->LeftEnd(seqI) == NO_MATCH )
00468 continue;
00469 bitset_t& fwd = align_matrix[seqI];
00470 bitset_t rev(this->AlignmentLength());
00471 size_t r = this->AlignmentLength();
00472 for( size_t i = 0; i < fwd.size(); ++i )
00473 rev.set( --r, fwd.test(i) );
00474 fwd.swap(rev);
00475 }
00476 this->create_bitcount();
00477 BaseType::Invert();
00478 if( !this->validate() )
00479 {
00480 std::cerr << "invert error\n";
00481 }
00482 }
00483
00484 template< class BaseType >
00485 void CompactGappedAlignment<BaseType>::CropStart(gnSeqI crop_amount){
00486 if( crop_amount > this->AlignmentLength() )
00487 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00488 if( crop_amount == 0 )
00489 return;
00490
00491 gnSeqI pre_alignlen = this->AlignmentLength();
00492 gnSeqI pre_lend0 = this->LeftEnd(0);
00493
00494 std::vector<gnSeqI> pos;
00495 std::vector<bool> column;
00496 GetColumn( crop_amount-1, pos, column );
00497
00498 for( uint i=0; i < this->SeqCount(); i++ ){
00499 if( this->LeftEnd(i) == NO_MATCH )
00500 {
00501 align_matrix[i].resize(this->AlignmentLength()-crop_amount);
00502 align_matrix[i] = align_matrix[i];
00503 continue;
00504 }
00505
00506 align_matrix[i] >>= crop_amount;
00507 align_matrix[i].resize(this->AlignmentLength()-crop_amount);
00508 align_matrix[i] = align_matrix[i];
00509 size_t char_count = this->Orientation(i) == AbstractMatch::forward ? pos[i] - this->LeftEnd(i) + 1 : this->RightEnd(i) - pos[i] + 1;
00510
00511 if( pos[i] > 0 && char_count > 0 )
00512 {
00513 this->SetLength(this->Length(i)-char_count, i);
00514 if( this->Length(i) == 0 )
00515 this->SetStart(i, NO_MATCH);
00516 if( this->Orientation(i) == AbstractMatch::forward )
00517 this->SetStart(i, this->Start(i) + char_count);
00518 }else if( pos[i] == 0 && this->Orientation(i) == AbstractMatch::reverse )
00519 {
00520
00521 this->SetLength(0, i);
00522 this->SetStart(i, NO_MATCH);
00523 }
00524 }
00525
00526 this->SetAlignmentLength( this->AlignmentLength() - crop_amount );
00527 this->create_bitcount();
00528 if( !this->validate() )
00529 {
00530 std::cerr << "pre_lend0: " << pre_lend0 << std::endl;
00531 std::cerr << "pre_alignlen: " << pre_alignlen << std::endl;
00532 std::cerr << "CropStart error\n";
00533 }
00534 }
00535
00536 template< class BaseType >
00537 void CompactGappedAlignment<BaseType>::CropEnd(gnSeqI crop_amount){
00538 if( crop_amount > this->AlignmentLength() )
00539 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00540 if( crop_amount == 0 )
00541 return;
00542
00543 std::vector<gnSeqI> pos;
00544 std::vector<bool> column;
00545 this->GetColumn( this->AlignmentLength()-crop_amount, pos, column );
00546
00547 for( uint i=0; i < this->SeqCount(); i++ ){
00548 align_matrix[i].resize( this->AlignmentLength() - crop_amount );
00549 align_matrix[i] = align_matrix[i];
00550 if( this->LeftEnd(i) == NO_MATCH )
00551 continue;
00552 AbstractMatch::orientation orient = this->Orientation(i);
00553 if( pos[i] > 0 )
00554 {
00555 gnSeqI char_count = pos[i] - (orient == AbstractMatch::forward ? (column[i] ? 1 : 0 ) : (column[i] ? 0 : 1 ) );
00556 char_count = orient == AbstractMatch::forward ? char_count - this->LeftEnd(i) + 1 : this->RightEnd(i) - char_count;
00557 if( char_count == 0 && align_matrix[i].count() > 0)
00558 {
00559 std::cerr << "orienatation: " << (orient == AbstractMatch::forward ? "forward\n" : (orient == AbstractMatch::reverse ? "reverse\n" : "undef\n"));
00560 std::cerr << "lend: " << this->LeftEnd(i) << std::endl;
00561 std::cerr << "length: " << this->Length(i) << std::endl;
00562 std::cerr << "count: " << align_matrix[i].count() << std::endl;
00563 }
00564 gnSeqI deleted = this->Length(i) - char_count;
00565 this->SetLength(char_count, i);
00566 if( this->Length(i) == 0 )
00567 this->SetStart(i, 0);
00568 if( this->Start(i) < 0 )
00569 this->SetStart(i, this->Start(i)-deleted);
00570 }else if( orient == AbstractMatch::forward ){
00571 this->SetLength(0, i);
00572 this->SetStart(i, 0);
00573 }
00574 }
00575 SetAlignmentLength( this->AlignmentLength() - crop_amount );
00576 this->create_bitcount();
00577 if( !this->validate() )
00578 std::cerr << "CropEnd error\n";
00579 }
00580
00581 template< class BaseType >
00582 void CompactGappedAlignment<BaseType>::CropLeft(gnSeqI crop_amount, uint seqI)
00583 {
00584 if( crop_amount == 0 )
00585 return;
00586
00587 gnSeqI pre_len = this->Length(seqI);
00588
00589 if( this->Orientation(seqI) == AbstractMatch::forward )
00590 {
00591 size_t left_col = this->SeqPosToColumn(crop_amount, align_matrix[seqI], bcount[seqI]) + 1;
00592 this->CropStart(left_col);
00593 }else{
00594 size_t left_col = this->SeqPosToColumn(this->Length(seqI) - crop_amount + 1, align_matrix[seqI], bcount[seqI]);
00595 if( left_col > 4000000000u )
00596 {
00597 std::cerr << this->LeftEnd(seqI) << std::endl;
00598 std::cerr << this->LeftEnd(0) << std::endl;
00599 std::cerr << "bogus cropper cga\n";
00600 }
00601 this->CropEnd(this->AlignmentLength()-left_col);
00602 }
00603 if( this->Length(seqI) != pre_len - crop_amount )
00604 {
00605 std::cerr << this->LeftEnd(seqI) << std::endl;
00606 std::cerr << this->LeftEnd(0) << std::endl;
00607 std::cerr << "bad cropperLeftie\n";
00608 }
00609 if( !this->validate() )
00610 std::cerr << "CropLeft error\n";
00611 }
00612
00613 template< class BaseType >
00614 void CompactGappedAlignment<BaseType>::CropRight(gnSeqI crop_amount, uint seqI)
00615 {
00616 if( crop_amount == 0 )
00617 return;
00618
00619 gnSeqI pre_len = this->Length(seqI);
00620 gnSeqI pre_lend = this->LeftEnd(seqI);
00621 gnSeqI pre_lend0 = this->LeftEnd(0);
00622 if( this->Orientation(seqI) == AbstractMatch::forward )
00623 {
00624
00625 size_t right_col = this->SeqPosToColumn(this->Length(seqI) - crop_amount + 1, align_matrix[seqI], bcount[seqI]);
00626 this->CropEnd( this->AlignmentLength()-right_col );
00627 }else
00628 {
00629 size_t right_col = this->SeqPosToColumn(crop_amount, align_matrix[seqI], bcount[seqI]) + 1;
00630 if( right_col > 4000000000u )
00631 {
00632 std::cerr << this->LeftEnd(seqI) << std::endl;
00633 std::cerr << this->LeftEnd(0) << std::endl;
00634 std::cerr << "bogus cropper cga\n";
00635 }
00636 this->CropStart( right_col );
00637 }
00638 if( this->Length(seqI) != pre_len - crop_amount )
00639 {
00640 std::cerr << this->LeftEnd(seqI) << std::endl;
00641 std::cerr << this->LeftEnd(0) << std::endl;
00642 std::cerr << "bad cropperight\n";
00643 }
00644 if( !this->validate() )
00645 std::cerr << "CropRight error\n";
00646 }
00647
00648 template< class BaseType >
00649 void CompactGappedAlignment<BaseType>::GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const
00650 {
00651 pos = std::vector<gnSeqI>(this->SeqCount(), NO_MATCH);
00652 column = std::vector<bool>(this->SeqCount(), false);
00653 for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
00654 {
00655 if( align_matrix[seqI][col] )
00656 column[seqI] = true;
00657
00658 gnSeqI count = 0;
00659 if( this->LeftEnd(seqI) != NO_MATCH )
00660 {
00661 size_t col_index = col / INDEX_INTERVAL;
00662 for( size_t i = col_index * INDEX_INTERVAL; i <= col; i++ )
00663 count += align_matrix[seqI].test(i);
00664 count += bcount[seqI][col_index];
00665 }
00666
00667 if( count > 0 && this->Orientation(seqI) == AbstractMatch::forward )
00668 pos[seqI] = this->LeftEnd(seqI) + count - 1;
00669 else if( this->Orientation(seqI) == AbstractMatch::reverse && !(count == this->Length(seqI) && !column[seqI]) )
00670 pos[seqI] = this->RightEnd(seqI) - count + 1;
00671 }
00672 }
00673
00674 template< class BaseType >
00675 void CompactGappedAlignment<BaseType>::copyRange( CompactGappedAlignment& dest, gnSeqI left_column, gnSeqI length )
00676 {
00677 if( left_column + length > this->AlignmentLength() )
00678 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00679
00680
00681
00682
00683 dest = CompactGappedAlignment(this->SeqCount(), length);
00684 for( uint i=0; i < this->SeqCount(); i++ ){
00685 dest.SetStart(i, this->Start(i));
00686 if( this->Orientation(i) != AbstractMatch::undefined )
00687 dest.SetLength(this->Length(i), i);
00688 }
00689
00690
00691 gnSeqI pre_alignlen = this->AlignmentLength();
00692 gnSeqI pre_lend0 = this->LeftEnd(0);
00693
00694 std::vector< bitset_t > dest_mat(this->SeqCount(), bitset_t(length));
00695 std::vector<gnSeqI> pos;
00696 std::vector<bool> column;
00697 std::vector<gnSeqI> left_cc(this->SeqCount(), 0);
00698 if( left_column > 0 )
00699 {
00700 this->GetColumn( left_column-1, pos, column );
00701 for( uint i=0; i < this->SeqCount(); i++ ){
00702 if( this->LeftEnd(i) == NO_MATCH )
00703 continue;
00704
00705 size_t char_count = this->Orientation(i) == AbstractMatch::forward ? pos[i] - this->LeftEnd(i) + 1 : this->RightEnd(i) - pos[i] + 1;
00706 if( pos[i] > 0 && char_count > 0 )
00707 {
00708 left_cc[i] = char_count;
00709 if( dest.Orientation(i) == AbstractMatch::forward )
00710 dest.SetStart(i, dest.Start(i) + char_count);
00711 }else if( pos[i] == 0 && dest.Orientation(i) == AbstractMatch::reverse )
00712 {
00713
00714 dest.SetStart(i, NO_MATCH);
00715 }
00716 }
00717 }
00718
00719
00720 gnSeqI right_trim = this->AlignmentLength() - left_column - length;
00721
00722 if( right_trim > 0 )
00723 {
00724 this->GetColumn( this->AlignmentLength()-right_trim, pos, column );
00725
00726 for( uint i=0; i < this->SeqCount(); i++ ){
00727 if( this->LeftEnd(i) == NO_MATCH )
00728 continue;
00729 AbstractMatch::orientation orient = this->Orientation(i);
00730 if( pos[i] > 0 )
00731 {
00732 gnSeqI char_count = pos[i] - (orient == AbstractMatch::forward ? (column[i] ? 1 : 0 ) : (column[i] ? 0 : 1 ) );
00733 char_count = orient == AbstractMatch::forward ? char_count - this->LeftEnd(i) + 1 : this->RightEnd(i) - char_count;
00734 char_count -= left_cc[i];
00735 gnSeqI deleted = this->Length(i) - char_count;
00736 if( dest.Start(i) < 0 )
00737 dest.SetStart(i, dest.Start(i)-deleted+left_cc[i]);
00738 }else if( orient == AbstractMatch::forward ){
00739 dest.SetStart(i, NO_MATCH);
00740 }
00741 }
00742 }
00743
00744 for( size_t i = 0; i < dest_mat.size(); ++i )
00745 {
00746 size_t count = 0;
00747 for( size_t j = 0; j < length; ++j )
00748 {
00749 if(align_matrix[i].test(j+left_column))
00750 {
00751 dest_mat[i].set(j, true);
00752 ++count;
00753 }
00754 }
00755 dest.SetLength(count, i);
00756 if( count == 0 )
00757 dest.SetStart(i, NO_MATCH);
00758 }
00759 dest.SetAlignment(dest_mat);
00760
00761 dest.create_bitcount();
00762 if( !dest.validate() )
00763 {
00764 std::cerr << "pre_lend0: " << pre_lend0 << std::endl;
00765 std::cerr << "pre_alignlen: " << pre_alignlen << std::endl;
00766 std::cerr << "CropStart error\n";
00767 }
00768
00769 }
00770
00771 template< class BaseType >
00772 void CompactGappedAlignment<BaseType>::CondenseGapColumns()
00773 {
00774 const size_t len = this->AlignmentLength();
00775 size_t d = 0;
00776 for( size_t i = 0; i < len; ++i )
00777 {
00778 size_t seqI = 0;
00779
00780 for( ; seqI < align_matrix.size(); ++seqI )
00781 if( this->LeftEnd(seqI) != 0 && align_matrix[seqI].test(i) )
00782 break;
00783
00784
00785 if( seqI < align_matrix.size() )
00786 {
00787 if( i != d )
00788 {
00789 for( seqI = 0; seqI < align_matrix.size(); ++seqI )
00790 align_matrix[seqI].set( d, align_matrix[seqI].test(i) );
00791 }
00792 d++;
00793 }
00794 else
00795 std::cout << "";
00796 }
00797 this->SetAlignmentLength(d);
00798 for( size_t seqI = 0; seqI < align_matrix.size(); ++seqI )
00799 {
00800 align_matrix[seqI].resize(d);
00801 align_matrix[seqI] = align_matrix[seqI];
00802 }
00803 this->create_bitcount();
00804 }
00805
00806
00807 }
00808
00809 namespace std {
00810 template<> inline
00811 void swap( mems::CompactGappedAlignment<>& a, mems::CompactGappedAlignment<>& b )
00812 {
00813 a.swap(b);
00814 }
00815 }
00816
00817
00818 #endif // __CompactGappedAlignment_h__
00819