00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef _IntervalList_h_
00010 #define _IntervalList_h_
00011
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015
00016 #include <iostream>
00017 #include <list>
00018 #include <sstream>
00019
00020 #include "libMems/SortedMerList.h"
00021 #include "libGenome/gnSequence.h"
00022 #include "libMems/Interval.h"
00023 #include "libMems/MemHash.h"
00024 #include "libMems/CompactGappedAlignment.h"
00025 #include "libGenome/gnSourceFactory.h"
00026 #include "libGenome/gnFASSource.h"
00027 #include "libGenome/gnSEQSource.h"
00028 #include "libGenome/gnGBKSource.h"
00029 #include "libGenome/gnRAWSource.h"
00030
00031 namespace mems {
00032
00038 template< class MatchType = Interval >
00039 class GenericIntervalList : public std::vector< MatchType > {
00040 public:
00041 GenericIntervalList(){};
00042 GenericIntervalList( const GenericIntervalList& ml );
00043 GenericIntervalList& operator=( const GenericIntervalList& ml );
00044 ~GenericIntervalList(){};
00045
00050 void Clear();
00051
00060 void ReadList( std::istream& match_stream );
00061
00066 void WriteList( std::ostream& match_stream ) const;
00067
00071 void WriteAlignedSequences(std::ostream& match_file) const;
00072
00076 void WriteStandardAlignment( std::ostream& out_file ) const;
00077
00081 void ReadStandardAlignment( std::istream& in_stream );
00082
00087 void ReadStandardAlignmentCompact( std::istream& in_stream );
00088
00089 std::vector<std::string> seq_filename;
00090 std::vector<genome::gnSequence*> seq_table;
00092 std::string backbone_filename;
00093 protected:
00094
00095 };
00096
00097
00098 typedef GenericIntervalList<> IntervalList;
00099
00100 template< class MatchType >
00101 GenericIntervalList<MatchType>::GenericIntervalList( const GenericIntervalList<MatchType>& ml )
00102 {
00103 *this = ml;
00104 }
00105
00106 template< class MatchType >
00107 GenericIntervalList<MatchType>& GenericIntervalList<MatchType>::operator=( const GenericIntervalList<MatchType>& ml )
00108 {
00109 std::vector< MatchType >::operator=( ml );
00110 seq_filename = ml.seq_filename;
00111 seq_table = ml.seq_table;
00112 return *this;
00113 }
00114
00115 template< class MatchType >
00116 void GenericIntervalList<MatchType>::Clear()
00117 {
00118 for( uint seqI = 0; seqI < seq_table.size(); seqI++ ){
00119 if( seq_table[ seqI ] != NULL )
00120 delete seq_table[ seqI ];
00121 }
00122 seq_filename.clear();
00123 this->clear();
00124 }
00125
00126 template< class MatchType >
00127 void GenericIntervalList<MatchType>::ReadList(std::istream& match_file)
00128 {
00129 std::string tag;
00130 gnSeqI len;
00131 int64 start;
00132 unsigned int seq_count;
00133 uint seqI;
00134
00135 match_file >> tag;
00136 if( tag != "FormatVersion" ){
00137 Throw_gnEx(InvalidFileFormat());
00138 }
00139 match_file >> tag;
00140 if( tag != "4" ){
00141 Throw_gnEx(InvalidFileFormat());
00142 }
00143 match_file >> tag;
00144 if( tag != "SequenceCount" ){
00145 Throw_gnEx(InvalidFileFormat());
00146 }
00147 match_file >> seq_count;
00148 if(seq_count < 2){
00149 Throw_gnEx(InvalidFileFormat());
00150 }
00151
00152 std::vector< std::string > alignment;
00153
00154 for( seqI = 0; seqI < seq_count; seqI++ ){
00155 match_file >> tag;
00156 getline( match_file, tag );
00157
00158 tag = tag.substr( 1 );
00159 seq_filename.push_back(tag);
00160
00161
00162
00163
00164
00165 match_file >> tag;
00166 match_file >> tag;
00167
00168 alignment.push_back( "" );
00169 }
00170 uint interval_count;
00171 match_file >> tag;
00172 match_file >> interval_count;
00173
00174
00175
00176 std::string cur_line;
00177 Interval* cur_iv = NULL;
00178 boolean clustal_match;
00179 std::vector< AbstractMatch* > iv_matches;
00180 bool parsing = false;
00181
00182 while( std::getline( match_file, cur_line ) ){
00183 if( cur_line.find( "Interval" ) != std::string::npos ){
00184
00185 if( iv_matches.size() > 0 )
00186 {
00187 this->push_back( Interval(iv_matches.begin(), iv_matches.end()) );
00188
00189
00190 iv_matches.clear();
00191 }
00192 parsing = true;
00193 continue;
00194 }
00195 if( !parsing )
00196 continue;
00197 if( cur_line.length() == 0 )
00198 continue;
00199
00200 clustal_match = false;
00201 if( cur_line == "GappedAlignment" ){
00202 clustal_match = true;
00203 getline( match_file, cur_line );
00204
00205 std::stringstream line_stream( cur_line );
00206 line_stream >> len;
00207 GappedAlignment* cr = new GappedAlignment( seq_count, len );
00208
00209 for( seqI = 0; seqI < seq_count; seqI++ ){
00210 line_stream >> start;
00211 cr->SetStart( seqI, start );
00212 std::getline( match_file, alignment[ seqI ] );
00213 int64 seq_len = 0;
00214 for( uint charI = 0; charI < alignment[ seqI ].length(); charI++ )
00215 if( alignment[ seqI ][ charI ] != '-' )
00216 seq_len++;
00217 cr->SetLength( seq_len, seqI );
00218 }
00219 cr->SetAlignment( alignment );
00220 iv_matches.push_back( cr );
00221 }
00222 else{
00223
00224 Match mmhe( seq_count );
00225 Match* mhe = mmhe.Copy();
00226 std::stringstream line_stream( cur_line );
00227
00228 line_stream >> len;
00229 mhe->SetLength(len);
00230
00231 for( seqI = 0; seqI < seq_count; seqI++){
00232 line_stream >> start;
00233 mhe->SetStart(seqI, start);
00234 }
00235
00236 iv_matches.push_back( mhe );
00237 }
00238 }
00239 if( iv_matches.size() > 0 )
00240 this->push_back( Interval(iv_matches.begin(), iv_matches.end()) );
00241 if( interval_count != this->size() ){
00242 Throw_gnEx(InvalidFileFormat());
00243 }
00244
00245 }
00246
00247 template< class MatchType >
00248 void GenericIntervalList<MatchType>::WriteList(std::ostream& match_file) const
00249 {
00250
00251 unsigned int seq_count = seq_table.size();
00252 uint seqI;
00253
00254 match_file << "FormatVersion" << '\t' << 4 << "\n";
00255 match_file << "SequenceCount" << '\t' << seq_count << "\n";
00256 for(seqI = 0; seqI < seq_count; seqI++){
00257 match_file << "Sequence" << seqI << "File" << '\t';
00258 if( seq_filename.size() > seqI )
00259 match_file << seq_filename[seqI];
00260 else
00261 match_file << "null";
00262 match_file << "\n";
00263 match_file << "Sequence" << seqI << "Length" << '\t';
00264 if( seq_table.size() > seqI )
00265 match_file << seq_table[seqI]->length();
00266 else
00267 match_file << "0";
00268 match_file << "\n";
00269 }
00270
00271 match_file << "IntervalCount" << '\t' << this->size() << std::endl;
00272
00273 for( uint ivI = 0; ivI < this->size(); ivI++ ){
00274 match_file << "Interval " << ivI << std::endl;
00275 const std::vector<AbstractMatch*>& matches = (*this)[ ivI ].GetMatches();
00276 for( uint matchI = 0; matchI < matches.size(); matchI++ ){
00277 const AbstractMatch* m = matches[ matchI ];
00278 const GappedAlignment* cr = dynamic_cast< const GappedAlignment* >( m );
00279 const Match* match = dynamic_cast< const Match* >( m );
00280 if( match != NULL ){
00281 match_file << *match << std::endl;
00282 }
00283 else if( cr != NULL ){
00284 match_file << "GappedAlignment\n";
00285 match_file << cr->Length();
00286 for( seqI = 0; seqI < seq_count; seqI++ )
00287 match_file << '\t' << cr->Start( seqI );
00288 match_file << std::endl;
00289
00290 const std::vector< std::string >& align_matrix = GetAlignment( *cr, seq_table );
00291 for( seqI = 0; seqI < seq_count; seqI++ )
00292 match_file << align_matrix[ seqI ] << std::endl;
00293 }
00294 }
00295 match_file << std::endl;
00296 }
00297 }
00298
00299 template< class MatchType >
00300 void GenericIntervalList<MatchType>::WriteStandardAlignment( std::ostream& out_file ) const
00301 {
00302 if( this->size() == 0 )
00303 return;
00304
00305
00306 uint seqI = 0;
00307
00308
00309 out_file << "#FormatVersion Mauve1" << std::endl;
00310
00311
00312
00313 boolean single_input = true;
00314 for( seqI = 1; seqI < seq_filename.size(); seqI++ ){
00315 if( seq_filename[ 0 ] != seq_filename[ seqI ] ){
00316 single_input = false;
00317 break;
00318 }
00319 }
00320 for( seqI = 0; seqI < seq_filename.size(); seqI++ ){
00321 out_file << "#Sequence" << seqI + 1 << "File\t" << seq_filename[ seqI ] << std::endl;
00322 if( single_input )
00323 out_file << "#Sequence" << seqI + 1 << "Entry\t" << seqI + 1 << std::endl;
00324
00325 genome::gnSourceFactory* sf = genome::gnSourceFactory::GetSourceFactory();
00326 genome::gnBaseSource* gnbs = sf->MatchSourceClass( seq_filename[ seqI ] );
00327 genome::gnFASSource* gnfs = dynamic_cast< genome::gnFASSource* >(gnbs);
00328 genome::gnRAWSource* gnrs = dynamic_cast< genome::gnRAWSource* >(gnbs);
00329 genome::gnSEQSource* gnss = dynamic_cast< genome::gnSEQSource* >(gnbs);
00330 genome::gnGBKSource* gngs = dynamic_cast< genome::gnGBKSource* >(gnbs);
00331 if( gnfs != NULL )
00332 out_file << "#Sequence" << seqI + 1 << "Format\tFastA" << std::endl;
00333 else if( gnrs != NULL )
00334 out_file << "#Sequence" << seqI + 1 << "Format\traw" << std::endl;
00335 else if( gnss != NULL ){
00336 out_file << "#Sequence" << seqI + 1 << "Format\tDNAstar" << std::endl;
00337 out_file << "#Annotation" << seqI + 1 << "File\t" << seq_filename[ seqI ] << std::endl;
00338 out_file << "#Annotation" << seqI + 1 << "Format\tDNAstar" << std::endl;
00339 }else if( gngs != NULL ){
00340 out_file << "#Sequence" << seqI + 1 << "Format\tGenBank" << std::endl;
00341 out_file << "#Annotation" << seqI + 1 << "File\t" << seq_filename[ seqI ] << std::endl;
00342 out_file << "#Annotation" << seqI + 1 << "Format\tGenBank" << std::endl;
00343 }
00344 }
00345
00346 if( this->backbone_filename != "" )
00347 out_file << "#BackboneFile\t" << this->backbone_filename << std::endl;
00348
00349 for( uint ivI = 0; ivI < this->size(); ivI++ ){
00350 if( (*this)[ ivI ].AlignmentLength() == 0 ){
00351 continue;
00352 }
00353 std::vector<std::string> alignment;
00354 if( seq_table.size() == 1 && seq_table.size() != (*this)[ ivI ].SeqCount() )
00355 {
00356 GetAlignment( (*this)[ ivI ], std::vector<genome::gnSequence*>((*this)[ ivI ].SeqCount(), seq_table[0]), alignment );
00357 }else
00358 GetAlignment( (*this)[ ivI ], seq_table, alignment );
00359 for( seqI = 0; seqI < (*this)[ ivI ].SeqCount(); seqI++ ){
00360 int64 startI = (*this)[ ivI ].Start( seqI );
00361 gnSeqI length = (*this)[ ivI ].Length( seqI );
00362
00363
00364 if( startI == 0 &&ivI > 0)
00365 continue;
00366 out_file << "> " << seqI + 1 << ":";
00367 if( startI > 0 ){
00368 out_file << genome::absolut( startI ) << "-" << genome::absolut( startI ) + length - 1 << " + ";
00369 }else if(startI == 0){
00370 out_file << 0 << "-" << 0 << " + ";
00371 }else{
00372 out_file << genome::absolut( startI ) << "-" << genome::absolut( startI ) + length - 1 << " - ";
00373 }
00374 if( single_input )
00375 out_file << seq_filename[ 0 ];
00376 else
00377 out_file << seq_filename[ seqI ];
00378 out_file << std::endl;
00379 gnSeqI cur_pos = 0;
00380 for( ; cur_pos < alignment[ seqI ].length(); cur_pos += 80 ){
00381 gnSeqI cur_len = cur_pos + 80 < alignment[ seqI ].length() ? 80 : alignment[ seqI ].length() - cur_pos;
00382 out_file.write( alignment[ seqI ].data() + cur_pos, cur_len );
00383 out_file << std::endl;
00384 }
00385 }
00386 out_file << "=" << std::endl;
00387 }
00388
00389 }
00390
00391 template< class MatchType >
00392 void GenericIntervalList<MatchType>::ReadStandardAlignment( std::istream& in_stream )
00393 {
00394 uint seq_count = 0;
00395 gnSeqI max_len = 0;
00396 std::string cur_line;
00397 if( !std::getline( in_stream, cur_line ) )
00398 {
00399 Clear();
00400 return;
00401 }
00402 uint seqI = 0;
00403 std::vector< gnSeqI > lengths;
00404 std::vector< GappedAlignment* > ga_list;
00405 GappedAlignment cr;
00406 std::string empty_line;
00407 std::vector< std::string > aln_mat;
00408 uint line_count = 1;
00409 while( true ){
00410
00411 while( cur_line[0] == '#' ){
00412
00413 std::getline( in_stream, cur_line );
00414 line_count++;
00415 std::stringstream ss( cur_line );
00416 std::string token;
00417 std::getline( ss, token, '\t' );
00418 if( token.substr(1, 8) != "Sequence" || token.find( "File" ) == std::string::npos )
00419 continue;
00420 std::getline( ss, token );
00421 seq_filename.push_back( token );
00422 }
00423
00424
00425 std::stringstream line_str( cur_line );
00426 std::getline( line_str, cur_line, '>' );
00427 std::getline( line_str, cur_line, ':' );
00428
00429 std::stringstream parse_str( cur_line );
00430
00431 parse_str >> seqI;
00432
00433 int64 start, stop;
00434 std::getline( line_str, cur_line, '-' );
00435 parse_str.clear();
00436 parse_str.str( cur_line );
00437 parse_str >> start;
00438 line_str >> stop;
00439 std::string strand;
00440 line_str >> strand;
00441
00442 std::string name;
00443 std::getline( line_str, name );
00444
00445
00446 while( aln_mat.size() < seqI )
00447 aln_mat.push_back( empty_line );
00448
00449 gnSeqI chars = 0;
00450 while( std::getline( in_stream, cur_line ) ){
00451 line_count++;
00452 if( (cur_line[ 0 ] == '>' ) || (cur_line[ 0 ] == '=' ))
00453 break;
00454 for( uint charI = 0; charI < cur_line.length(); charI++ )
00455 if( cur_line[ charI ] != '-' )
00456 chars++;
00457 aln_mat[ seqI - 1 ] += cur_line;
00458 }
00459 while( lengths.size() < seqI )
00460 lengths.push_back(0);
00461
00462 lengths[ seqI - 1 ] = chars;
00463
00464
00465 if( strand == "+" )
00466 cr.SetStart( seqI - 1, start );
00467 else if( start < stop ){
00468 if( chars == 0 )
00469 cr.SetStart( seqI - 1, 0 );
00470 else
00471 cr.SetStart( seqI - 1, -start );
00472 if( chars != stop - start + 1 && !(chars == 0 && stop - start == 1) ){
00473 std::cerr << "Error in XMFA file format\n";
00474 std::cerr << "Before line " << line_count << std::endl;
00475 std::cerr << "Expecting " << stop - start + 1 << " characters based on defline\n";
00476 std::cerr << "Actually read " << chars << " characters of sequence\n";
00477 Throw_gnEx(InvalidFileFormat());
00478 }
00479 }else{
00480 if( chars == 0 )
00481 cr.SetStart( seqI - 1, 0 );
00482 else
00483 cr.SetStart( seqI - 1, -stop );
00484 if( chars != start - stop + 1 && !(chars == 0 && stop - start == 1) ){
00485 std::cerr << "Error in XMFA file format\n";
00486 std::cerr << "Before line " << line_count << std::endl;
00487 std::cerr << "Expecting " << start - stop + 1 << " characters based on defline\n";
00488 std::cerr << "Actually read " << chars << " characters of sequence\n";
00489 Throw_gnEx(InvalidFileFormat());
00490 }
00491 }
00492
00493 if( chars > max_len )
00494 max_len = aln_mat[ seqI - 1 ].length();
00495
00496 if( cur_line.size() == 0 )
00497 break;
00498
00499 if( cur_line[ 0 ] != '>' ){
00500 GappedAlignment *new_cr = new GappedAlignment( aln_mat.size(), max_len );
00501 for( uint seqJ = 0; seqJ < seqI; seqJ++ ){
00502 new_cr->SetStart( seqJ, cr.Start( seqJ ) );
00503 new_cr->SetLength( lengths[ seqJ ], seqJ );
00504 cr.SetStart( seqJ, NO_MATCH );
00505 }
00506 for( uint seqJ = 0; seqJ < seqI; seqJ++ )
00507 aln_mat[seqJ].resize( max_len, '-' );
00508
00509 new_cr->SetAlignment(aln_mat);
00510 lengths.clear();
00511 if( seq_count < seqI )
00512 seq_count = seqI;
00513
00514 ga_list.push_back( new_cr );
00515
00516 max_len = 0;
00517 aln_mat.clear();
00518
00519
00520 if( cur_line[ 0 ] != '=' )
00521 break;
00522
00523 while( std::getline( in_stream, cur_line ) ){
00524 line_count++;
00525 if( cur_line[ 0 ] == '>' )
00526 break;
00527 }
00528 if( cur_line[ 0 ] != '>' )
00529 break;
00530 }
00531 }
00532
00533
00534 for( uint ivI = 0; ivI < ga_list.size(); ivI++ ){
00535 GappedAlignment* cr = ga_list[ ivI ];
00536 GappedAlignment* new_cr = new GappedAlignment( seq_count, cr->AlignmentLength() );
00537
00538 const std::vector< std::string >& align_matrix = GetAlignment( *cr, seq_table );
00539 std::vector< std::string > new_aln_mat(seq_count);
00540 for( seqI = 0; seqI < align_matrix.size(); seqI++ ){
00541 new_cr->SetLength( cr->Length( seqI ), seqI );
00542 new_cr->SetStart( seqI, cr->Start(seqI) );
00543 new_aln_mat[ seqI ] = align_matrix[ seqI ];
00544 if( new_aln_mat[ seqI ].length() == 0 )
00545 new_aln_mat[ seqI ] = std::string( new_cr->AlignmentLength(), '-' );
00546 }
00547 for( ; seqI < seq_count; seqI++ ){
00548 new_cr->SetLength( 0, seqI );
00549 new_cr->SetStart( seqI, 0 );
00550 new_aln_mat[ seqI ] = std::string( new_cr->AlignmentLength(), '-' );
00551 }
00552 new_cr->SetAlignment(new_aln_mat);
00553 delete cr;
00554 cr = new_cr;
00555 ga_list[ ivI ] = new_cr;
00556
00557 std::vector<AbstractMatch*> asdf(1, cr);
00558 Interval iv( asdf.begin(), asdf.end() );
00559 this->push_back( iv );
00560 }
00561 }
00562
00563 template< class MatchType >
00564 void GenericIntervalList<MatchType>::ReadStandardAlignmentCompact( std::istream& in_stream )
00565 {
00566 uint seq_count = 0;
00567 gnSeqI max_len = 0;
00568 std::string cur_line;
00569 std::getline( in_stream, cur_line );
00570 uint seqI = 0;
00571 std::vector< gnSeqI > lengths;
00572 std::vector< GappedAlignment* > ga_list;
00573 GappedAlignment cr;
00574 std::string empty_line;
00575 std::vector< std::string > aln_mat;
00576 uint line_count = 1;
00577 while( true ){
00578
00579 while( cur_line[0] == '#' ){
00580
00581 std::getline( in_stream, cur_line );
00582 line_count++;
00583 std::stringstream ss( cur_line );
00584 std::string token;
00585 std::getline( ss, token, '\t' );
00586 if( token.substr(1, 8) != "Sequence" || token.find( "File" ) == std::string::npos )
00587 continue;
00588 std::getline( ss, token );
00589 seq_filename.push_back( token );
00590 }
00591
00592
00593 std::stringstream line_str( cur_line );
00594 std::getline( line_str, cur_line, '>' );
00595 std::getline( line_str, cur_line, ':' );
00596
00597 std::stringstream parse_str( cur_line );
00598
00599 parse_str >> seqI;
00600
00601 int64 start, stop;
00602 std::getline( line_str, cur_line, '-' );
00603 parse_str.clear();
00604 parse_str.str( cur_line );
00605 parse_str >> start;
00606 line_str >> stop;
00607 std::string strand;
00608 line_str >> strand;
00609
00610 std::string name;
00611 std::getline( line_str, name );
00612
00613
00614 while( aln_mat.size() < seqI )
00615 aln_mat.push_back( empty_line );
00616
00617 gnSeqI chars = 0;
00618 while( std::getline( in_stream, cur_line ) ){
00619 line_count++;
00620 if( (cur_line[ 0 ] == '>' ) || (cur_line[ 0 ] == '=' ))
00621 break;
00622 for( uint charI = 0; charI < cur_line.length(); charI++ )
00623 if( cur_line[ charI ] != '-' )
00624 chars++;
00625 aln_mat[ seqI - 1 ] += cur_line;
00626 }
00627 while( lengths.size() < seqI )
00628 lengths.push_back(0);
00629
00630 lengths[ seqI - 1 ] = chars;
00631
00632
00633 if( strand == "+" )
00634 cr.SetStart( seqI - 1, start );
00635 else if( start < stop ){
00636 if( chars == 0 )
00637 cr.SetStart( seqI - 1, 0 );
00638 else
00639 cr.SetStart( seqI - 1, -start );
00640 if( chars != stop - start + 1 && !(chars == 0 && stop - start == 1) ){
00641 std::cerr << "Error in XMFA file format\n";
00642 std::cerr << "Before line " << line_count << std::endl;
00643 std::cerr << "Expecting " << stop - start + 1 << " characters based on defline\n";
00644 std::cerr << "Actually read " << chars << " characters of sequence\n";
00645 Throw_gnEx(InvalidFileFormat());
00646 }
00647 }else{
00648 if( chars == 0 )
00649 cr.SetStart( seqI - 1, 0 );
00650 else
00651 cr.SetStart( seqI - 1, -stop );
00652 if( chars != start - stop + 1 && !(chars == 0 && stop - start == 1) ){
00653 std::cerr << "Error in XMFA file format\n";
00654 std::cerr << "Before line " << line_count << std::endl;
00655 std::cerr << "Expecting " << start - stop + 1 << " characters based on defline\n";
00656 std::cerr << "Actually read " << chars << " characters of sequence\n";
00657 Throw_gnEx(InvalidFileFormat());
00658 }
00659 }
00660
00661 if( chars > max_len )
00662 max_len = aln_mat[ seqI - 1 ].length();
00663
00664 if( cur_line.size() == 0 )
00665 break;
00666
00667 if( cur_line[ 0 ] != '>' ){
00668 GappedAlignment *new_cr = new GappedAlignment( aln_mat.size(), max_len );
00669 for( uint seqJ = 0; seqJ < seqI; seqJ++ ){
00670 new_cr->SetStart( seqJ, cr.Start( seqJ ) );
00671 new_cr->SetLength( lengths[ seqJ ], seqJ );
00672 cr.SetStart( seqJ, NO_MATCH );
00673 }
00674 for( uint seqJ = 0; seqJ < seqI; seqJ++ )
00675 aln_mat[seqJ].resize( max_len, '-' );
00676
00677 new_cr->SetAlignment(aln_mat);
00678 lengths.clear();
00679 if( seq_count < seqI )
00680 seq_count = seqI;
00681
00682 ga_list.push_back( new_cr );
00683
00684 max_len = 0;
00685 aln_mat.clear();
00686
00687
00688 if( cur_line[ 0 ] != '=' )
00689 break;
00690
00691 while( std::getline( in_stream, cur_line ) ){
00692 line_count++;
00693 if( cur_line[ 0 ] == '>' )
00694 break;
00695 }
00696 if( cur_line[ 0 ] != '>' )
00697 break;
00698 }
00699 }
00700
00701
00702
00703 for( uint ivI = 0; ivI < ga_list.size(); ivI++ )
00704 {
00705 GappedAlignment* cr = ga_list[ ivI ];
00706 uint compact_seq_count = cr->SeqCount();
00707 CompactGappedAlignment<>* new_cr = new CompactGappedAlignment<>(compact_seq_count, cr->AlignmentLength() );
00708 const std::vector< std::string > align_matrix = GetAlignment( *cr, seq_table );
00709
00710
00711 std::vector< std::string > new_aln_mat(compact_seq_count);
00712 for( seqI = 0; seqI < compact_seq_count; seqI++ ){
00713 new_cr->SetLength( cr->Length( seqI ), seqI );
00714 new_cr->SetStart( seqI, cr->Start(seqI) );
00715 new_aln_mat[ seqI ] = align_matrix[ seqI ];
00716 if( new_aln_mat[ seqI ].length() == 0 )
00717 new_aln_mat[ seqI ] = std::string( new_cr->AlignmentLength(), '-' );
00718 }
00719
00720 for( ; seqI < compact_seq_count; seqI++ ){
00721 new_cr->SetLength( 0, seqI );
00722 new_cr->SetStart( seqI, 0 );
00723 new_aln_mat[ seqI ] = std::string( new_cr->AlignmentLength(), '-' );
00724 }
00725
00726 new_cr->SetAlignment( new_aln_mat );
00727 delete cr;
00728
00729
00730
00731 Interval iv;
00732 this->push_back( iv );
00733 std::vector< AbstractMatch* > matches(1, new_cr);
00734 this->back().SetMatches( matches );
00735 }
00736 }
00737
00738
00739 template< class MatchType >
00740 void GenericIntervalList<MatchType>::WriteAlignedSequences(std::ostream& match_file) const
00741 {
00742
00743 unsigned int seq_count = seq_table.size();
00744 uint seqI;
00745
00746 match_file << "mauveAligner data\n";
00747 match_file << "FormatVersion" << '\t' << 5 << "\n";
00748 match_file << "SequenceCount" << '\t' << seq_count << "\n";
00749 for(seqI = 0; seqI < seq_count; seqI++){
00750 match_file << "Sequence" << seqI << "File" << '\t';
00751 if( seq_filename.size() > seqI )
00752 match_file << seq_filename[seqI];
00753 else
00754 match_file << "null";
00755 match_file << "\n";
00756 match_file << "Sequence" << seqI << "Length" << '\t';
00757 if( seq_table.size() > seqI )
00758 match_file << seq_table[seqI]->length();
00759 else
00760 match_file << "0";
00761 match_file << "\n";
00762 }
00763
00764 match_file << "AlignmentCount" << '\t' << this->size() << std::endl;
00765
00766 if( this->size() == 0 )
00767 return;
00768
00769 for( uint ivI = 0; ivI < this->size(); ivI++ ){
00770
00771 match_file << (*this)[ ivI ].AlignmentLength();
00772 for( seqI = 0; seqI < seq_count; seqI++ )
00773 match_file << '\t' << (*this)[ ivI ].Start( seqI );
00774 match_file << std::endl;
00775
00776 std::vector<std::string> alignment;
00777 GetAlignment( (*this)[ ivI ], this->seq_table, alignment );
00778 for( seqI = 0; seqI < seq_count; seqI++ )
00779 match_file << alignment[ seqI ] << std::endl;
00780 match_file << std::endl;
00781 }
00782
00783 }
00784
00785
00786 }
00787
00788 #endif //_IntervalList_h_