libMems/gnAlignedSequences.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002  * $Id: gnAlignedSequences.cpp,v 1.11 2004/03/01 02:40:08 darling Exp $
00003  * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
00004  * Please see the file called COPYING for licensing, copying, and modification
00005  * Please see the file called COPYING for licensing details.
00006  * **************
00007  ******************************************************************************/
00008 
00009 #ifdef HAVE_CONFIG_H
00010 #include "config.h"
00011 #endif
00012 
00013 #include "libMems/gnAlignedSequences.h"
00014 #include <sstream>
00015 
00016 using namespace std;
00017 using namespace genome;
00018 namespace mems {
00019 
00020 gnAlignedSequences::gnAlignedSequences()
00021 {
00022         alignedSequenceFileName = "";
00023         
00024         
00025 }
00026 
00027 
00028 gnAlignedSequences::gnAlignedSequences(const gnAlignedSequences &toCopy)
00029 {
00030         alignedSequenceFileName = toCopy.alignedSequenceFileName;
00031         consensus = toCopy.consensus;
00032         
00033         names = toCopy.names;
00034         sequences = toCopy.sequences;
00035         positions = toCopy.positions;
00036 }
00037 
00038 
00039 void gnAlignedSequences::constructFromClustalW(string alignedFileName)
00040 {
00041         alignedSequenceFileName = alignedFileName;
00042         
00043         readClustalWAlignment();
00044         buildConsensus();
00045         
00046         indexPositions.resize(consensus.size());
00047         for (int i=0; i<consensus.size(); i++)
00048                 indexPositions[i] = i+1;
00049 }
00050 
00051 
00052 void gnAlignedSequences::constructFromPhylip(string alignedFileName)
00053 {
00054         alignedSequenceFileName = alignedFileName;
00055         
00056         readPhylipAlignment();
00057         buildConsensus();
00058         
00059         indexPositions.resize(consensus.size());
00060         for (int i=0; i<consensus.size(); i++)
00061                 indexPositions[i] = i+1;
00062 }
00063 
00064 
00065 void gnAlignedSequences::constructFromMSF(string alignedFileName)
00066 {
00067         alignedSequenceFileName = alignedFileName;
00068         
00069         readMSFAlignment();
00070         buildConsensus();
00071         
00072         indexPositions.resize(consensus.size());
00073         for (int i=0; i<consensus.size(); i++)
00074                 indexPositions[i] = i+1;
00075 }
00076 
00077 
00078 void gnAlignedSequences::constructFromRelaxedNexus( istream& align_stream ){
00079         readRelaxedNexusAlignment( align_stream );
00080 //      buildConsensus();
00081         
00082 //      indexPositions.resize(consensus.size());
00083 //      for (int i=0; i<consensus.size(); i++)
00084 //              indexPositions[i] = i+1;
00085 }
00086 
00087 void gnAlignedSequences::constructFromNexus(string alignedFileName)
00088 {
00089         alignedSequenceFileName = alignedFileName;
00090         
00091         readNexusAlignment();
00092         buildConsensus();
00093         
00094         indexPositions.resize(consensus.size());
00095         for (int i=0; i<consensus.size(); i++)
00096                 indexPositions[i] = i+1;
00097 }
00098 
00099 
00100 void gnAlignedSequences::constructFromMega(string alignedFileName)
00101 {
00102         alignedSequenceFileName = alignedFileName;
00103         
00104         readMegaAlignment();
00105         buildConsensus();
00106         
00107         indexPositions.resize(consensus.size());
00108         for (int i=0; i<consensus.size(); i++)
00109                 indexPositions[i] = i+1;
00110 }
00111 
00112 const vector< string >& gnAlignedSequences::getSupportedFormats()
00113 {
00114         static vector< string > formats;
00115         if( formats.size() == 0 ){
00116                 formats.push_back( "phylip" );
00117                 formats.push_back( "clustal" );
00118                 formats.push_back( "msf" );
00119                 formats.push_back( "nexus" );
00120                 formats.push_back( "mega" );
00121                 formats.push_back( "codon" );
00122         }
00123         return formats;
00124 }
00125 
00126 boolean gnAlignedSequences::isSupportedFormat( const string& format_name )
00127 {
00128         const vector< string >& formats = getSupportedFormats();
00129         for( int formatI = 0; formatI < formats.size(); formatI++ ){
00130                 if( formats[ formatI ] == format_name )
00131                         return true;
00132         }
00133         return false;
00134 }
00135 void gnAlignedSequences::output( const string& format_name, ostream& os ) const
00136 {
00137         bool rval = false;
00138 
00139         if( format_name == "phylip" )
00140                 rval = outputPhylip( os );
00141 
00142         if( format_name == "clustal" )
00143                 rval = outputClustalW( os );
00144 
00145         if( format_name == "msf" )
00146                 rval = outputMSF( os );
00147 
00148         if( format_name == "nexus" )
00149                 rval = outputNexus( os );
00150 
00151         if( format_name == "mega" )
00152                 rval = outputMega( os );
00153         
00154         if( format_name == "codon" )
00155                 rval = outputCodon( os );
00156         
00157         if( !rval )
00158                 throw "Error writing alignment\n";
00159 
00160 }
00161 
00162 bool gnAlignedSequences::outputPhylip(ostream& os) const
00163 {
00164         
00165         os << "Sequences in Alignment: " << sequences.size()
00166                 << "  Bases in Each Aligned Sequence: " << sequences[0].length() << endl;
00167         
00168         int offset = 10;
00169         uint seqI;
00170         for( seqI = 0; seqI < sequences.size(); seqI++ )
00171         {
00172                 int position = 0;
00173                 const string& seq = sequences[ seqI ];
00174                 string seqName = names[ seqI ].substr( 0, offset );
00175                 seqName.append( offset - seqName.length() + 1, ' ' ); 
00176                 
00177                 os << seqName;
00178 
00179                 for ( position=0; position + offset < seq.size(); position += offset){
00180                         if ( position % 50 == 0)
00181                                 os << endl;
00182                         os.write( seq.data() + position, offset );
00183                         os << ' ';
00184                 }
00185 
00186                 if ( position % 50 == 0)
00187                         os << endl;
00188 
00189                 os.write( seq.data() + position, seq.size() - position );
00190                 os << endl;
00191         }
00192         
00193         return true;
00194 }
00195 
00196 uint64 countGaps( string& seq );
00197 uint64 countGaps( string& seq ){
00198         uint gap_count = 0;
00199         for( uint charI = 0; charI < seq.length(); charI++ )
00200                 if( seq[ charI ] == '-' )
00201                         gap_count++;
00202         return gap_count;
00203 }
00204 
00205 bool gnAlignedSequences::outputClustalW(ostream& os) const
00206 {
00207         boolean output_positions = true;
00208         
00209         os << "Clustal W multiple sequence alignment" << endl;
00210         
00211         vector< int64 > seq_pos( sequences.size(), 0 );
00212         if( positions.size() == sequences.size() )
00213                 seq_pos = positions;
00214         vector< string > seq_names;
00215         int pos;
00216         uint seqI = 0;
00217         int longestNameSize = 0;
00218         for( ; seqI < sequences.size(); seqI++ )
00219         {
00220                 seq_names.push_back( names[ seqI ].substr( 0, 30 ) );
00221                 if ( seq_names[ seq_names.size() - 1 ].length() > longestNameSize)
00222                         longestNameSize=seq_names[ seq_names.size() - 1 ].length();
00223         }
00224         // add space padding to the names
00225         for( seqI = 0; seqI < seq_names.size(); seqI++ )
00226                 seq_names[ seqI ] += string( (longestNameSize - seq_names[ seqI ].length()) + 6, ' ' ); 
00227         for (pos=0; pos+60 < alignedSeqsSize(); pos+=60)
00228         {
00229                 os << endl
00230                    << endl;
00231                 for( seqI = 0; seqI < sequences.size(); seqI++ )
00232                 {
00233                         os << seq_names[ seqI ];
00234                         const string& seq = sequences[ seqI ];
00235                         string cur_seq = seq.substr( pos, 60 );
00236                         os << cur_seq;
00237                         if( output_positions ){
00238                                 seq_pos[ seqI ] += 60 - countGaps( cur_seq );
00239                                 os << " " << seq_pos[ seqI ];
00240                         }
00241                         os << endl;
00242                 }
00243         }
00244         
00245         if (pos<alignedSeqsSize())
00246         {
00247                 os << endl
00248                    << endl;
00249         
00250                 for( seqI = 0; seqI < sequences.size(); seqI++ )
00251                 {
00252                         os << seq_names[ seqI ];
00253                         const string& seq = sequences[ seqI ];
00254                         string cur_seq = seq.substr( pos, 60 );
00255                         os << cur_seq;
00256                         if( output_positions ){
00257                                 seq_pos[ seqI ] += 60 - countGaps( cur_seq );
00258                                 os << " " << seq_pos[ seqI ];
00259                         }
00260                         os << endl;
00261                 }
00262         }                  
00263         return true;
00264 }
00265 
00266 
00267 bool gnAlignedSequences::outputMSF(ostream& os) const
00268 {
00269         os << "//" << endl;
00270         
00271         list <pair <string*, string*> >::const_iterator sequenceItr = alignedSequences.begin();
00272         int longestSeqNameLength = 0;
00273         for ( ; sequenceItr!=alignedSequences.end(); sequenceItr++)
00274         {
00275                 if ((*(*sequenceItr).first).length() > longestSeqNameLength)
00276                         longestSeqNameLength = (*(*sequenceItr).first).length();
00277         }
00278         
00279         int pos = 0;
00280         for ( ; pos+60<(*(*alignedSequences.begin()).second).size(); pos+=60)
00281         {
00282                 // output spaces until sequence ordinates
00283                 for (int i=0; i<longestSeqNameLength+2; i++)
00284                         os << " ";
00285                 
00286                 os << pos+1;
00287                 for (int i=0; i<54; i++) // output appropriate number of spaces on ordinate line
00288                         os << " ";
00289                 os << pos+60 << endl;
00290                 
00291                 for (sequenceItr=alignedSequences.begin(); sequenceItr!=alignedSequences.end(); sequenceItr++)
00292                 {
00293                         int spaces = longestSeqNameLength-(*(*sequenceItr).first).length();
00294                         for (int i=0; i<spaces; i++)
00295                                 os << " ";
00296                                 
00297                         os << (*(*sequenceItr).first) << "  ";
00298                         
00299                         string seq = (*(*sequenceItr).second).substr(pos, 60);
00300                         for (int i=0; i<60; i++)
00301                         {
00302                                 if (seq[i]=='-')
00303                                         os << ".";
00304                                 else
00305                                         os << seq[i];
00306                         }
00307                         os << endl;
00308                 }
00309                 
00310                 os << endl;
00311         }
00312         
00313         if (pos<(*(*alignedSequences.begin()).second).size())
00314         {
00315                 // output spaces until sequence ordinates
00316                 for (int i=0; i<longestSeqNameLength+2; i++)
00317                         os << " ";
00318                 
00319                 os << pos+1;
00320                 for (int i=0; i<(*(*alignedSequences.begin()).second).size()-pos; i++) // output appropriate number of spaces on ordinate line
00321                         os << " ";
00322                 os << (*(*alignedSequences.begin()).second).size() << endl;
00323                 
00324                 for (sequenceItr=alignedSequences.begin(); sequenceItr!=alignedSequences.end(); sequenceItr++)
00325                 {
00326                         int spaces = longestSeqNameLength-(*(*sequenceItr).first).length();
00327                         for (int i=0; i<spaces; i++)
00328                                 os << " ";
00329                                 
00330                         os << (*(*sequenceItr).first) << "  ";
00331                         
00332                         string seq = (*(*sequenceItr).second).substr(pos, (*(*alignedSequences.begin()).second).size()-pos );
00333                         for (int i=0; i<seq.length(); i++)
00334                         {
00335                                 if (seq[i]=='-')
00336                                         os << ".";
00337                                 else
00338                                         os << seq[i];
00339                         }
00340                         os << endl;
00341                 }
00342                 
00343                 os << endl;
00344         }
00345         
00346         return false;
00347 }
00348 
00349 
00350 
00351 bool gnAlignedSequences::outputNexus(ostream& os) const
00352 {
00353         os << "begin data;" << endl
00354            << "  dimensions ntax=" << sequences.size();
00355         if( sequences.size() == 0 )
00356                 return true;
00357         os << " nchar=" 
00358            << sequences[0].length() << ";" << endl
00359            << "  ;" << endl
00360            << "  matrix" << endl;
00361            
00362         list <pair <string*, string*> >::const_iterator sequenceItr = alignedSequences.begin();
00363         int i;
00364         int seqI;
00365         int longestSeqNameLength = 0;
00366         for( seqI = 0; seqI < sequences.size(); seqI++ ){
00367                 if( names[ seqI ].length() > longestSeqNameLength )
00368                         longestSeqNameLength = names[ seqI ].length();
00369         }
00370         
00371         int pos = 1;
00372         for ( ; pos+59 < sequences[0].size(); pos+=60)
00373         {
00374                 os << "[";
00375                 // output spaces until sequence ordinates
00376                 for (i = 0; i < longestSeqNameLength+2; i++)
00377                         os << " ";
00378                 
00379                 os << pos;
00380                 for (i = 0; i < 54; i++) // output appropriate number of spaces on ordinate line
00381                         os << " ";
00382                 os << pos+59 << "]" << endl;
00383                 
00384                 for( seqI = 0; seqI < sequences.size(); seqI++ )
00385                 {
00386                         os << names[ seqI ];
00387                         
00388                         int spaces = longestSeqNameLength - names[ seqI ].length();
00389                         for (i = 0; i < spaces + 2; i++)
00390                                 os << " ";
00391                                 
00392                         string seq = sequences[ seqI ].substr( pos, 60 );
00393                         os << seq << endl;
00394                 }
00395                 
00396                 os << endl;
00397         }
00398         
00399         // write out the last little bit
00400         if (pos - 1 < sequences[0].size())
00401         {
00402                 // output spaces until sequence ordinates
00403                 os << "[";
00404                 // output spaces until sequence ordinates
00405                 for (i = 0; i < longestSeqNameLength + 2; i++)
00406                         os << " ";
00407                 
00408                 os << pos;
00409                 for (i=0; i < sequences[0].size() - pos + 1; i++) // output appropriate number of spaces on ordinate line
00410                         os << " ";
00411                 os << pos+59 << "]" << endl;
00412                 
00413                 for (sequenceItr = alignedSequences.begin(); sequenceItr != alignedSequences.end(); sequenceItr++)
00414                 for( seqI = 0; seqI < sequences.size(); seqI++ )
00415                 {
00416                         os << names[ seqI ];
00417                         
00418                         int spaces = longestSeqNameLength - names[ seqI ].length();
00419                         for (i=0; i<spaces+2; i++)
00420                                 os << " ";
00421                                 
00422                         string seq = sequences[seqI].substr( pos, sequences[seqI].size()-pos+1 );
00423                         os << seq << endl;
00424                 }
00425                 
00426                 os << endl;
00427         }
00428         
00429         return true;
00430 }
00431 /*
00432 bool gnAlignedSequences::outputNexus(ostream& os) const
00433 {
00434         os << "begin data;" << endl
00435            << "  dimensions ntax=" << alignedSequences.size() << " nchar=" 
00436            << alignedSequences.begin()->second->size() << ";" << endl
00437            << "  ;" << endl
00438            << "  matrix" << endl;
00439            
00440         list <pair <string*, string*> >::const_iterator sequenceItr = alignedSequences.begin();
00441         int i;
00442         int longestSeqNameLength = 0;
00443         for ( ; sequenceItr != alignedSequences.end(); sequenceItr++)
00444         {
00445                 if ( sequenceItr->first->length() > longestSeqNameLength )
00446                         longestSeqNameLength = sequenceItr->first->length();
00447         }
00448         
00449         int pos = 1;
00450         for ( ; pos+59 < alignedSequences.begin()->second->size(); pos+=60)
00451         {
00452                 os << "[";
00453                 // output spaces until sequence ordinates
00454                 for (i = 0; i < longestSeqNameLength+2; i++)
00455                         os << " ";
00456                 
00457                 os << pos;
00458                 for (i = 0; i < 54; i++) // output appropriate number of spaces on ordinate line
00459                         os << " ";
00460                 os << pos+59 << "]" << endl;
00461                 
00462                 for (sequenceItr=alignedSequences.begin(); sequenceItr != alignedSequences.end(); sequenceItr++)
00463                 {
00464                         os << (*(*sequenceItr).first);
00465                         
00466                         int spaces = longestSeqNameLength - sequenceItr->first->length();
00467                         for (i = 0; i < spaces + 2; i++)
00468                                 os << " ";
00469                                 
00470                         string seq = sequenceItr->second->substr( pos, 60 );
00471                         for (i = 0; i < 60; i++)
00472                                 os << seq[i];
00473                         os << endl;
00474                 }
00475                 
00476                 os << endl;
00477         }
00478         
00479         if (pos - 1 < alignedSequences.begin()->second->size())
00480         {
00481                 // output spaces until sequence ordinates
00482                 os << "[";
00483                 // output spaces until sequence ordinates
00484                 for (i = 0; i < longestSeqNameLength + 2; i++)
00485                         os << " ";
00486                 
00487                 os << pos;
00488                 for (i=0; i < alignedSequences.begin()->second->size() - pos + 1; i++) // output appropriate number of spaces on ordinate line
00489                         os << " ";
00490                 os << pos+59 << "]" << endl;
00491                 
00492                 for (sequenceItr = alignedSequences.begin(); sequenceItr != alignedSequences.end(); sequenceItr++)
00493                 {
00494                         os << *(sequenceItr->first);
00495                         
00496                         int spaces = longestSeqNameLength-(*(*sequenceItr).first).length();
00497                         for (i=0; i<spaces+2; i++)
00498                                 os << " ";
00499                                 
00500                         string seq = (*(*sequenceItr).second).substr( pos, (*(*alignedSequences.begin()).second).size()-pos+1 );
00501                         for (i=0; i<seq.length(); i++)
00502                                 os << seq[i];
00503                         os << endl;
00504                 }
00505                 
00506                 os << endl;
00507         }
00508         
00509         return false;
00510 }
00511 */
00512 bool gnAlignedSequences::outputMega(ostream& os) const
00513 {
00514         os << "#MEGA" << endl
00515            << "TITLE:" << endl;
00516            
00517         list <pair <string*, string*> >::const_iterator sequenceItr = alignedSequences.begin();
00518         int longestSeqNameLength = 0;
00519 
00520         for ( ; sequenceItr!=alignedSequences.end(); sequenceItr++){
00521                 if (sequenceItr->first->length() > longestSeqNameLength)
00522                         longestSeqNameLength = sequenceItr->first->length();
00523         }
00524         
00525         gnSeqI pos = 1;
00526         gnSeqI remaining_len = alignedSequences.begin()->second->size();        //determine the amount to be written
00527         // loop while there is more to write
00528         while(remaining_len > 0){
00529                 os << endl;
00530                 gnSeqI write_chars = MEGA_ALIGN_COLUMNS < remaining_len ? MEGA_ALIGN_COLUMNS : remaining_len;
00531 
00532                 //write each sequence's line
00533                 for (sequenceItr = alignedSequences.begin(); sequenceItr != alignedSequences.end(); sequenceItr++){
00534                         os << "#" << *(sequenceItr->first);
00535                         
00536                         int spaces = longestSeqNameLength - sequenceItr->first->length();
00537                         for (int i = 0; i < spaces + 5; i++)
00538                                 os << " ";
00539 
00540                         string seq = sequenceItr->second->substr( pos, write_chars );
00541                         for (int i = 0; i < write_chars; i++)
00542                                 os << seq[i];
00543                         os << endl;
00544                 }
00545                 os << endl;
00546 
00547                 pos += write_chars;
00548                 remaining_len -= write_chars;
00549         }
00550         return true;
00551 }
00552 
00553 
00554 bool gnAlignedSequences::outputCodon(ostream& os) const
00555 {
00556         list <pair <string*, string*> >::const_iterator sequenceItr = alignedSequences.begin();
00557         
00558         os << '\t' << alignedSequences.size() << '\t' << (*(*sequenceItr).second).size() << endl;
00559         
00560         int offset = 10;
00561         for ( ; sequenceItr != alignedSequences.end(); sequenceItr++)
00562         {
00563                 int position = 0;
00564                 string seq = (*(*sequenceItr).second);
00565                 string seqName = (*(*sequenceItr).first);
00566                 if (seqName.size() <= offset) 
00567                 {
00568                         for (int i=seqName.size(); i<offset; i++)
00569                                 seqName += " ";
00570                 }
00571                 
00572                 else 
00573                 {
00574                         string temp = seqName;
00575                         seqName = "";
00576                         for (int i=0; i<offset; i++)
00577                                 seqName += temp[i];
00578                 }
00579                 
00580                 os << seqName;
00581                 int count = 0;
00582                 for ( ; position+3<seq.size(); position+=3)
00583                 {
00584                         if (count == 20)
00585                         {
00586                                 count = 0;
00587                                 os << endl;
00588                         }
00589                         for (int i=position; i<position+3; i++)
00590                            os << seq[i];
00591                            
00592                         os << ' ';
00593                         count++;
00594                 }
00595                 
00596                 for ( ; position < seq.size(); position++)
00597                         os << seq[position];
00598                 
00599                 os << endl;
00600         }
00601         
00602         return false;
00603 }
00604 
00605 
00606 bool gnAlignedSequences::outputWithConsensus(ostream& os)
00607 {
00608         list <pair <string*, string*> >::iterator sequenceItr = alignedSequences.begin();
00609         
00610         os << '\t' << alignedSequences.size() << '\t' << (*(*sequenceItr).second).size() << endl;
00611         
00612         int offset = 10;
00613         for ( ; sequenceItr != alignedSequences.end(); sequenceItr++)
00614         {
00615                 int position = 0;
00616                 string seq = (*(*sequenceItr).second);
00617                 string seqName = (*(*sequenceItr).first);
00618                 if (seqName.size() <= offset) 
00619                 {
00620                         for (int i=seqName.size(); i<offset; i++)
00621                                 seqName += " ";
00622                 }
00623                 
00624                 else 
00625                 {
00626                         string temp = seqName;
00627                         seqName = "";
00628                         for (int i=0; i<offset; i++)
00629                                 seqName += temp[i];
00630                 }
00631                 
00632                 os << seqName;
00633                 int count = 0;
00634                 for ( ; position+10<seq.size(); position+=10)
00635                 {
00636                         if (count == 5)
00637                         {
00638                                 count = 0;
00639                                 os << endl;
00640                         }
00641                         for (int i=position; i<position+10; i++)
00642                            os << seq[i];
00643                            
00644                         os << ' ';
00645                         count++;
00646                 }
00647                 
00648                 for ( ; position < seq.size(); position++)
00649                         os << seq[position];
00650                 
00651                 os << endl;
00652         }
00653         
00654         int position = 0;
00655         int count = 0;
00656         os << "Consensus:";
00657         for ( ; position+10<consensus.size(); position+=10)
00658         {
00659                 if (count == 5)
00660                 {
00661                         count = 0;
00662                         os << endl;
00663                 }
00664                 for (int i=position; i<position+10; i++)
00665                    os << consensus[i];
00666                    
00667                 os << ' ';
00668                 count++;
00669         }
00670         
00671         for ( ; position < consensus.size(); position++)
00672                 os << consensus[position];
00673         
00674         os << endl;
00675         
00676         return false;
00677 }
00678 
00679 
00680 gnAlignedSequences gnAlignedSequences::getAlignedSegment(unsigned start, unsigned stop)
00681 {
00682         gnAlignedSequences newAlignment;
00683         
00684         addAllSegments(newAlignment, start, stop);
00685         newAlignment.buildConsensus();
00686         
00687         return newAlignment;
00688 }
00689 
00690 
00691 gnAlignedSequences gnAlignedSequences::getCodons(int readingFrame, int startCodon, int codonMultiple)
00692 {
00693         gnAlignedSequences toReturn;
00694         int startBase = ((startCodon*3)-2)+(readingFrame-1);
00695         
00696         for (int index=startBase; (index+2)<(*(*alignedSequences.begin()).second).size(); index+=(codonMultiple*3))
00697                 addAllSegmentsReplaceGaps(toReturn, index, index+2);
00698                 
00699         toReturn.buildConsensus();
00700         
00701         return toReturn;
00702 }
00703 
00704 
00705 gnSeqI gnAlignedSequences::alignedSeqsSize() const
00706 {
00707         if( sequences.size() > 0 )
00708                 return sequences[ 0 ].size();
00709         return 0;
00710 }
00711 
00712 
00713 bool gnAlignedSequences::removeAlignedSeq(string seqName)
00714 {
00715         list <pair <string*, string*> >::iterator sequenceItr = alignedSequences.begin();
00716         
00717         for ( ; sequenceItr != alignedSequences.end(); sequenceItr++)
00718         {
00719                 if ((*(*sequenceItr).first) == seqName)
00720                 {
00721                         alignedSequences.erase(sequenceItr);
00722                         return true;
00723                 }
00724         }       
00725         
00726         return false;
00727 }
00728 
00729 
00730 bool gnAlignedSequences::removeAlignedSeq(unsigned index)
00731 {
00732         list <pair <string*, string*> >::iterator sequenceItr = alignedSequences.begin();
00733         int i = 0;
00734         
00735         for ( ; sequenceItr != alignedSequences.end(); sequenceItr++)
00736         {
00737                 if (i == index)
00738                 {
00739                         alignedSequences.erase(sequenceItr);
00740                         return true;
00741                 }
00742                 
00743                 i++;
00744         }       
00745         
00746         return false;
00747 }
00748 
00749 
00750 void gnAlignedSequences::concatenateAlignedSequences(gnAlignedSequences toConcat)
00751 {
00752         list <pair <string*, string*> >::iterator toConcatItr = toConcat.alignedSequences.begin();
00753         list <pair <string*, string*> >::iterator originalItr;
00754         
00755         unsigned largestSeqSize = 0;
00756         
00757         for ( ; toConcatItr != toConcat.alignedSequences.end(); toConcatItr++)
00758         {
00759                 for (originalItr = alignedSequences.begin(); originalItr != alignedSequences.end(); originalItr++)
00760                 {
00761                         if ((*(*originalItr).second).size() > largestSeqSize)
00762                                 largestSeqSize = (*(*originalItr).second).size();
00763                         
00764                         if ((*(*toConcatItr).first) == (*(*originalItr).first)) {
00765                                 string seq = (*(*originalItr).second);
00766                                 seq += (*(*toConcatItr).second);
00767                                 (*(*originalItr).second) = seq;
00768                                 break;
00769                         }
00770                 }
00771         }
00772         
00773         for (originalItr = alignedSequences.begin(); originalItr != alignedSequences.end(); originalItr++)
00774         {
00775                 while ((*(*originalItr).second).size() < largestSeqSize)
00776                         (*(*originalItr).second).append("-");
00777         }
00778         
00779         buildConsensus();
00780 }
00781 
00782 
00783 void gnAlignedSequences::extractVariableSites(gnAlignedSequences &variableSites, bool countGapsAsMismatches)
00784 {
00785         list <pair <string*, string*> >::iterator originalItr = alignedSequences.begin();
00786         
00787         int alignedSeqSize = (*((*originalItr).second)).size();
00788         
00789         char positionBase;
00790         int matchStart = alignedSeqSize,
00791                 matchStop = alignedSeqSize;
00792                 
00793         bool mismatch = false;
00794         
00795         indexPositions.resize(0);
00796         
00797         for (int position=alignedSeqSize; position > 0; position--)
00798         {
00799                 originalItr = alignedSequences.begin();
00800                 positionBase = (*((*originalItr).second))[position-1];
00801                 while (!countGapsAsMismatches && (*((*originalItr).second))[position-1] == '-')
00802                 {
00803                         originalItr++;
00804                         positionBase = (*((*originalItr).second))[position-1];
00805                         if (originalItr == alignedSequences.end()) break;
00806                 }
00807                 
00808                 if (originalItr == alignedSequences.end()) break;
00809                         
00810                 for ( ; originalItr != alignedSequences.end(); originalItr++)
00811                 {
00812                         // extend matched segment before adding match to variableSites
00813                         // much less expensive to add blocks of sites rather than a single site at a time
00814                         if (positionBase != (*((*originalItr).second))[position-1])// && matchStop==position)
00815                         {
00816                                 if (!(!countGapsAsMismatches && (*((*originalItr).second))[position-1] == '-'))
00817                                 {
00818                                         mismatch = true;
00819                                         break;
00820                                 }
00821                         }
00822                 }
00823                 
00824                 if (!mismatch)
00825                         matchStart--;
00826                 
00827                 else
00828                 {
00829                         matchStart--;
00830                         matchStop = matchStart;
00831                         
00832                         //variableSites.indexPositions.resize(variableSites.indexPositions.size()+1);
00833                         variableSites.indexPositions.push_back(position);//[indexPositions.size()-1]=position;
00834                 }
00835                 
00836                 mismatch = false;
00837         }
00838         
00839         for (int i=variableSites.indexPositions.size()-1; i>=0; i--)
00840                 addAllSegments(variableSites, variableSites.indexPositions[i], variableSites.indexPositions[i]);
00841                 
00842         variableSites.buildConsensus();
00843 }
00844 
00845 
00846 bool gnAlignedSequences::collapseIdenticalSequences()
00847 {
00848         list <pair <string*, string*> >::iterator itr1 = alignedSequences.begin();
00849         list <pair <string*, string*> >::iterator itr2;
00850         bool toReturn = false;
00851         
00852         for ( ; itr1!=alignedSequences.end(); itr1++)
00853         {
00854                 itr2=alignedSequences.begin();
00855                 for (itr2++; itr2!=alignedSequences.end(); itr2++)
00856                 {
00857                         if (((*(*itr1).second)==(*(*itr2).second)) && itr1!=itr2)
00858                         {
00859                                 list <pair <string*, string*> >::iterator itrTemp = itr2;
00860                                 itr2--;
00861                                 alignedSequences.erase(itrTemp);
00862                                 toReturn = true;
00863                         }
00864                 }
00865         }
00866 
00867         return toReturn;
00868 }
00869 
00870 
00871 vector <char> gnAlignedSequences::operator[]( const int offset ) //const
00872 {
00873         vector <char> toReturn;
00874         list <pair <string*, string*> >::iterator itr;
00875         
00876         for (itr=alignedSequences.begin(); itr!=alignedSequences.end(); itr++)
00877                 toReturn.push_back((*(*itr).second)[offset]);
00878         
00879         return toReturn;
00880 }
00881 
00882 
00883 bool gnAlignedSequences::readClustalWAlignment()
00884 {
00885         ifstream alignmentFile;
00886         
00887         alignmentFile.open(alignedSequenceFileName.c_str(), ios::in | ios::binary);
00888         
00889         if (!(alignmentFile.is_open()))
00890         {
00891                 cout << "Unable to open " << alignedSequenceFileName << ".\n"
00892                          << "Exiting.\n";
00893                 
00894                 exit(-1);
00895         }
00896         
00897         string line;
00898         
00899         // REMOVE 1st 3 LINES FROM .ALN FILE - SEQUENCE BEGINS ON LINE 4
00900         getline(alignmentFile, line);
00901         getline(alignmentFile, line);
00902         getline(alignmentFile, line);
00903         
00904         bool constructSuccess = constructClustalWAlignedSequenceList(alignmentFile);
00905         
00906         alignmentFile.close();
00907         
00908         if (constructSuccess) return true;
00909         
00910         return false;
00911 }
00912 
00913 
00914 bool gnAlignedSequences::readPhylipAlignment()
00915 {
00916         ifstream alignmentFile;
00917         
00918         alignmentFile.open(alignedSequenceFileName.c_str(), ios::in | ios::binary);
00919         
00920         if (!(alignmentFile.is_open()))
00921         {
00922                 cout << "Unable to open " << alignedSequenceFileName << ".\n"
00923                          << "Exiting.\n";
00924                 
00925                 exit(-1);
00926         }
00927         
00928         string line;
00929         
00930         // REMOVE 1st LINE FROM PHYLIP FILE - SEQUENCE NUMBER AND LENGTH OF SEQUENCES
00931         getline(alignmentFile, line);
00932         
00933         bool constructSuccess = constructPhylipAlignedSequenceList(alignmentFile);
00934         
00935         alignmentFile.close();
00936         
00937         if (constructSuccess) return true;
00938         
00939         return false;
00940 }
00941 
00942 
00943 bool gnAlignedSequences::readMSFAlignment()
00944 {
00945         ifstream alignmentFile;
00946         
00947         alignmentFile.open(alignedSequenceFileName.c_str(), ios::in | ios::binary);
00948         
00949         if (!(alignmentFile.is_open()))
00950         {
00951                 cout << "Unable to open " << alignedSequenceFileName << ".\n"
00952                          << "Exiting.\n";
00953                 
00954                 exit(-1);
00955         }
00956         
00957         string line;
00958         getline(alignmentFile, line);
00959         
00960         // remove format's initial annotation
00961         while (line.find("//")<0 || line.find("//")>line.size())
00962                 getline(alignmentFile, line);
00963                 
00964         bool constructSuccess = constructMSFAlignedSequenceList(alignmentFile);
00965         
00966         alignmentFile.close();
00967         
00968         if (constructSuccess) return true;
00969         
00970         return false;
00971 }
00972 
00973 
00978 bool gnAlignedSequences::readRelaxedNexusAlignment( istream& align_stream ){
00979         
00980         string line;
00981         string comments;
00982         getline( align_stream, line );
00983         if( line == "#NEXUS" ){
00984                 getline( align_stream, line );
00985         }
00986         if( line[0] == '[' ){
00987                 getline( align_stream, line );
00988                 while( line[0] != ']' ){
00989                         comments += line + "\n";
00990                         getline( align_stream, line );
00991                 }
00992                 getline( align_stream, line );  // possibly empty line
00993                 if( line.size() == 0 )
00994                         getline( align_stream, line );
00995         }
00996         while( line.length() == 0 )
00997                 getline( align_stream, line );
00998         // this is the alignment info line
00999         stringstream align_info( line );
01000         uint seq_count;
01001         gnSeqI align_len;
01002         align_info >> seq_count;
01003         align_info >> align_len;
01004         sequences = vector< string >( seq_count );
01005         // now read in each alignment line
01006         for( uint seqI = 0; seqI < seq_count; seqI++ ){
01007                 align_stream >> line;
01008                 names.push_back( line );
01009 //              getline( align_stream, line );
01010                 align_stream >> sequences[ seqI ];
01011 //              Array< char > seq_data( align_len );
01012 //              align_stream.read( seq_data.data, align_len );
01013 //              sequences.push_back( seq_data.data );
01014         }
01015         
01016         // read off the trailing newline
01017         getline( align_stream, line );
01018         return true;
01019 }
01020 
01021 
01022 bool gnAlignedSequences::readNexusAlignment()
01023 {
01024         ifstream alignmentFile;
01025         
01026         alignmentFile.open(alignedSequenceFileName.c_str(), ios::in | ios::binary);
01027         
01028         if (!(alignmentFile.is_open()))
01029         {
01030                 cout << "Unable to open " << alignedSequenceFileName << ".\n"
01031                          << "Exiting.\n";
01032                 
01033                 exit(-1);
01034         }
01035         
01036         string line;
01037         getline(alignmentFile, line);
01038         
01039         // remove format's initial annotation
01040         while (line.find("begin data;")<0 || line.find("begin data;")>line.length()) // searching for "begin data;"
01041                 getline(alignmentFile, line);
01042                 
01043         bool constructSuccess = constructNexusAlignedSequenceList(alignmentFile);
01044         
01045         alignmentFile.close();
01046         
01047         if (constructSuccess) return true;
01048         
01049         return false;
01050 }
01051 
01052 
01053 bool gnAlignedSequences::readMegaAlignment()
01054 {
01055         ifstream alignmentFile;
01056         
01057         alignmentFile.open(alignedSequenceFileName.c_str(), ios::in | ios::binary);
01058         
01059         if (!(alignmentFile.is_open()))
01060         {
01061                 cout << "Unable to open " << alignedSequenceFileName << ".\n"
01062                          << "Exiting.\n";
01063                 
01064                 exit(-1);
01065         }
01066         
01067         string line;
01068         // remove first three lines from mega file - prior to begining of sequence data
01069         getline(alignmentFile, line);
01070         getline(alignmentFile, line);
01071         getline(alignmentFile, line);
01072                 
01073         bool constructSuccess = constructMegaAlignedSequenceList(alignmentFile);
01074         
01075         alignmentFile.close();
01076         
01077         if (constructSuccess) return true;
01078         
01079         return false;
01080 }
01081 
01082 
01083 bool gnAlignedSequences::constructClustalWAlignedSequenceList(ifstream& alignmentFile)
01084 {
01085         string line;
01086         
01087         // GET THE 1st LINE OF SEQUENCE
01088         getline(alignmentFile, line);
01089         
01090         while (alignmentFile.good())
01091         {
01092                 while (line[0] != ' ' && line[0] != '\0')
01093                 {
01094                         string sequenceName;
01095                         int i;
01096                         for (i=0; line[i] != ' '; i++)
01097                                 sequenceName += line[i];
01098                                 
01099                         const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
01100                         string sequenceBases;
01101                         for(int i=sequenceName.size(); i < line.length(); i++){
01102                                 if ((*newFilter).IsValid(line[i]))
01103                                 sequenceBases += line[i];
01104                         }
01105                         
01106                         list <pair <string*, string*> >::iterator sequenceItr; 
01107                         if (!(sequenceNameInList(sequenceName, sequenceItr)))
01108                         {
01109                                 pair <string*, string*> sequence;
01110                                 sequence.first = new string(sequenceName);
01111                                 
01112                                 sequence.second = new string( sequenceBases );
01113                                 
01114                                 alignedSequences.push_back(sequence);
01115                         }
01116                         
01117                         else
01118                                 (*(*sequenceItr).second).append(sequenceBases);
01119         
01120                         getline(alignmentFile, line);
01121                 }
01122                 
01123                 getline(alignmentFile, line);
01124                 getline(alignmentFile, line);
01125         }
01126         
01127         
01128         return false;
01129 }
01130 
01131 
01132 bool gnAlignedSequences::constructPhylipAlignedSequenceList(ifstream& alignmentFile)
01133 {
01134         string line;
01135         
01136         // GET THE 1st LINE OF SEQUENCE
01137         getline(alignmentFile, line);
01138         
01139         while (alignmentFile.good())
01140         {
01141                 if (line[10]!=' ')
01142                 {
01143                         string sequenceName = line.substr(0,10);
01144                         cout << sequenceName << endl;
01145                         const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
01146                         string sequenceBases;
01147                         for(int i=10; i < line.length(); i++)
01148                         {
01149                                 if ((*newFilter).IsValid(line[i]))
01150                                 sequenceBases += line[i];
01151                         }
01152                 
01153                         pair <string*, string*> sequence;
01154                         sequence.first = new string(sequenceName);
01155                         
01156                         sequence.second = new string( sequenceBases );
01157                         
01158                         alignedSequences.push_back(sequence);
01159                         
01160                         getline(alignmentFile, line);
01161                 }
01162 
01163                 // NOT THE 1st LINE IN SEQUENCE (CONTAINS SEQ NAME)
01164                 else
01165                 {
01166                         string sequenceBases;
01167                         while (line[10]==' ' && line[0]!='\0' && line.length()>0)
01168                         {
01169                                 const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
01170                                 for(int i=0; i < line.length(); i++)
01171                                 {
01172                                         if ((*newFilter).IsValid(line[i]))
01173                                         sequenceBases += line[i];
01174                                 }
01175                                 
01176                                 getline(alignmentFile, line);
01177                         }
01178                         
01179                         list <pair <string*, string*> >::iterator sequenceItr = alignedSequences.end();
01180                         sequenceItr--;
01181                         (*(*sequenceItr).second) += sequenceBases;
01182                 }
01183         }
01184         
01185         return false;
01186 }
01187 
01188 
01189 bool gnAlignedSequences::constructMSFAlignedSequenceList(ifstream& alignmentFile)
01190 {
01191         string line;
01192         
01193         // clear coordinate line
01194         getline(alignmentFile, line);
01195 
01196         while (alignmentFile.good())//line[0] != '\0')
01197         {
01198                 getline(alignmentFile, line); // 1st line of sequence
01199                 while (!coordinates(line))
01200                 {
01201                         string sequenceName;
01202                         int i;
01203 
01204                         for (i=0; line[i] == ' '; i++) {}
01205 
01206                         for (; line[i] != ' '; i++)
01207                                 sequenceName += line[i];
01208                                 
01209                         const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
01210                         string sequenceBases;
01211                         for( ; i < line.length(); i++){
01212                                 if ((*newFilter).IsValid(line[i]))
01213                                 sequenceBases += line[i];
01214                             else if (line[i] == '.' || line[i]=='~')
01215                                 sequenceBases += '-';
01216                         }
01217                         
01218                         list <pair <string*, string*> >::iterator sequenceItr; 
01219                         if (!(sequenceNameInList(sequenceName, sequenceItr)))
01220                         {
01221                                 pair <string*, string*> sequence;
01222                                 sequence.first = new string(sequenceName);
01223                                 
01224                                 sequence.second = new string( sequenceBases );
01225                                 
01226                                 alignedSequences.push_back(sequence);
01227                         }
01228                         
01229                         else
01230                                 (*(*sequenceItr).second).append(sequenceBases);
01231         
01232                         getline(alignmentFile, line);
01233                 }
01234         }       
01235         
01236         return false;
01237 }
01238 
01239 
01240 bool gnAlignedSequences::constructNexusAlignedSequenceList(ifstream& alignmentFile)
01241 {
01242         string line;
01243         
01244         // GET THE 1st LINE OF SEQUENCE
01245         getline(alignmentFile, line);
01246         
01247         // searching for "endblock;"
01248         while (alignmentFile.good() && (line.find("endblock;")<0 || line.find("endblock;")>line.length())) 
01249         {
01250                 while (line[0]!='[' && line[0]!=' ' && line[0]!='\n' && line[0]!='\r' && alignmentFile.good())
01251                 {
01252                         string sequenceName;
01253                         int i;
01254                         for (i=0; line[i] != ' '; i++)
01255                                 sequenceName += line[i];
01256                                 
01257                         const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
01258                         string sequenceBases;
01259                         for(int i=sequenceName.size(); i < line.length(); i++){
01260                                 if ( line[i] != '\r' && line[i] != '\n' && line[i] != ' ' )
01261                                 sequenceBases += line[i];
01262                         }
01263                         
01264                         list <pair <string*, string*> >::iterator sequenceItr; 
01265                         if (!(sequenceNameInList(sequenceName, sequenceItr)))
01266                         {
01267                                 pair <string*, string*> sequence;
01268                                 sequence.first = new string(sequenceName);
01269                                 
01270                                 sequence.second = new string( sequenceBases );
01271                                 
01272                                 alignedSequences.push_back(sequence);
01273                         }
01274                         
01275                         else
01276                                 (*(*sequenceItr).second).append(sequenceBases);
01277         
01278                         getline(alignmentFile, line);
01279                 }
01280                 
01281                 getline(alignmentFile, line);
01282         }
01283         
01284         
01285         return false;
01286 }
01287 
01288 
01289 bool gnAlignedSequences::constructMegaAlignedSequenceList(ifstream& alignmentFile)
01290 {
01291         string line;
01292         string consensusSequenceBases;
01293         list <pair <string*, string*> >::iterator alignedSequencesItr;
01294         
01295         // GET THE 1st LINE OF SEQUENCE
01296         getline(alignmentFile, line);
01297         
01298         int previousLineLength = 0;
01299         
01300         // searching for "endblock;"
01301         while (alignmentFile.good()) 
01302         {
01303                 while (line.length()>0 && line[0]=='#')
01304                 {
01305                         string sequenceName;
01306                         for (int i=1; line[i] != ' '; i++)
01307                                 sequenceName += line[i];
01308                                 
01309                         const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
01310                         string sequenceBases;
01311                         bool isInSeqName = true;
01312                         if (alignedSequences.size()>0)// && consensusSequenceBases.size()>0)
01313                                 consensusSequenceBases = (*(*alignedSequencesItr).second);
01314                                 
01315                         for(int i=sequenceName.size(); i < line.length(); i++)
01316                         {
01317                                 // allow only valid characters to be placed - if '.' replace leter
01318                                 // with consensus data
01319                                 if ((*newFilter).IsValid(line[i]) && !isInSeqName)
01320                                 sequenceBases += line[i];
01321                                 
01322                             else if (line[i] == ' ') isInSeqName=false;
01323                                 
01324                             else if (line[i]=='.' && alignedSequences.size()>0 && !isInSeqName) // a reference to the consensus
01325                                 sequenceBases += consensusSequenceBases[sequenceBases.size()+previousLineLength];
01326                         }
01327                         
01328                         list <pair <string*, string*> >::iterator sequenceItr; 
01329                         if (!(sequenceNameInList(sequenceName, sequenceItr)))
01330                         {
01331                                 pair <string*, string*> sequence;
01332                                 sequence.first = new string(sequenceName);
01333                                 
01334                                 sequence.second = new string( sequenceBases );
01335                                 
01336                                 alignedSequences.push_back(sequence);
01337                                 alignedSequencesItr = alignedSequences.begin();
01338                         }
01339                         
01340                         else
01341                                 (*(*sequenceItr).second).append(sequenceBases);
01342         
01343                         getline(alignmentFile, line);
01344                 }
01345                 
01346                 if (alignedSequences.size() > 0)
01347                         previousLineLength = (*(*alignedSequences.begin()).second).size();
01348                 
01349                 getline(alignmentFile, line);
01350         }
01351         
01352         
01353         return false;
01354 }
01355 
01356 
01357 int gnAlignedSequences::sequenceNameInList( string& sequenceName ){
01358         for( uint nameI = 0; nameI < names.size(); nameI++ ){
01359                 if( sequenceName == names[ nameI ] )
01360                         return nameI;
01361         }
01362         return -1;
01363 }
01364 
01365 bool gnAlignedSequences::sequenceNameInList(string sequenceName, list <pair <string*, string*> >::iterator &sequenceItr)
01366 {
01367         for (sequenceItr = alignedSequences.begin(); sequenceItr != alignedSequences.end(); sequenceItr++)
01368         {
01369                 if (sequenceName == (*(*sequenceItr).first))
01370                         return true;
01371         }
01372         
01373         return false;
01374 }
01375 
01376 
01377 bool gnAlignedSequences::buildConsensus()
01378 {
01379         char consensusBase = '-';
01380 
01381         consensus = "";
01382         
01383         vector <char> crossAlignmentBases;
01384         for (int index=0; index<(*(*alignedSequences.begin()).second).size(); index++)
01385         {
01386                 vector <int> baseCounts(26, 0);
01387                 crossAlignmentBases = (*this)[index];
01388                 /*list <pair <string*, string*> >::iterator itr = alignedSequences.begin();
01389                 itr++;*/
01390                 for (int i=0; i<crossAlignmentBases.size(); i++)
01391                 {
01392                         // to hold knowledge of consensus if MEGA '.' format employed
01393                         // ('.'==same as base in 1st sequence)
01394                         if (i == 0)
01395                                 consensusBase=crossAlignmentBases[i];
01396                         
01397                         // consensus already established if in MEGA '.' format - the 1st seq    
01398                         if (i>0 && crossAlignmentBases[i]=='.')
01399                                 break;
01400                 
01401                         else if (crossAlignmentBases[i] != '-')
01402                         {
01403                                 int baseIndex = determineBaseIndex(crossAlignmentBases[i]);
01404                                 baseCounts[baseIndex]++;
01405                         }
01406                 }
01407                 
01408                 int toAppendToConsensus = 0;
01409                 for (int i=1; i<baseCounts.size(); i++)
01410                 {
01411                         // strictly alphabetic - count ties are broken lexigraphically
01412                         if (baseCounts[i] > baseCounts[toAppendToConsensus])
01413                                 toAppendToConsensus = i;
01414 
01415                         /* nearly functional code for replacing '.'s w/ consensus data
01416                         if (crossAlignmentBases[i]=='.')
01417                         {
01418                                 (*(*itr).second).erase(index, 1);
01419                                 string toInsert;
01420                                 toInsert += crossAlignmentBases[0];
01421                                 (*(*itr).second).insert(index, toInsert);
01422                         }
01423 
01424                         itr++;*/
01425                 }
01426                 
01427                 consensus += (toAppendToConsensus+65);
01428         }
01429 
01430         return false;
01431 }
01432 
01433 
01434 void gnAlignedSequences::addSequence(string& seqToAdd, string& seqName)
01435 {
01436         sequences.push_back( seqToAdd );
01437         names.push_back( seqName );
01438 }
01439 
01440 
01441 void gnAlignedSequences::addSequence(gnSequence& seqToAdd, string& seqName)
01442 {
01443 
01444         ErrorMsg( "Fix gnAlignedSequences::addSequence()" );
01445         sequences.push_back( seqToAdd.ToString() );
01446         names.push_back( seqName );
01447 
01448 /*      list <pair <string*, string*> >::iterator itr;
01449         if (!sequenceNameInList(seqName, itr))
01450         {
01451                 pair <string*, string*> toAdd;
01452                 toAdd.first = new string(seqName);
01453                 toAdd.second = new string( seqToAdd.ToString() );
01454                 
01455                 alignedSequences.push_back(toAdd);
01456         }
01457 
01458         else
01459         {
01460                 (*((*itr).second)) += seqToAdd.ToString();
01461         }
01462 */
01463 }
01464 
01465 
01466 void gnAlignedSequences::addSequence(gnSequence seqToAdd, string seqName, int consensusStart, string originalConsensus)
01467 {
01468         list <pair <string*, string*> >::iterator itr;
01469         if (!sequenceNameInList(seqName, itr))
01470         {
01471                 pair <string*, string*> toAdd;
01472                 toAdd.first = new string(seqName);
01473                 string seq = seqToAdd.ToString();
01474                 toAdd.second = new string( seq );
01475                 (*toAdd.second).erase();
01476                 
01477                 for (int i=0; i<(*toAdd.second).size(); i++)
01478                 {
01479                         if (seq[i] == '-')
01480                                 seq[i] = originalConsensus[consensusStart+i-1];
01481                 }
01482                 
01483                 (*toAdd.second) = seq;
01484                 
01485                 alignedSequences.push_back(toAdd);
01486         }
01487 
01488         else
01489         {
01490                 string seq = (*((*itr).second));
01491                 seq += seqToAdd.ToString();
01492                 for (int i=0; i<seq.size(); i++)
01493                 {
01494                         if (seq[i+(*((*itr).second)).size()]=='-' && originalConsensus.size()>0)
01495                                 seq[i+(*((*itr).second)).size()] = originalConsensus[consensusStart+i-1];
01496                 }
01497                 
01498                 (*((*itr).second)) = seq;
01499         }
01500 }
01501 
01502 
01503 void gnAlignedSequences::addAllSegments(gnAlignedSequences &alignment, unsigned start, unsigned stop)
01504 {
01505         for ( uint seqI = 0; seqI < alignment.sequences.size(); seqI++ ){
01506                 if (stop == 0 || stop == alignment.sequences[ seqI ].size()-1)
01507                         stop = alignment.sequences[ seqI ].size();
01508                 string seq = alignment.sequences[ seqI ].substr(start, stop-start+1);
01509                 alignment.addSequence( seq, alignment.names[ seqI ] );
01510 
01511         }
01512 }
01513 
01514 
01515 void gnAlignedSequences::addAllSegmentsReplaceGaps(gnAlignedSequences &alignment, unsigned start, unsigned stop)
01516 {
01517         list <pair <string*, string*> >::iterator alignedItr = alignedSequences.begin();
01518         for ( ; alignedItr != alignedSequences.end(); alignedItr++)
01519         {
01520                 if (stop == 0 || stop == (*(*alignedItr).second).size()-1)
01521                         stop = (*(*alignedItr).second).size();
01522                         
01523                 alignment.addSequence(((*(*alignedItr).second).substr(start, stop-start+1)), 
01524                                                           ((*(*alignedItr).first)), start, consensus);
01525         }
01526 }
01527 
01528 
01529 void gnAlignedSequences::removeAllSegments(unsigned start, unsigned stop)
01530 {
01531         list <pair <string*, string*> >::iterator alignedItr = alignedSequences.begin();
01532         for ( ; alignedItr != alignedSequences.end(); alignedItr++)
01533         {
01534                 if (stop == 0)
01535                         stop = (*(*alignedItr).second).size();
01536                         
01537                 (alignedItr->second)->erase(start, stop-start+1);
01538         }
01539 
01540         cout << start << " " << stop << ": " << stop-start+1 << endl;
01541 }
01542 
01543 
01544 int gnAlignedSequences::determineBaseIndex(char base)
01545 {
01546         if (base < 91) // Upper Case
01547                 return (base-65);
01548                 
01549         // Lower Case
01550         return (base-97);
01551 }
01552 
01553 
01554 bool gnAlignedSequences::coordinates(string line)
01555 {
01556         bool toReturn = true;
01557         
01558         for (int i=0; i<line.length(); i++)
01559         {
01560                 if (line[i]!=' ' && line[i]!='\r' && line[i]!='\n' && (line[i]<48 || line[i]>57))
01561                 {
01562                         toReturn = false;
01563                         break;
01564                 }
01565         }
01566         
01567         return toReturn;
01568 }
01569 
01570 }

Generated on Fri Mar 14 06:01:03 2008 for libMems by doxygen 1.3.6