src/org/gel/mauve/assembly/AssemblyScorer.java

Go to the documentation of this file.
00001 package org.gel.mauve.assembly;
00002 
00003 import java.io.File;
00004 
00005 
00006 import org.gel.mauve.Chromosome;
00007 import org.gel.mauve.Genome;
00008 import org.gel.mauve.LCB;
00009 
00010 
00011 import java.io.BufferedWriter;
00012 import java.io.FileWriter;
00013 import java.io.IOException;
00014 import java.io.PrintStream;
00015 import java.text.NumberFormat;
00016 import java.util.Arrays;
00017 import java.util.Comparator;
00018 import java.util.Iterator;
00019 import java.util.Map;
00020 import java.util.Random;
00021 import java.util.TreeMap;
00022 import java.util.TreeSet;
00023 import java.util.Vector;
00024 import java.util.List;
00025 
00026 import org.gel.mauve.XmfaViewerModel;
00027 import org.gel.mauve.analysis.BrokenCDS;
00028 import org.gel.mauve.analysis.CDSErrorExporter;
00029 import org.gel.mauve.analysis.Gap;
00030 import org.gel.mauve.analysis.PermutationExporter;
00031 import org.gel.mauve.analysis.SNP;
00032 import org.gel.mauve.analysis.SnpExporter;
00033 import org.gel.mauve.contigs.ContigOrderer;
00034 import org.gel.mauve.dcjx.Adjacency;
00035 import org.gel.mauve.dcjx.Block;
00036 import org.gel.mauve.dcjx.DCJ;
00037 import org.gel.mauve.gui.AlignmentProcessListener;
00038 
00039 
00040 public class AssemblyScorer implements AlignmentProcessListener {
00041 
00042         private ContigOrderer co;
00043         private File alnmtFile;
00044         private File outputDir;
00045         private boolean batch;
00046         private String basename;
00047         private XmfaViewerModel model; 
00048         private int[][] subs;
00049         private SNP[] snps;
00050         private Gap[] refGaps;
00051         private Gap[] assGaps; 
00052         private Chromosome[] extraCtgs;
00053         private Chromosome[] missingChroms;
00054         private BrokenCDS[] brokenCDS;
00055         private CDSErrorExporter cdsEE;
00056         private DCJ dcj;
00057         private boolean getBrokenCDS = true;
00058         
00059         private int numSharedBnds;
00060         
00061         private int numInterLcbBnds;
00062         
00064         private Vector<Adjacency> typeI;
00065         
00067         private Vector<Adjacency> typeII;
00068         private Vector<Chromosome> invCtgs;
00069         private Map<Chromosome,Integer> misAsm;
00070         private int numMisAssemblies;
00071         
00072         private int miscalled;
00073         private int uncalled;
00074 
00076         private long maxContigLength = -1;
00077         private long contigN50 = -1;
00078         private long contigN90 = -1;
00079         private long minContigLength = -1;
00080 
00081         private static int A = 0;
00082         private static int C = 1;
00083         private static int T = 2;
00084         private static int G = 3;
00085         
00086 
00087         public AssemblyScorer(XmfaViewerModel model){
00088                 this.model = model;
00089                 loadInfo();
00090         }
00091         
00092         public AssemblyScorer(File alnmtFile, File outDir) {
00093                 this.alnmtFile = alnmtFile;
00094                 this.outputDir = outDir;
00095                 basename = alnmtFile.getName();
00096                 batch = false;
00097         }
00098         
00099         public AssemblyScorer(File alnmtFile, File outDir, String basename) {
00100                 this(alnmtFile,outDir);
00101                 this.basename = basename;
00102         }
00103         
00104         public AssemblyScorer(ContigOrderer co, File outDir) {
00105                 this.co = co;
00106                 this.outputDir = outDir;
00107                 batch = false;
00108         }
00109         
00110         public AssemblyScorer(ContigOrderer co, File outDir, String basename) {
00111                 this(co,outDir);
00112                 this.basename = basename;
00113         }
00114         
00115         public void completeAlignment(int retcode){
00116                 if (retcode == 0) {
00117                         if (co != null){
00118                                 alnmtFile = co.getAlignmentFile();
00119                                 if (basename == null) {
00120                                         basename = alnmtFile.getName();
00121                                 }
00122                         }
00123                         try {
00124                                 this.model = new XmfaViewerModel(alnmtFile,null);
00125                         } catch (IOException e) {
00126                                 System.err.println("Couldn't load alignment file " 
00127                                                                         + alnmtFile.getAbsolutePath());
00128                                 e.printStackTrace();
00129                         }
00130                         if (model != null) {
00131                                 loadInfo();
00132                                 printInfo(this, outputDir, basename, batch);
00133                         }
00134                         
00135                 } else { 
00136                         System.err.println("Alignment failed with error code "+ retcode);
00137                 }
00138                         
00139         }
00140         
00141         private void computeAdjacencyErrors(){
00142                 Adjacency[] ref = dcj.getAdjacencyGraph().getGenomeA();
00143                 Adjacency[] ass = dcj.getAdjacencyGraph().getGenomeB();
00144                 Comparator<Adjacency> comp = new Comparator<Adjacency>(){
00145                         public int compare(Adjacency o1, Adjacency o2) {
00146                                 String s1 = new String(min(o1.getFirstBlockEnd(),o1.getSecondBlockEnd()));
00147                                 s1 = s1 + max(o1.getFirstBlockEnd(),o1.getSecondBlockEnd());
00148                                 String s2 = new String(min(o2.getFirstBlockEnd(),o2.getSecondBlockEnd()));
00149                                 s2 = s2 + max(o2.getFirstBlockEnd(),o2.getSecondBlockEnd());
00150                                 return s1.compareTo(s2);
00151                         }
00152                         private String min(String s1, String s2){
00153                                 if (s1.compareTo(s2) > 0)
00154                                         return s2;
00155                                 else 
00156                                         return s1;
00157                         }
00158                         private String max(String s1, String s2){
00159                                 if (s1.compareTo(s2) > 0)
00160                                         return s1;
00161                                 else 
00162                                         return s2;
00163                         }
00164                 };
00165                 // false positives : adjacencies in assembly that aren't in the reference
00166                 typeI = new Vector<Adjacency>();
00167                 // false negatives : adjacencies in reference that aren't in the assembly
00168                 typeII = new Vector<Adjacency>();
00169                 
00170                 TreeSet<Adjacency> refSet = new TreeSet<Adjacency>(comp);
00171                 for (Adjacency a: ref) {
00172                         refSet.add(a);
00173                 }
00174                 
00175         //      System.err.println("\nnum assembly adjacencies: " + ass.length);
00176                 Vector<Adjacency> intersection = new Vector<Adjacency>(); 
00177                 for (Adjacency a: ass){
00178                         if (!refSet.contains(a)) 
00179                                 typeI.add(a);
00180                         else
00181                                 intersection.add(a);
00182                 }
00183 
00184         //      System.err.println("num typeI errors: " + typeI.size());
00185                 
00186                 TreeSet<Adjacency> assSet = new TreeSet<Adjacency>(comp);
00187                 for (Adjacency a: ass) { assSet.add(a);}
00188                 
00189         //      System.err.println("num ref adjacencies: " + ref.length);
00190                 
00191                 for (Adjacency a: ref) {
00192                         if (!assSet.contains(a))
00193                                 typeII.add(a);
00194                 }
00195         //      System.err.println("num typeII errors: " + typeII.size());
00196                 
00197         /*      Iterator<Adjacency> it = intersection.iterator();
00198                 while(it.hasNext()){
00199                         System.err.println(it.next().toString());
00200                 }*/
00201         }
00202         
00206         private synchronized void loadInfo(){
00207                 model.setReference(model.getGenomeBySourceIndex(0));
00208                 
00209                 System.out.print("Counting shared bounds between contigs/chromosomes and LCBs...");
00210                 numSharedBnds = PermutationExporter.getSharedBoundaryCount(model);
00211                 System.out.print("done!\n");
00212                 
00213                 System.out.print("Counting interLCB contig/chromosome boundaries...");
00214                 numInterLcbBnds = PermutationExporter.countInterBlockBounds(model);
00215                 System.out.print("done!\n");
00216                 
00217                 System.out.print("Computing signed permutations...");
00218                 String[] perms = PermutationExporter.getPermStrings(model, true); 
00219                 System.out.print("done!\n");
00220                 
00221                 //System.out.println("Permutations: ");
00222                 //System.out.println(perms[0]);
00223                 //System.out.println(perms[1]);
00224                 
00225                 System.out.print("Performing DCJ rearrangement analysis...");
00226                 this.dcj = new DCJ(perms[0], perms[1]);
00227                 System.out.print("done!\n");
00228                 
00229                 System.out.print("Computing adjacency errors...");
00230                 computeAdjacencyErrors();
00231                 System.out.print("done!\n");
00232                 
00233                 System.out.print("Getting SNPs...");
00234                 this.snps = SnpExporter.getSNPs(model);
00235                 System.out.print("done!\n");
00236                 
00237                 System.out.print("Counting base substitutions...");
00238                 this.subs = AssemblyScorer.countSubstitutions(snps);
00239                 summarizeBaseCalls();
00240                 System.out.print("done!\n");
00241                 
00242                 System.out.print("Counting gaps...");
00243                 Gap[][] tmp = SnpExporter.getGaps(model);
00244                 System.out.print("done!\n");
00245                 
00246                 System.out.print("Counting extra contigs...");
00247                 Chromosome[][] unique = SnpExporter.getUniqueChromosomes(model);
00248                 System.out.print("done!\n");
00249                 
00250                 computeContigSizeStats();
00251                 
00252                 refGaps = tmp[0];
00253                 assGaps = tmp[1];
00254                 Arrays.sort(assGaps);
00255                 Arrays.sort(refGaps);
00256                 missingChroms = unique[0];
00257                 extraCtgs = unique[1];
00258                 System.out.flush();
00259                 if (getBrokenCDS){
00260                         computeBrokenCDS();
00261                 }
00262         }
00263         
00264         private void computeContigSizeStats(){
00265                 List<Chromosome> chromos = model.getGenomeBySourceIndex(1).getChromosomes();
00266                 long[] sizer = new long[chromos.size()];
00267                 int i=0;
00268                 long sum = 0;
00269                 for(Chromosome c : chromos){
00270                         sizer[i++] = c.getLength();
00271                         sum += c.getLength();
00272                 }
00273                 
00274                 Arrays.sort(sizer);
00275                 minContigLength = sizer[0];
00276                 maxContigLength = sizer[sizer.length-1];
00277                 long cur = 0;
00278                 for(i=sizer.length-1; i>=0 && cur*2 < sum; i--){
00279                         cur += sizer[i];
00280                 }
00281                 contigN50 = sizer[i+1];
00282                 for(; i>0 && cur < sum*0.9d;){
00283                         cur += sizer[--i];
00284                 }
00285                 contigN90 = sizer[i];
00286         }
00287         
00288         
00289         public void summarizeBaseCalls(){
00290                 int subsum = 0;
00291                 for(int i=0; i<subs.length;i++){
00292                         for(int j=0; j<subs.length;j++)
00293                                 subsum += subs[i][j];
00294                 }
00295                 this.miscalled = subsum;
00296                 this.uncalled = snps.length - subsum;
00297         }
00298 
00299         private void computeBrokenCDS(){
00300                 Iterator<Genome> it = model.getGenomes().iterator();
00301                 boolean haveAnnotations = true;
00302                 while(it.hasNext()){
00303                         haveAnnotations = haveAnnotations &&
00304                                 (it.next().getAnnotationSequence() != null);
00305                 }
00306                 haveAnnotations = model.getGenomeBySourceIndex(0).getAnnotationSequence() != null;
00307                 if (haveAnnotations){
00308                         System.out.print("Getting broken CDS...");
00309                         System.out.flush();
00310                         this.cdsEE = new CDSErrorExporter(model, snps, assGaps, refGaps);
00311                         try {
00312                                 brokenCDS = cdsEE.getBrokenCDS();
00313                                 System.out.print("done!\n");
00314                         } catch (Exception e){
00315                                 System.err.println("\n\nfailed to compute broken CDS. Reason given below");
00316                                 System.err.println(e.getMessage());
00317                                 e.printStackTrace();
00318                         } 
00319                 }
00320         }
00321         
00322         public int getMiscalled() {
00323                 return miscalled;
00324         }
00325 
00326         public void setMiscalled(int miscalled) {
00327                 this.miscalled = miscalled;
00328         }
00329 
00330         public int getUncalled() {
00331                 return uncalled;
00332         }
00333 
00334         public void setUncalled(int uncalled) {
00335                 this.uncalled = uncalled;
00336         }
00337 
00338         public XmfaViewerModel getModel(){
00339                 return this.model;
00340         }
00341 
00342         public DCJ getDCJ(){
00343                 return dcj;
00344         }
00345         
00346         public Gap[] getReferenceGaps(){
00347                 return refGaps;
00348         }
00349         
00350         public Gap[] getAssemblyGaps(){
00351                 return assGaps;
00352         }
00353         
00354         public Chromosome[] getExtraContigs(){
00355                 return extraCtgs;
00356         }
00357         
00358         public Chromosome[] getMissingChromosomes(){
00359                 return missingChroms;
00360         }
00361         
00362         public SNP[] getSNPs(){
00363                 return snps;
00364         }
00365         
00366         public int[][] getSubs(){
00367                 return subs;
00368         }
00369         
00370         public boolean hasBrokenCDS(){
00371                 if (brokenCDS == null) 
00372                         return false;
00373                 else {
00374                         return brokenCDS.length > 0;
00375                 }
00376         }
00377         
00378         public BrokenCDS[] getBrokenCDS(){
00379                 return brokenCDS;
00380         }
00381         
00382         public int numBrokenCDS(){
00383                 return cdsEE.numBrokenCDS();
00384         }
00385         
00386         public int numCompleteCDS(){
00387                 return cdsEE.numCompleteCDS();
00388         }
00389         
00390         public int getSCJdist(){
00391                 return dcj.scjDistance();
00392         }
00393         
00394         public int getDCJdist(){
00395                 return dcj.dcjDistance();
00396         }
00397         
00398         public int getBPdist(){
00399                 return dcj.bpDistance();
00400         }
00401         
00402         public int numBlocks(){
00403                 return dcj.numBlocks();
00404         }
00405 
00406         public double typeIadjErr(){
00407                 return ((double) typeI.size()) / 
00408                         ((double) dcj.getAdjacencyGraph()
00409                                                  .getGenomeB().length);
00410         }
00411         
00412         public double typeIIadjErr(){
00413                 return ((double) typeII.size()) /
00414                         ((double) dcj.getAdjacencyGraph()
00415                                                  .getGenomeA().length);
00416         }
00417         
00418         public Map<Chromosome, Integer> getMisAssemblies(){
00419                 return misAsm;
00420         }
00421         
00422         public Chromosome[] getInverted(){
00423                 return invCtgs.toArray(new Chromosome[invCtgs.size()]);
00424         }
00425         
00426         public int numLCBs(){
00427                 return (int) model.getLcbCount();
00428         }
00429         
00430         public int numContigs(){
00431                 return model.getGenomeBySourceIndex(1).getChromosomes().size();
00432         }
00433 
00434         public long numReplicons(){
00435                 return model.getGenomeBySourceIndex(0).getChromosomes().size();
00436         }
00437         
00438         
00439         public long numBasesAssembly(){
00440                 return model.getGenomeBySourceIndex(1).getLength();
00441         }
00442         
00443         public long numBasesReference(){
00444                 return model.getGenomeBySourceIndex(0).getLength();
00445         }
00446         
00447         public double percentMissedBases(){
00448                 double totalBases = model.getGenomeBySourceIndex(0).getLength();
00449                 double missedBases = 0;
00450                 for(int i = 0; i < assGaps.length; i++){
00451                         missedBases += assGaps[i].getLength();
00452                 }
00453                 return missedBases/totalBases;
00454         }
00455         
00456         public long totalMissedBases(){
00457                 long missedBases = 0;
00458                 for(int i = 0; i < assGaps.length; i++){
00459                         missedBases += assGaps[i].getLength();
00460                 }
00461                 return missedBases;
00462         }
00463         
00464         public long getMaxContigLength() {
00465                 return maxContigLength;
00466         }
00467 
00468         public long getContigN50() {
00469                 return contigN50;
00470         }
00471 
00472         public long getContigN90() {
00473                 return contigN90;
00474         }
00475 
00476         public long getMinContigLength() {
00477                 return minContigLength;
00478         }
00479 
00484         public double percentExtraBases(){
00485                 double totalBases = model.getGenomeBySourceIndex(1).getLength();
00486                 double extraBases = 0;
00487                 for (int i = 0; i < refGaps.length; i++){
00488                         extraBases += refGaps[i].getLength();
00489                 }
00490                 return extraBases/totalBases;
00491         }
00492         
00493         public long totalExtraBases(){
00494                 long extraBases = 0;
00495                 for (int i = 0; i < refGaps.length; i++){
00496                         extraBases += refGaps[i].getLength();
00497                 }
00498                 return extraBases;
00499         }
00500         
00501         public int getSharedBoundaryCount(){
00502                 return numSharedBnds;
00503         }
00504         
00505         public int getInterLcbBoundaryCount(){
00506                 return numInterLcbBnds;
00507         }
00508         
00509         /* 
00510          * calculate GC content of missing bases
00511          * in stretches up to 100nt.
00512          * Useful for determining if we're suffering GC bias
00513          */
00514         public static void calculateMissingGC(AssemblyScorer asmScore, File outDir, String basename){
00515                 try{
00516                         System.out.println("Printing GC contents of gaps < 100nt!");
00517                         Gap[] gaps = asmScore.getAssemblyGaps();
00518                         java.io.BufferedWriter bw = new BufferedWriter(new java.io.FileWriter(new File( outDir, basename + "_missing_gc.txt" )));
00519                         java.io.BufferedWriter bw3 = new BufferedWriter(new java.io.FileWriter(new File( outDir, basename + "_background_gc_distribution.txt")));
00520                         Random randy = new Random();
00521                         for(int i=0; i<gaps.length; i++){
00522                                 if(gaps[i].getLength()>100)
00523                                         continue;
00524                                 long glen = gaps[i].getLength();
00525                                 glen = glen < 50 ? 50 : glen;
00526                                 if(gaps[i].getPosition()+glen/2 >=
00527                                         (int)asmScore.getModel().getGenomeBySourceIndex(gaps[i].getGenomeSrcIdx()).getLength())
00528                                 {
00529                                         glen = ((int)asmScore.getModel().getGenomeBySourceIndex(gaps[i].getGenomeSrcIdx()).getLength() - gaps[i].getPosition() - 3) / 2;
00530                                 }
00531                                 if(gaps[i].getPosition()-glen/2 < 1)
00532                                 {
00533                                         glen = gaps[i].getPosition() / 2;
00534                                 }
00535 
00536                                 long[] left = asmScore.getModel().getLCBAndColumn(gaps[i].getGenomeSrcIdx(), gaps[i].getPosition()-glen/2);
00537                                 long[] right = asmScore.getModel().getLCBAndColumn(gaps[i].getGenomeSrcIdx(), gaps[i].getPosition()+glen/2);
00538                                 if(left[0]!=right[0]){
00539                                         // gap spans LCB.  too hard for this hack.
00540                                         continue;
00541                                 }
00542                                 long ll = left[1] < right[1] ? left[1] : right[1];
00543                                 byte[] rawseq = asmScore.getModel().getXmfa().readRawSequence((int)left[0], 0, ll, Math.abs(right[1]-left[1])+1);
00544                                 double gc = countGC(rawseq);
00545                                 if(!Double.isNaN(gc))
00546                                 {
00547                                         bw.write((new Double(gc)).toString());
00548                                         bw.write("\n");
00549                                 }
00550                                 int upperbound = (int)asmScore.getModel().getGenomeBySourceIndex(gaps[i].getGenomeSrcIdx()).getLength() - (int)glen - 10000;
00551                                 upperbound = upperbound < 0 ? 0 : upperbound;
00552                                 int rpos = randy.nextInt(upperbound);
00553         
00554                                 // evil code copy!!
00555                                 left = asmScore.getModel().getLCBAndColumn(gaps[i].getGenomeSrcIdx(), rpos-glen/2);
00556                                 right = asmScore.getModel().getLCBAndColumn(gaps[i].getGenomeSrcIdx(), rpos+glen/2);
00557                                 if(left[0]!=right[0]){
00558                                         // gap spans LCB.  too hard for this hack.
00559                                         continue;
00560                                 }
00561                                 ll = left[1] < right[1] ? left[1] : right[1];
00562                                 rawseq = asmScore.getModel().getXmfa().readRawSequence((int)left[0], 0, ll, Math.abs(right[1]-left[1])+1);
00563                                 gc = countGC(rawseq);
00564                                 if(!Double.isNaN(gc))
00565                                 {
00566                                         bw3.write((new Double(gc)).toString());
00567                                         bw3.write("\n");
00568                                 }
00569                         }
00570                         bw.flush();
00571                         bw.close();
00572                         bw3.flush();
00573                         bw3.close();
00574                 }catch(IOException ioe){ioe.printStackTrace();};
00575         }
00576 
00577         static double countGC(byte[] rawseq){
00578                 double gc = 0;
00579                 double counts = 0;
00580                 for(int j=0; j<rawseq.length; j++){
00581                         if(rawseq[j]== 'G' || rawseq[j]== 'C' || 
00582                                         rawseq[j]== 'g' || rawseq[j]== 'c')
00583                                 gc++;
00584                         else if(rawseq[j]=='-' || rawseq[j]=='\n')
00585                                 continue;
00586                         counts++;
00587                 }
00588                 return gc /= counts;
00589         }
00590 
00591         static String subsToString(AssemblyScorer assScore){
00592                 // A C T G
00593                 StringBuilder sb = new StringBuilder();
00594                 sb.append("\tA\tC\tT\tG\n");
00595                 char[] ar = {'A','C','T','G'};
00596                 int[][] subs = assScore.getSubs();
00597                 for (int i = 0; i < subs.length; i++){
00598                         sb.append(ar[i]);
00599                         for (int j = 0; j < subs.length; j++){
00600                                 sb.append("\t"+(i==j?"-":subs[i][j]));
00601                         }
00602                         sb.append("\n");
00603                 }
00604                 return sb.toString();
00605         }
00606 
00623         public static int[][] countSubstitutions(SNP[] snps){
00624                 int[][] subs = new int[4][4];
00625                 for (int k = 0; k < snps.length; k++){ 
00626                         char c_0 = snps[k].getChar(0);
00627                         char c_1 = snps[k].getChar(1);
00628                         
00629                         try {
00630                                 if (c_0 != c_1)
00631                                         subs[getBaseIdx(c_0)][getBaseIdx(c_1)]++;
00632                         } catch (IllegalArgumentException e){
00633                                 //System.err.println("Skipping ambiguity: ref = " +c_0 +" assembly = " + c_1 );
00634                         }
00635                 }
00636                 return subs;
00637         }
00638 
00639         static int getBaseIdx(char c) throws IllegalArgumentException {
00640                 switch(c){
00641                   case 'a': return AssemblyScorer.A; 
00642                   case 'A': return AssemblyScorer.A;
00643                   case 'c': return AssemblyScorer.C;
00644                   case 'C': return AssemblyScorer.C;
00645                   case 't': return AssemblyScorer.T;
00646                   case 'T': return AssemblyScorer.T;
00647                   case 'g': return AssemblyScorer.G;
00648                   case 'G': return AssemblyScorer.G;
00649                   default:{ throw new IllegalArgumentException("char " + c);}
00650                 }
00651         }
00652 
00653         public static final int MINIMUM_REPORTABLE_MISSING_LENGTH = 20;
00654         public static void printMissingRegions(AssemblyScorer asmScore, File outDir, String basename)
00655         {
00656                 try{
00657                         BufferedWriter bw = new BufferedWriter(new FileWriter(new File( outDir, basename + "_missing_regions.txt" )));
00658                         Gap[] gaps = asmScore.getReferenceGaps();
00659                         for(int gI=0; gI < gaps.length; gI++){
00660                                 if(gaps[gI].getLength() < MINIMUM_REPORTABLE_MISSING_LENGTH)
00661                                         continue;
00662                                 byte[][] aln = asmScore.getModel().getSequenceRange(gaps[gI].getGenomeSrcIdx(), gaps[gI].getPosition(), gaps[gI].getPosition()+gaps[gI].getLength());
00663                                 StringBuilder sb = new StringBuilder();
00664                                 for(int i=0; i<aln[0].length; i++){
00665                                         if(aln[0][i]!='-' && aln[0][i]!='\n' && aln[0][i]!='\r')
00666                                                 sb.append((char)aln[0][i]);
00667                                 }
00668                                 sb.append("\n");
00669                                 bw.write(">at_contig_" + gaps[gI].getContig() + "_pos_" + gaps[gI].getPositionInContig() + "_assembly_has_" + gaps[gI].getLength() + "_extra_bases\n");
00670                                 bw.write(sb.toString());
00671                         }
00672                         bw.flush();
00673                 }catch(IOException ioe){ioe.printStackTrace();};
00674         }
00675 
00676         public static void printInfo(AssemblyScorer sa, File outDir, String baseName, boolean batch){
00677                 PrintStream gapOut = null;
00678                 PrintStream miscallOut = null;
00679                 PrintStream uncallOut = null;
00680                 PrintStream sumOut = null;
00681                 PrintStream blockOut = null;
00682                 PrintStream chromOut = null;
00683                 if(!outDir.exists()){
00684                         outDir.mkdir();
00685                 }
00686                 try {
00687                         File gapFile = new File(outDir, baseName+"__gaps.txt");
00688                         gapFile.createNewFile();
00689                         gapOut = new PrintStream(gapFile);
00690                         File miscallFile = new File(outDir, baseName+"__miscalls.txt");
00691                         miscallFile.createNewFile();
00692                         miscallOut = new PrintStream(miscallFile);
00693                         File uncallFile = new File(outDir, baseName+"__uncalls.txt");
00694                         uncallFile.createNewFile();
00695                         uncallOut = new PrintStream(uncallFile);
00696                         File sumFile = new File(outDir, baseName+"__sum.txt");
00697                         sumFile.createNewFile();
00698                         sumOut = new PrintStream(sumFile);
00699                         File blockFile = new File(outDir,baseName+"__blocks.txt");
00700                         blockFile.createNewFile();
00701                         blockOut = new PrintStream(blockFile);
00702                         File chromFile = new File(outDir,"chromosomes.txt");
00703                         chromFile.createNewFile();
00704                         chromOut = new PrintStream(chromFile);
00705                 } catch (IOException e){
00706                         e.printStackTrace();
00707                         System.exit(-1);    
00708                 }
00709                 printInfo(sa,miscallOut,uncallOut,gapOut);
00710                 printBlockInfo(blockOut,sa.model);
00711                 if (batch){
00712                     sumOut.print(ScoreAssembly.getSumText(sa, false, true));    
00713                 }else {
00714                     sumOut.print(ScoreAssembly.getSumText(sa, true, true));
00715                 }
00716                 AssemblyScorer.calculateMissingGC(sa, outDir, baseName);
00717                 AssemblyScorer.printMissingRegions(sa, outDir, baseName);
00718                 chromOut.print(getReferenceChromosomes(sa));
00719                 
00720                 blockOut.close();
00721                 gapOut.close();
00722                 miscallOut.close();
00723                 uncallOut.close();
00724                 sumOut.close();
00725                 chromOut.close();
00726         }
00727 
00728         private static String getReferenceChromosomes(AssemblyScorer sa) {
00729                 Genome refGenome = sa.getModel().getGenomeBySourceIndex(0);
00730                 StringBuilder sb = new StringBuilder();
00731                 for(int cI=0; cI<refGenome.getChromosomes().size(); cI++){
00732                         Chromosome c = refGenome.getChromosomes().get(cI);
00733                         sb.append(c.getName());
00734                         sb.append("\t");
00735                         sb.append(c.getStart() + 1);
00736                         sb.append("\n");
00737                 }
00738                 return sb.toString();
00739         }
00740         
00741         private static void printBlockInfo(PrintStream out, XmfaViewerModel model){
00742                 out.println("BlockId\tBlockLength\tRefLeft\tRefRight\tAsmLeft\tAsmRight");
00743                 LCB[] lcbList = model.getSplitLcbList();
00744                 int l = -1;
00745                 int r = -1;
00746                 Genome ref = model.getGenomeBySourceIndex(0);
00747                 Genome asm = model.getGenomeBySourceIndex(1);
00748                 for (LCB lcb: lcbList){
00749                         out.print(lcb.id+"\t"+lcb.getAvgSegmentLength());
00750                         if (lcb.getReverse(ref)){
00751                                 out.print("\t"+-1*lcb.getLeftEnd(ref));
00752                                 out.print("\t"+-1*lcb.getRightEnd(ref));
00753                         } else {
00754                                 out.print("\t"+lcb.getLeftEnd(ref));
00755                                 out.print("\t"+lcb.getRightEnd(ref));
00756                         }
00757                         if (lcb.getReverse(asm)){
00758                                 out.print("\t"+-1*lcb.getLeftEnd(asm));
00759                                 out.print("\t"+-1*lcb.getRightEnd(asm));
00760                         } else {
00761                                 out.print("\t"+lcb.getLeftEnd(asm));
00762                                 out.print("\t"+lcb.getRightEnd(asm));
00763                         }
00764                         out.println();
00765                 }
00766         }
00767         
00779         public static void printInfo(AssemblyScorer sa,  PrintStream miscallOut, PrintStream uncallOut, PrintStream gapOut){
00780                 if (miscallOut!=null){
00781                         StringBuilder sb = new StringBuilder();
00782                         sb.append("SNP_Pattern\tRef_Contig\tRef_PosInContig\tRef_PosGenomeWide\tAssembly_Contig\tAssembly_PosInContig\tAssembly_PosGenomeWide\n");
00783                         StringBuilder sb2 = new StringBuilder();
00784                         sb2.append("SNP_Pattern\tRef_Contig\tRef_PosInContig\tRef_PosGenomeWide\tAssembly_Contig\tAssembly_PosInContig\tAssembly_PosGenomeWide\n");
00785                         for (int i = 0; i < sa.snps.length; i++)
00786                         {
00787                                 if(sa.snps[i].hasAmbiguities())
00788                                         sb.append(sa.snps[i].toString()+"\n");
00789                                 else
00790                                         sb2.append(sa.snps[i].toString()+"\n");
00791                         }
00792                         uncallOut.print(sb.toString());
00793                         uncallOut.flush();
00794                         miscallOut.print(sb2.toString());
00795                         miscallOut.flush();
00796                 }
00797                 if (gapOut!=null){
00798                         StringBuilder sb = new StringBuilder();
00799                         sb.append("Sequence\tContig\tPosition_in_Contig\tGenomeWide_Position\tLength\tGenome0_GlobalPos\tGenome0_contig\tGenome0_LocalPos\tGenome1_GlobalPos\tGenome1_contig\tGenome1_LocalPos\n");
00800                         for (int i = 0; i < sa.refGaps.length; i++)
00801                                 sb.append(sa.refGaps[i].toString("reference")+"\n");
00802                         for (int i = 0; i < sa.assGaps.length; i++)
00803                                 sb.append(sa.assGaps[i].toString("assembly")+"\n");                     
00804                         gapOut.print(sb.toString());
00805                         gapOut.flush();
00806                 }
00807                 
00808         }
00809 }

Generated on Mon Aug 19 06:03:40 2013 for Mauve by doxygen 1.3.6