00001 package org.gel.mauve.assembly;
00002
00003 import java.io.File;
00004
00005
00006 import org.gel.mauve.Chromosome;
00007 import org.gel.mauve.Genome;
00008 import org.gel.mauve.LCB;
00009
00010
00011 import java.io.BufferedWriter;
00012 import java.io.FileWriter;
00013 import java.io.IOException;
00014 import java.io.PrintStream;
00015 import java.text.NumberFormat;
00016 import java.util.Arrays;
00017 import java.util.Comparator;
00018 import java.util.Iterator;
00019 import java.util.Map;
00020 import java.util.Random;
00021 import java.util.TreeMap;
00022 import java.util.TreeSet;
00023 import java.util.Vector;
00024 import java.util.List;
00025
00026 import org.gel.mauve.XmfaViewerModel;
00027 import org.gel.mauve.analysis.BrokenCDS;
00028 import org.gel.mauve.analysis.CDSErrorExporter;
00029 import org.gel.mauve.analysis.Gap;
00030 import org.gel.mauve.analysis.PermutationExporter;
00031 import org.gel.mauve.analysis.SNP;
00032 import org.gel.mauve.analysis.SnpExporter;
00033 import org.gel.mauve.contigs.ContigOrderer;
00034 import org.gel.mauve.dcjx.Adjacency;
00035 import org.gel.mauve.dcjx.Block;
00036 import org.gel.mauve.dcjx.DCJ;
00037 import org.gel.mauve.gui.AlignmentProcessListener;
00038
00039
00040 public class AssemblyScorer implements AlignmentProcessListener {
00041
00042 private ContigOrderer co;
00043 private File alnmtFile;
00044 private File outputDir;
00045 private boolean batch;
00046 private String basename;
00047 private XmfaViewerModel model;
00048 private int[][] subs;
00049 private SNP[] snps;
00050 private Gap[] refGaps;
00051 private Gap[] assGaps;
00052 private Chromosome[] extraCtgs;
00053 private Chromosome[] missingChroms;
00054 private BrokenCDS[] brokenCDS;
00055 private CDSErrorExporter cdsEE;
00056 private DCJ dcj;
00057 private boolean getBrokenCDS = true;
00058
00059 private int numSharedBnds;
00060
00061 private int numInterLcbBnds;
00062
00064 private Vector<Adjacency> typeI;
00065
00067 private Vector<Adjacency> typeII;
00068 private Vector<Chromosome> invCtgs;
00069 private Map<Chromosome,Integer> misAsm;
00070 private int numMisAssemblies;
00071
00072 private int miscalled;
00073 private int uncalled;
00074
00076 private long maxContigLength = -1;
00077 private long contigN50 = -1;
00078 private long contigN90 = -1;
00079 private long minContigLength = -1;
00080
00081 private static int A = 0;
00082 private static int C = 1;
00083 private static int T = 2;
00084 private static int G = 3;
00085
00086
00087 public AssemblyScorer(XmfaViewerModel model){
00088 this.model = model;
00089 loadInfo();
00090 }
00091
00092 public AssemblyScorer(File alnmtFile, File outDir) {
00093 this.alnmtFile = alnmtFile;
00094 this.outputDir = outDir;
00095 basename = alnmtFile.getName();
00096 batch = false;
00097 }
00098
00099 public AssemblyScorer(File alnmtFile, File outDir, String basename) {
00100 this(alnmtFile,outDir);
00101 this.basename = basename;
00102 }
00103
00104 public AssemblyScorer(ContigOrderer co, File outDir) {
00105 this.co = co;
00106 this.outputDir = outDir;
00107 batch = false;
00108 }
00109
00110 public AssemblyScorer(ContigOrderer co, File outDir, String basename) {
00111 this(co,outDir);
00112 this.basename = basename;
00113 }
00114
00115 public void completeAlignment(int retcode){
00116 if (retcode == 0) {
00117 if (co != null){
00118 alnmtFile = co.getAlignmentFile();
00119 if (basename == null) {
00120 basename = alnmtFile.getName();
00121 }
00122 }
00123 try {
00124 this.model = new XmfaViewerModel(alnmtFile,null);
00125 } catch (IOException e) {
00126 System.err.println("Couldn't load alignment file "
00127 + alnmtFile.getAbsolutePath());
00128 e.printStackTrace();
00129 }
00130 if (model != null) {
00131 loadInfo();
00132 printInfo(this, outputDir, basename, batch);
00133 }
00134
00135 } else {
00136 System.err.println("Alignment failed with error code "+ retcode);
00137 }
00138
00139 }
00140
00141 private void computeAdjacencyErrors(){
00142 Adjacency[] ref = dcj.getAdjacencyGraph().getGenomeA();
00143 Adjacency[] ass = dcj.getAdjacencyGraph().getGenomeB();
00144 Comparator<Adjacency> comp = new Comparator<Adjacency>(){
00145 public int compare(Adjacency o1, Adjacency o2) {
00146 String s1 = new String(min(o1.getFirstBlockEnd(),o1.getSecondBlockEnd()));
00147 s1 = s1 + max(o1.getFirstBlockEnd(),o1.getSecondBlockEnd());
00148 String s2 = new String(min(o2.getFirstBlockEnd(),o2.getSecondBlockEnd()));
00149 s2 = s2 + max(o2.getFirstBlockEnd(),o2.getSecondBlockEnd());
00150 return s1.compareTo(s2);
00151 }
00152 private String min(String s1, String s2){
00153 if (s1.compareTo(s2) > 0)
00154 return s2;
00155 else
00156 return s1;
00157 }
00158 private String max(String s1, String s2){
00159 if (s1.compareTo(s2) > 0)
00160 return s1;
00161 else
00162 return s2;
00163 }
00164 };
00165
00166 typeI = new Vector<Adjacency>();
00167
00168 typeII = new Vector<Adjacency>();
00169
00170 TreeSet<Adjacency> refSet = new TreeSet<Adjacency>(comp);
00171 for (Adjacency a: ref) {
00172 refSet.add(a);
00173 }
00174
00175
00176 Vector<Adjacency> intersection = new Vector<Adjacency>();
00177 for (Adjacency a: ass){
00178 if (!refSet.contains(a))
00179 typeI.add(a);
00180 else
00181 intersection.add(a);
00182 }
00183
00184
00185
00186 TreeSet<Adjacency> assSet = new TreeSet<Adjacency>(comp);
00187 for (Adjacency a: ass) { assSet.add(a);}
00188
00189
00190
00191 for (Adjacency a: ref) {
00192 if (!assSet.contains(a))
00193 typeII.add(a);
00194 }
00195
00196
00197
00198
00199
00200
00201 }
00202
00206 private synchronized void loadInfo(){
00207 model.setReference(model.getGenomeBySourceIndex(0));
00208
00209 System.out.print("Counting shared bounds between contigs/chromosomes and LCBs...");
00210 numSharedBnds = PermutationExporter.getSharedBoundaryCount(model);
00211 System.out.print("done!\n");
00212
00213 System.out.print("Counting interLCB contig/chromosome boundaries...");
00214 numInterLcbBnds = PermutationExporter.countInterBlockBounds(model);
00215 System.out.print("done!\n");
00216
00217 System.out.print("Computing signed permutations...");
00218 String[] perms = PermutationExporter.getPermStrings(model, true);
00219 System.out.print("done!\n");
00220
00221
00222
00223
00224
00225 System.out.print("Performing DCJ rearrangement analysis...");
00226 this.dcj = new DCJ(perms[0], perms[1]);
00227 System.out.print("done!\n");
00228
00229 System.out.print("Computing adjacency errors...");
00230 computeAdjacencyErrors();
00231 System.out.print("done!\n");
00232
00233 System.out.print("Getting SNPs...");
00234 this.snps = SnpExporter.getSNPs(model);
00235 System.out.print("done!\n");
00236
00237 System.out.print("Counting base substitutions...");
00238 this.subs = AssemblyScorer.countSubstitutions(snps);
00239 summarizeBaseCalls();
00240 System.out.print("done!\n");
00241
00242 System.out.print("Counting gaps...");
00243 Gap[][] tmp = SnpExporter.getGaps(model);
00244 System.out.print("done!\n");
00245
00246 System.out.print("Counting extra contigs...");
00247 Chromosome[][] unique = SnpExporter.getUniqueChromosomes(model);
00248 System.out.print("done!\n");
00249
00250 computeContigSizeStats();
00251
00252 refGaps = tmp[0];
00253 assGaps = tmp[1];
00254 Arrays.sort(assGaps);
00255 Arrays.sort(refGaps);
00256 missingChroms = unique[0];
00257 extraCtgs = unique[1];
00258 System.out.flush();
00259 if (getBrokenCDS){
00260 computeBrokenCDS();
00261 }
00262 }
00263
00264 private void computeContigSizeStats(){
00265 List<Chromosome> chromos = model.getGenomeBySourceIndex(1).getChromosomes();
00266 long[] sizer = new long[chromos.size()];
00267 int i=0;
00268 long sum = 0;
00269 for(Chromosome c : chromos){
00270 sizer[i++] = c.getLength();
00271 sum += c.getLength();
00272 }
00273
00274 Arrays.sort(sizer);
00275 minContigLength = sizer[0];
00276 maxContigLength = sizer[sizer.length-1];
00277 long cur = 0;
00278 for(i=sizer.length-1; i>=0 && cur*2 < sum; i--){
00279 cur += sizer[i];
00280 }
00281 contigN50 = sizer[i+1];
00282 for(; i>0 && cur < sum*0.9d;){
00283 cur += sizer[--i];
00284 }
00285 contigN90 = sizer[i];
00286 }
00287
00288
00289 public void summarizeBaseCalls(){
00290 int subsum = 0;
00291 for(int i=0; i<subs.length;i++){
00292 for(int j=0; j<subs.length;j++)
00293 subsum += subs[i][j];
00294 }
00295 this.miscalled = subsum;
00296 this.uncalled = snps.length - subsum;
00297 }
00298
00299 private void computeBrokenCDS(){
00300 Iterator<Genome> it = model.getGenomes().iterator();
00301 boolean haveAnnotations = true;
00302 while(it.hasNext()){
00303 haveAnnotations = haveAnnotations &&
00304 (it.next().getAnnotationSequence() != null);
00305 }
00306 haveAnnotations = model.getGenomeBySourceIndex(0).getAnnotationSequence() != null;
00307 if (haveAnnotations){
00308 System.out.print("Getting broken CDS...");
00309 System.out.flush();
00310 this.cdsEE = new CDSErrorExporter(model, snps, assGaps, refGaps);
00311 try {
00312 brokenCDS = cdsEE.getBrokenCDS();
00313 System.out.print("done!\n");
00314 } catch (Exception e){
00315 System.err.println("\n\nfailed to compute broken CDS. Reason given below");
00316 System.err.println(e.getMessage());
00317 e.printStackTrace();
00318 }
00319 }
00320 }
00321
00322 public int getMiscalled() {
00323 return miscalled;
00324 }
00325
00326 public void setMiscalled(int miscalled) {
00327 this.miscalled = miscalled;
00328 }
00329
00330 public int getUncalled() {
00331 return uncalled;
00332 }
00333
00334 public void setUncalled(int uncalled) {
00335 this.uncalled = uncalled;
00336 }
00337
00338 public XmfaViewerModel getModel(){
00339 return this.model;
00340 }
00341
00342 public DCJ getDCJ(){
00343 return dcj;
00344 }
00345
00346 public Gap[] getReferenceGaps(){
00347 return refGaps;
00348 }
00349
00350 public Gap[] getAssemblyGaps(){
00351 return assGaps;
00352 }
00353
00354 public Chromosome[] getExtraContigs(){
00355 return extraCtgs;
00356 }
00357
00358 public Chromosome[] getMissingChromosomes(){
00359 return missingChroms;
00360 }
00361
00362 public SNP[] getSNPs(){
00363 return snps;
00364 }
00365
00366 public int[][] getSubs(){
00367 return subs;
00368 }
00369
00370 public boolean hasBrokenCDS(){
00371 if (brokenCDS == null)
00372 return false;
00373 else {
00374 return brokenCDS.length > 0;
00375 }
00376 }
00377
00378 public BrokenCDS[] getBrokenCDS(){
00379 return brokenCDS;
00380 }
00381
00382 public int numBrokenCDS(){
00383 return cdsEE.numBrokenCDS();
00384 }
00385
00386 public int numCompleteCDS(){
00387 return cdsEE.numCompleteCDS();
00388 }
00389
00390 public int getSCJdist(){
00391 return dcj.scjDistance();
00392 }
00393
00394 public int getDCJdist(){
00395 return dcj.dcjDistance();
00396 }
00397
00398 public int getBPdist(){
00399 return dcj.bpDistance();
00400 }
00401
00402 public int numBlocks(){
00403 return dcj.numBlocks();
00404 }
00405
00406 public double typeIadjErr(){
00407 return ((double) typeI.size()) /
00408 ((double) dcj.getAdjacencyGraph()
00409 .getGenomeB().length);
00410 }
00411
00412 public double typeIIadjErr(){
00413 return ((double) typeII.size()) /
00414 ((double) dcj.getAdjacencyGraph()
00415 .getGenomeA().length);
00416 }
00417
00418 public Map<Chromosome, Integer> getMisAssemblies(){
00419 return misAsm;
00420 }
00421
00422 public Chromosome[] getInverted(){
00423 return invCtgs.toArray(new Chromosome[invCtgs.size()]);
00424 }
00425
00426 public int numLCBs(){
00427 return (int) model.getLcbCount();
00428 }
00429
00430 public int numContigs(){
00431 return model.getGenomeBySourceIndex(1).getChromosomes().size();
00432 }
00433
00434 public long numReplicons(){
00435 return model.getGenomeBySourceIndex(0).getChromosomes().size();
00436 }
00437
00438
00439 public long numBasesAssembly(){
00440 return model.getGenomeBySourceIndex(1).getLength();
00441 }
00442
00443 public long numBasesReference(){
00444 return model.getGenomeBySourceIndex(0).getLength();
00445 }
00446
00447 public double percentMissedBases(){
00448 double totalBases = model.getGenomeBySourceIndex(0).getLength();
00449 double missedBases = 0;
00450 for(int i = 0; i < assGaps.length; i++){
00451 missedBases += assGaps[i].getLength();
00452 }
00453 return missedBases/totalBases;
00454 }
00455
00456 public long totalMissedBases(){
00457 long missedBases = 0;
00458 for(int i = 0; i < assGaps.length; i++){
00459 missedBases += assGaps[i].getLength();
00460 }
00461 return missedBases;
00462 }
00463
00464 public long getMaxContigLength() {
00465 return maxContigLength;
00466 }
00467
00468 public long getContigN50() {
00469 return contigN50;
00470 }
00471
00472 public long getContigN90() {
00473 return contigN90;
00474 }
00475
00476 public long getMinContigLength() {
00477 return minContigLength;
00478 }
00479
00484 public double percentExtraBases(){
00485 double totalBases = model.getGenomeBySourceIndex(1).getLength();
00486 double extraBases = 0;
00487 for (int i = 0; i < refGaps.length; i++){
00488 extraBases += refGaps[i].getLength();
00489 }
00490 return extraBases/totalBases;
00491 }
00492
00493 public long totalExtraBases(){
00494 long extraBases = 0;
00495 for (int i = 0; i < refGaps.length; i++){
00496 extraBases += refGaps[i].getLength();
00497 }
00498 return extraBases;
00499 }
00500
00501 public int getSharedBoundaryCount(){
00502 return numSharedBnds;
00503 }
00504
00505 public int getInterLcbBoundaryCount(){
00506 return numInterLcbBnds;
00507 }
00508
00509
00510
00511
00512
00513
00514 public static void calculateMissingGC(AssemblyScorer asmScore, File outDir, String basename){
00515 try{
00516 System.out.println("Printing GC contents of gaps < 100nt!");
00517 Gap[] gaps = asmScore.getAssemblyGaps();
00518 java.io.BufferedWriter bw = new BufferedWriter(new java.io.FileWriter(new File( outDir, basename + "_missing_gc.txt" )));
00519 java.io.BufferedWriter bw3 = new BufferedWriter(new java.io.FileWriter(new File( outDir, basename + "_background_gc_distribution.txt")));
00520 Random randy = new Random();
00521 for(int i=0; i<gaps.length; i++){
00522 if(gaps[i].getLength()>100)
00523 continue;
00524 long glen = gaps[i].getLength();
00525 glen = glen < 50 ? 50 : glen;
00526 if(gaps[i].getPosition()+glen/2 >=
00527 (int)asmScore.getModel().getGenomeBySourceIndex(gaps[i].getGenomeSrcIdx()).getLength())
00528 {
00529 glen = ((int)asmScore.getModel().getGenomeBySourceIndex(gaps[i].getGenomeSrcIdx()).getLength() - gaps[i].getPosition() - 3) / 2;
00530 }
00531 if(gaps[i].getPosition()-glen/2 < 1)
00532 {
00533 glen = gaps[i].getPosition() / 2;
00534 }
00535
00536 long[] left = asmScore.getModel().getLCBAndColumn(gaps[i].getGenomeSrcIdx(), gaps[i].getPosition()-glen/2);
00537 long[] right = asmScore.getModel().getLCBAndColumn(gaps[i].getGenomeSrcIdx(), gaps[i].getPosition()+glen/2);
00538 if(left[0]!=right[0]){
00539
00540 continue;
00541 }
00542 long ll = left[1] < right[1] ? left[1] : right[1];
00543 byte[] rawseq = asmScore.getModel().getXmfa().readRawSequence((int)left[0], 0, ll, Math.abs(right[1]-left[1])+1);
00544 double gc = countGC(rawseq);
00545 if(!Double.isNaN(gc))
00546 {
00547 bw.write((new Double(gc)).toString());
00548 bw.write("\n");
00549 }
00550 int upperbound = (int)asmScore.getModel().getGenomeBySourceIndex(gaps[i].getGenomeSrcIdx()).getLength() - (int)glen - 10000;
00551 upperbound = upperbound < 0 ? 0 : upperbound;
00552 int rpos = randy.nextInt(upperbound);
00553
00554
00555 left = asmScore.getModel().getLCBAndColumn(gaps[i].getGenomeSrcIdx(), rpos-glen/2);
00556 right = asmScore.getModel().getLCBAndColumn(gaps[i].getGenomeSrcIdx(), rpos+glen/2);
00557 if(left[0]!=right[0]){
00558
00559 continue;
00560 }
00561 ll = left[1] < right[1] ? left[1] : right[1];
00562 rawseq = asmScore.getModel().getXmfa().readRawSequence((int)left[0], 0, ll, Math.abs(right[1]-left[1])+1);
00563 gc = countGC(rawseq);
00564 if(!Double.isNaN(gc))
00565 {
00566 bw3.write((new Double(gc)).toString());
00567 bw3.write("\n");
00568 }
00569 }
00570 bw.flush();
00571 bw.close();
00572 bw3.flush();
00573 bw3.close();
00574 }catch(IOException ioe){ioe.printStackTrace();};
00575 }
00576
00577 static double countGC(byte[] rawseq){
00578 double gc = 0;
00579 double counts = 0;
00580 for(int j=0; j<rawseq.length; j++){
00581 if(rawseq[j]== 'G' || rawseq[j]== 'C' ||
00582 rawseq[j]== 'g' || rawseq[j]== 'c')
00583 gc++;
00584 else if(rawseq[j]=='-' || rawseq[j]=='\n')
00585 continue;
00586 counts++;
00587 }
00588 return gc /= counts;
00589 }
00590
00591 static String subsToString(AssemblyScorer assScore){
00592
00593 StringBuilder sb = new StringBuilder();
00594 sb.append("\tA\tC\tT\tG\n");
00595 char[] ar = {'A','C','T','G'};
00596 int[][] subs = assScore.getSubs();
00597 for (int i = 0; i < subs.length; i++){
00598 sb.append(ar[i]);
00599 for (int j = 0; j < subs.length; j++){
00600 sb.append("\t"+(i==j?"-":subs[i][j]));
00601 }
00602 sb.append("\n");
00603 }
00604 return sb.toString();
00605 }
00606
00623 public static int[][] countSubstitutions(SNP[] snps){
00624 int[][] subs = new int[4][4];
00625 for (int k = 0; k < snps.length; k++){
00626 char c_0 = snps[k].getChar(0);
00627 char c_1 = snps[k].getChar(1);
00628
00629 try {
00630 if (c_0 != c_1)
00631 subs[getBaseIdx(c_0)][getBaseIdx(c_1)]++;
00632 } catch (IllegalArgumentException e){
00633
00634 }
00635 }
00636 return subs;
00637 }
00638
00639 static int getBaseIdx(char c) throws IllegalArgumentException {
00640 switch(c){
00641 case 'a': return AssemblyScorer.A;
00642 case 'A': return AssemblyScorer.A;
00643 case 'c': return AssemblyScorer.C;
00644 case 'C': return AssemblyScorer.C;
00645 case 't': return AssemblyScorer.T;
00646 case 'T': return AssemblyScorer.T;
00647 case 'g': return AssemblyScorer.G;
00648 case 'G': return AssemblyScorer.G;
00649 default:{ throw new IllegalArgumentException("char " + c);}
00650 }
00651 }
00652
00653 public static final int MINIMUM_REPORTABLE_MISSING_LENGTH = 20;
00654 public static void printMissingRegions(AssemblyScorer asmScore, File outDir, String basename)
00655 {
00656 try{
00657 BufferedWriter bw = new BufferedWriter(new FileWriter(new File( outDir, basename + "_missing_regions.txt" )));
00658 Gap[] gaps = asmScore.getReferenceGaps();
00659 for(int gI=0; gI < gaps.length; gI++){
00660 if(gaps[gI].getLength() < MINIMUM_REPORTABLE_MISSING_LENGTH)
00661 continue;
00662 byte[][] aln = asmScore.getModel().getSequenceRange(gaps[gI].getGenomeSrcIdx(), gaps[gI].getPosition(), gaps[gI].getPosition()+gaps[gI].getLength());
00663 StringBuilder sb = new StringBuilder();
00664 for(int i=0; i<aln[0].length; i++){
00665 if(aln[0][i]!='-' && aln[0][i]!='\n' && aln[0][i]!='\r')
00666 sb.append((char)aln[0][i]);
00667 }
00668 sb.append("\n");
00669 bw.write(">at_contig_" + gaps[gI].getContig() + "_pos_" + gaps[gI].getPositionInContig() + "_assembly_has_" + gaps[gI].getLength() + "_extra_bases\n");
00670 bw.write(sb.toString());
00671 }
00672 bw.flush();
00673 }catch(IOException ioe){ioe.printStackTrace();};
00674 }
00675
00676 public static void printInfo(AssemblyScorer sa, File outDir, String baseName, boolean batch){
00677 PrintStream gapOut = null;
00678 PrintStream miscallOut = null;
00679 PrintStream uncallOut = null;
00680 PrintStream sumOut = null;
00681 PrintStream blockOut = null;
00682 PrintStream chromOut = null;
00683 if(!outDir.exists()){
00684 outDir.mkdir();
00685 }
00686 try {
00687 File gapFile = new File(outDir, baseName+"__gaps.txt");
00688 gapFile.createNewFile();
00689 gapOut = new PrintStream(gapFile);
00690 File miscallFile = new File(outDir, baseName+"__miscalls.txt");
00691 miscallFile.createNewFile();
00692 miscallOut = new PrintStream(miscallFile);
00693 File uncallFile = new File(outDir, baseName+"__uncalls.txt");
00694 uncallFile.createNewFile();
00695 uncallOut = new PrintStream(uncallFile);
00696 File sumFile = new File(outDir, baseName+"__sum.txt");
00697 sumFile.createNewFile();
00698 sumOut = new PrintStream(sumFile);
00699 File blockFile = new File(outDir,baseName+"__blocks.txt");
00700 blockFile.createNewFile();
00701 blockOut = new PrintStream(blockFile);
00702 File chromFile = new File(outDir,"chromosomes.txt");
00703 chromFile.createNewFile();
00704 chromOut = new PrintStream(chromFile);
00705 } catch (IOException e){
00706 e.printStackTrace();
00707 System.exit(-1);
00708 }
00709 printInfo(sa,miscallOut,uncallOut,gapOut);
00710 printBlockInfo(blockOut,sa.model);
00711 if (batch){
00712 sumOut.print(ScoreAssembly.getSumText(sa, false, true));
00713 }else {
00714 sumOut.print(ScoreAssembly.getSumText(sa, true, true));
00715 }
00716 AssemblyScorer.calculateMissingGC(sa, outDir, baseName);
00717 AssemblyScorer.printMissingRegions(sa, outDir, baseName);
00718 chromOut.print(getReferenceChromosomes(sa));
00719
00720 blockOut.close();
00721 gapOut.close();
00722 miscallOut.close();
00723 uncallOut.close();
00724 sumOut.close();
00725 chromOut.close();
00726 }
00727
00728 private static String getReferenceChromosomes(AssemblyScorer sa) {
00729 Genome refGenome = sa.getModel().getGenomeBySourceIndex(0);
00730 StringBuilder sb = new StringBuilder();
00731 for(int cI=0; cI<refGenome.getChromosomes().size(); cI++){
00732 Chromosome c = refGenome.getChromosomes().get(cI);
00733 sb.append(c.getName());
00734 sb.append("\t");
00735 sb.append(c.getStart() + 1);
00736 sb.append("\n");
00737 }
00738 return sb.toString();
00739 }
00740
00741 private static void printBlockInfo(PrintStream out, XmfaViewerModel model){
00742 out.println("BlockId\tBlockLength\tRefLeft\tRefRight\tAsmLeft\tAsmRight");
00743 LCB[] lcbList = model.getSplitLcbList();
00744 int l = -1;
00745 int r = -1;
00746 Genome ref = model.getGenomeBySourceIndex(0);
00747 Genome asm = model.getGenomeBySourceIndex(1);
00748 for (LCB lcb: lcbList){
00749 out.print(lcb.id+"\t"+lcb.getAvgSegmentLength());
00750 if (lcb.getReverse(ref)){
00751 out.print("\t"+-1*lcb.getLeftEnd(ref));
00752 out.print("\t"+-1*lcb.getRightEnd(ref));
00753 } else {
00754 out.print("\t"+lcb.getLeftEnd(ref));
00755 out.print("\t"+lcb.getRightEnd(ref));
00756 }
00757 if (lcb.getReverse(asm)){
00758 out.print("\t"+-1*lcb.getLeftEnd(asm));
00759 out.print("\t"+-1*lcb.getRightEnd(asm));
00760 } else {
00761 out.print("\t"+lcb.getLeftEnd(asm));
00762 out.print("\t"+lcb.getRightEnd(asm));
00763 }
00764 out.println();
00765 }
00766 }
00767
00779 public static void printInfo(AssemblyScorer sa, PrintStream miscallOut, PrintStream uncallOut, PrintStream gapOut){
00780 if (miscallOut!=null){
00781 StringBuilder sb = new StringBuilder();
00782 sb.append("SNP_Pattern\tRef_Contig\tRef_PosInContig\tRef_PosGenomeWide\tAssembly_Contig\tAssembly_PosInContig\tAssembly_PosGenomeWide\n");
00783 StringBuilder sb2 = new StringBuilder();
00784 sb2.append("SNP_Pattern\tRef_Contig\tRef_PosInContig\tRef_PosGenomeWide\tAssembly_Contig\tAssembly_PosInContig\tAssembly_PosGenomeWide\n");
00785 for (int i = 0; i < sa.snps.length; i++)
00786 {
00787 if(sa.snps[i].hasAmbiguities())
00788 sb.append(sa.snps[i].toString()+"\n");
00789 else
00790 sb2.append(sa.snps[i].toString()+"\n");
00791 }
00792 uncallOut.print(sb.toString());
00793 uncallOut.flush();
00794 miscallOut.print(sb2.toString());
00795 miscallOut.flush();
00796 }
00797 if (gapOut!=null){
00798 StringBuilder sb = new StringBuilder();
00799 sb.append("Sequence\tContig\tPosition_in_Contig\tGenomeWide_Position\tLength\tGenome0_GlobalPos\tGenome0_contig\tGenome0_LocalPos\tGenome1_GlobalPos\tGenome1_contig\tGenome1_LocalPos\n");
00800 for (int i = 0; i < sa.refGaps.length; i++)
00801 sb.append(sa.refGaps[i].toString("reference")+"\n");
00802 for (int i = 0; i < sa.assGaps.length; i++)
00803 sb.append(sa.assGaps[i].toString("assembly")+"\n");
00804 gapOut.print(sb.toString());
00805 gapOut.flush();
00806 }
00807
00808 }
00809 }