package ca.bcgsc.abyssexplorer.parsers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import ca.bcgsc.abyssexplorer.graph.AbyssGraph;

/**
 * Custom parser for DOT output.
 *
 * WARNING: VERSION NOT YET STABLE
 *
 * <p>Typical use: {@link #open(File)}, then either {@link #initializeGraph()}
 * (which reads the stream to its end) or repeated calls to
 * {@link #parseNextLine()}, then {@link #close()}.
 *
 * @author Cydney Nielsen
 *
 */
public class DotParser implements GraphParser {

	/** Reader over the file passed to {@link #open(File)}; null until then. */
	BufferedReader inputStream;

	// first line of a graph
	protected static Pattern headerPattern = Pattern.compile("^digraph\\s*(\\S+)\\s*\\{\\s*$", Pattern.MULTILINE);

	// last line of a graph
	protected static Pattern footerPattern = Pattern.compile("^\\s*\\}\\s*$", Pattern.MULTILINE);

	// mode line - optionally quoted key and value, e.g. mode = standard;
	protected static Pattern modePattern = Pattern.compile("^\\s*(\")?mode(\")?\\s*=\\s*(\")?(\\w+)(\")?;\\s*$", Pattern.MULTILINE);

	// vertex pattern - e.g. "0+"
	protected static Pattern vertexPattern = Pattern.compile("\"((\\d+)([+-]))\"", Pattern.MULTILINE);

	// adj pattern - e.g. "0+" -> { "513714-" "540790+" };
	protected static Pattern adjPattern = Pattern.compile("^\\s*\"(\\d+)([+-])\"(\\s*->\\s*\\{(( \"\\d+[+-]\")+) \\})?;\\s*$", Pattern.MULTILINE);

	// dist pattern - e.g. "0+"->"39+"[label=-25];
	protected static Pattern distPattern = Pattern.compile("^\\s*\"(\\d+[+-])\"\\s*->\\s*\"(\\d+[+-])\"\\s*\\[label\\s*=\\s*([+-]?\\d+)\\];\\s*$", Pattern.MULTILINE);

	// One matcher per pattern above; all are (re)created by regexSetup.
	protected Matcher hp;
	protected Matcher fp;
	protected Matcher mp;
	protected Matcher vp;
	protected Matcher ap;
	protected Matcher dp;

	/** Placeholder returned by {@link #parseNextLine()} for a line matching no known pattern. */
	protected Object dummyO = new Object();

	/**
	 * Creates a fresh matcher for every pattern over the given text.
	 * Must be called before {@link #matchAdj(String)} or
	 * {@link #matchDist(String)}, which read these matchers.
	 *
	 * @param s text to match against
	 */
	protected void regexSetup(CharSequence s) {
		hp = headerPattern.matcher(s);
		fp = footerPattern.matcher(s);
		mp = modePattern.matcher(s);
		vp = vertexPattern.matcher(s);
		ap = adjPattern.matcher(s);
		dp = distPattern.matcher(s);
	}

	/**
	 * Opens the given file for line-by-line reading.
	 *
	 * @param f file to read
	 * @throws IOException if the file cannot be found; the original
	 *         {@link FileNotFoundException} is attached as the cause
	 */
	public void open(File f) throws IOException {
		String fName = f.getAbsolutePath();
		try {
			inputStream = new BufferedReader(new FileReader(fName));
		} catch (FileNotFoundException e) {
			// keep the cause so the underlying reason is not lost
			throw new IOException("No such file: " + fName, e);
		}
	}

	/**
	 * Closes the underlying reader. Does nothing if no file was ever opened.
	 *
	 * @throws IOException if closing the reader fails
	 */
	public void close() throws IOException {
		if (inputStream != null) {
			inputStream.close();
		}
	}

	/**
	 * Reads the remainder of the open stream, counting adjacency lines and
	 * tracking the largest numeric contig label, then builds a graph sized
	 * from that label.
	 *
	 * @return a new graph constructed with (maxLabel + 1) for both arguments
	 * @throws IOException on read failure, or if no adjacency line was found
	 */
	public AbyssGraph initializeGraph() throws IOException {
		boolean foundAdjGraph = false;
		// count the number of vertices
		int vCount = 0;
		// find the largest contig label id
		// not equivalent to single-end contig count in ABySS
		int maxE = 0;

		// parseNextLine() returns null only at end of stream; this is a
		// reliable EOF test, unlike BufferedReader.ready(), which only
		// reports whether a read would block.
		Object o;
		while ((o = parseNextLine()) != null) {
			if (!(o instanceof AdjacentContigs)) {
				continue; // dist line or no match to known patterns
			}
			foundAdjGraph = true;
			vCount += 1;
			// one connection from single source vertex to each target
			String label = ((AdjacentContigs) o).getLabel();
			int e = Integer.parseInt(label.replace("+", "").replace("-", ""));
			if (e > maxE) {
				maxE = e;
			}
		}

		int eCount = maxE + 1; // 0 based labels
		if (!foundAdjGraph) {
			throw (new IOException("DOT file must contain an 'adj' graph"));
		}
		System.out.println("found " + vCount + " vertices and " + eCount + " edges");
		// actual vCount typically < eCount, but > eCount/2, so using vCount=eCount reasonable estimate
		AbyssGraph g = new AbyssGraph(eCount, eCount);
		return g;
	}

	/**
	 * Reads and classifies the next line of the open stream.
	 *
	 * @return an {@link AdjacentContigs} for an adjacency line, a
	 *         {@link PairedEndPartners} for a distance line, the shared
	 *         {@link #dummyO} placeholder for any other line, or
	 *         null at end of file
	 * @throws IOException if reading fails
	 */
	public Object parseNextLine() throws IOException {
		String s = inputStream.readLine();
		if (s == null) {
			return null;
		}
		regexSetup(s);
		// first try to match adjacency line
		Object o = matchAdj(s);
		if (o == null) {
			// then try to match dist line
			o = matchDist(s);
			if (o == null) {
				o = dummyO;
			}
		}
		return o;
	}

	/**
	 * Extracts the source vertex and its outbound partners from the next
	 * adjacency statement found by the current adjacency matcher.
	 *
	 * <p>Note: matching is done with the matchers prepared by
	 * {@link #regexSetup(CharSequence)}; the argument itself is not read here.
	 *
	 * @param s the text being parsed (expected to be what regexSetup was given)
	 * @return the parsed adjacency, or null if no adjacency statement matches
	 */
	public AdjacentContigs matchAdj(String s) {
		if (!ap.find()) {
			return null;
		}
		// Only vertices inside the matched statement belong to it; without
		// this bound, multi-line input would pull in vertices of later lines.
		int stmtEnd = ap.end();

		// find source vertex pattern
		if (!vp.find(ap.start()) || vp.start() >= stmtEnd) {
			return null; // not expected: adjPattern always begins with a vertex
		}
		AdjacentContigs aContigs = new AdjacentContigs(vp.group(1));

		// find all partner vertex patterns (find() resumes after the previous match)
		while (vp.find() && vp.start() < stmtEnd) {
			aContigs.addOutbound(vp.group(1));
		}
		return aContigs;
	}

	/**
	 * Extracts source, destination and distance from the next distance
	 * statement found by the current distance matcher. Strands are encoded
	 * as 0 for '+' and 1 for '-'.
	 *
	 * <p>Note: matching is done with the matchers prepared by
	 * {@link #regexSetup(CharSequence)}; the argument itself is not read here.
	 *
	 * @param s the text being parsed (expected to be what regexSetup was given)
	 * @return the parsed distance record, or null if no distance statement matches
	 */
	public PairedEndPartners matchDist(String s) {
		if (!dp.find()) {
			return null;
		}
		String src = dp.group(1);
		String des = dp.group(2);
		int dist = Integer.parseInt(dp.group(3));
		return new PairedEndPartners(
				vertexId(src), vertexStrand(src),
				vertexId(des), vertexStrand(des),
				dist);
	}

	/** Returns the numeric id of a vertex label of the form digits followed by '+' or '-'. */
	private static int vertexId(String vertex) {
		return Integer.parseInt(vertex.substring(0, vertex.length() - 1));
	}

	/** Returns 0 if the vertex label ends with '+', otherwise 1. */
	private static byte vertexStrand(String vertex) {
		return (byte) (vertex.endsWith("+") ? 0 : 1);
	}
}