options{ LOOKAHEAD = 2; FORCE_LA_CHECK = true; } PARSER_BEGIN(NewickToTriplet) //! Be very careful with leaf indexing...because we're doing multiple trees!! BE VERY CAREFUL!!! //! This is based on the Newick parser found at http://olduvai.sourceforge.net/tj/tj-javadoc-public/Parser/Newick.html, thanks to the author! import java.util.*; class TreeNode { TreeNode parent; Vector children; String name; int num; public TreeNode() { parent = null; children = new Vector(); name = null; num = -1; } public void addChild( TreeNode c ) { children.add(c); } public TreeNode getParent() { return parent; } public void setParent( TreeNode p ) { this.parent = p; } public void setName( String s ) { name = s; } public String getName() { return name; } public void setNumber(int n) { num = n; } public Vector getChildren() { return children; } public void explore( Vector leaves ) { if( name != null ) leaves.add(this); Enumeration e = children.elements(); while( e.hasMoreElements() ) { ((TreeNode) e.nextElement()).explore( leaves ); } } public boolean isAncestorOf( TreeNode t ) { if( this == t ) return true; TreeNode at = t.parent; while( at != null ) { if( at == this ) return true; at = at.parent; } return false; } } //! ------------------------------------------------------------------- class ExploreTree { private Vector leaves; public ExploreTree( TreeNode root ) { leaves = new Vector(); root.explore(leaves); } public TreeNode getLCA( TreeNode a, TreeNode b ) { if( a == b ) return a; TreeNode current = a; do { if( current.isAncestorOf(b) ) return current; current = current.parent; } while( true ); } public void getTriplets( TripletSet ts ) { int n = leaves.size(); for( int a=1; a <=n; a++ ) for( int b=(a+1); b<=n; b++ ) for( int c=(b+1); c<=n; c++ ) { TreeNode ta = (TreeNode) leaves.elementAt(a-1); TreeNode tb = (TreeNode) leaves.elementAt(b-1); TreeNode tc = (TreeNode) leaves.elementAt(c-1); int anum = NewickToTriplet.getLeafNumber( ta.getName() ); int bnum = NewickToTriplet.getLeafNumber( tb.getName() ); int cnum = NewickToTriplet.getLeafNumber( tc.getName() ); TreeNode v = getLCA( ta, tb ); TreeNode u = getLCA( ta, tc ); if( v != u ) { if( u.isAncestorOf(v) ) { //! System.out.println( ta.getName() + " " + tb.getName() + " " + tc.getName() ); ts.addTriplet( anum, bnum, cnum, 1 ); continue; } } v = getLCA( ta, tc ); u = getLCA( ta, tb ); if( v != u ) { if( u.isAncestorOf(v) ) { //! System.out.println( ta.getName() + " " + tc.getName() + " " + tb.getName() ); ts.addTriplet(anum, cnum, bnum, 1 ); continue; } } v = getLCA( tb, tc ); u = getLCA( tb, ta ); if( v != u ) { if( u.isAncestorOf(v) ) { //! System.out.println( tb.getName() + " " + tc.getName() + " " + ta.getName() ); ts.addTriplet(bnum, cnum, anum, 1 ); continue; } } System.out.println("// Note: no triplet on leaves "+ta.getName()+" "+tb.getName()+" "+tc.getName()+" in this tree"); } } //! ----------- } //! ------------------------------------------ public class NewickToTriplet { public static Hashtable nameToNum; public static Hashtable numToName; public static int seenLeaves = 0; public static int getLeafNumber( String leaf ) { if( nameToNum == null ) nameToNum = new Hashtable(); Integer i = (Integer) nameToNum.get( leaf ); if( i != null ) { return i.intValue(); } seenLeaves++; i = new Integer(seenLeaves); System.out.println("// Leaf '"+leaf+"' gets internal number "+seenLeaves); nameToNum.put( leaf, i ); if( numToName == null ) { numToName = new Hashtable(); } numToName.put( i, leaf ); return seenLeaves; } public static String getLeafName( int num ) { Integer i = new Integer(num); if( numToName == null ) { numToName = new Hashtable(); } String s = (String) numToName.get( i ); return s; } public static TreeNode tree; public static TreeNode current_node; private static TreeNode tn; public static int leaves = 0; public static int trees = 0; public static void parseTrees() { TripletSet ts = new TripletSet(); NewickToTriplet n = new NewickToTriplet(System.in); try{ n.Input(ts); } catch( ParseException e ) { System.out.println("Parsing error!"); e.printStackTrace(); System.exit(0); } ts.dumpTripletsWithHash( numToName, WEIGHTED ); } public static boolean WEIGHTED = true; public static String VERSION = "NewickToTriplet.jj Version 1.0, 21st September 2009"; public static void main(String args[]) { System.out.println("// "+VERSION); System.out.println("// Note: this program reads its input from stdin, not from a file, so if the program"); System.out.println("// seems to have hung then that is probably the reason."); if(args.length == 1 ) { if( args[0].equals("--noweight") ) { WEIGHTED = false; System.out.println("// USER-SETTING: All generated triplets will be unweighted i.e. have weight 1."); } else if( args[0].equals("--help") ) { System.out.println(); System.out.println("// Printing command-line options:"); System.out.println(); System.out.println("--noweight : normally each triplet gets a weight equal to the number of input trees it is in,"); System.out.println(" but specifying this option ensures that each triplet only has weight 1."); System.out.println(); System.exit(0); } } if( WEIGHTED ) { System.out.println("// Each triplet will receive a weight equal to the number of input trees it is in."); } parseTrees(); } } PARSER_END(NewickToTriplet) SKIP : { " " | "\t" | "\n" | "\r" | "\f" | } void Input(TripletSet ts) : { String s; double len; } { ( { tree = new TreeNode(); current_node = tree; leaves = 0; } descendant_list() ( s = label() {} )? ( ":" len = branch_length() )? ";" { trees++; System.out.println("// Processing the triplets from tree "+trees+"..."); ExploreTree et = new ExploreTree(tree); et.getTriplets(ts); System.out.println("// ...done"); } )* } void descendant_list(): { int children = 0;} { "(" { children++; tn = new TreeNode(); tn.setParent(current_node); current_node.addChild(tn); current_node = tn; } subtree() ( "," { children++; tn = new TreeNode(); tn.setParent(current_node); current_node.addChild(tn); current_node = tn; } subtree() )* ")" } /** function subtree will set name, length and weight for each tree node */ void subtree(): { String s; double len; } { descendant_list() {} ( s = label() {} )? ( ":" len = branch_length() {} )? { current_node = current_node.getParent(); } | ( s = label() { leaves++; int x = getLeafNumber(s); current_node.setName(s); current_node.setNumber(x); } )? ( ":" len = branch_length() {} )? { current_node = current_node.parent; } } String label(): { String s; } { s = unquoted_label() { return s; } | s = quoted_label() { return s; } } /** for each unquoted label, we need to replace '_' by ' ' */ String unquoted_label(): { Token t; } { t = { String s = new String(t.toString()); return s; // return s.replace('_', ' '); } | t = { return new String(t.toString()); } } /** for each quoted label, we remove double quotes from the string */ String quoted_label(): { Token t; } { t = { String s = new String(t.toString()); return s.substring(1, s.length()-1); } } double branch_length(): { Token t; } { t = { return Double.parseDouble(t.toString()); } } TOKEN: { <#digit: ["0"-"9"] > | <#alpha: ["a"-"z", "A"-"Z"] > | <#only_quote_char: [ "(", ")", "[", "]", ":", ";", "," ]> | <#single_quote: "''"> | <#both_char: [ "~", "`", "!", "@", "#", "$", "%", "^", "&", "*", "-", "_", "+", "=", "{", "}", "|", ".", "?", "/", "[", "]", "<", ">" ] > | <#whitespace: [ " " , "\t" , "\n" , "\r" ] > | <#unquoted_char: ( | | ) > | <#quoted_char: ( | | )> | <#number: ( )+ ("." ( )* )? | "." ( )+ > | <#exponent: ["e", "E"] ("+"|"-")? ()+ > | ()? > | )+ > | | )+ "'" > }