1 package jasper;
2 
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.FileReader;
6 import java.io.IOException;
7 import java.util.ArrayList;
8 
9 public class SparseSimilarityMatrix {
10 
11 	/*--------------------------------------------------------------*/
12 	/*----------------        Initialization        ----------------*/
13 	/*--------------------------------------------------------------*/
14 
15 	/**
16 	 * Takes in a file of sketch similarity percentages from SketchCompare.
17 	 * Returns a sparse matrix object containing each percentage
18 	 *
19 	 * @param inputFile The file containing pairwise comparisons of each sketch
20 	 * @throws FileNotFoundException
21 	 * @throws IOException
22 	 */
SparseSimilarityMatrix(String inputFile, SparseTree tree_)23 	public SparseSimilarityMatrix(String inputFile, SparseTree tree_) throws FileNotFoundException, IOException {
24 
25 		//Assigns the input tree object to the tree variable.
26 		tree = tree_;
27 
28 		//Take file name as input for building tree of related nodes
29 		in = inputFile;
30 
31 		//Read in file, add header line and add to header variable
32 		try (BufferedReader br = new BufferedReader(new FileReader(in))) {
33 			String line;
34 
35 			//while line isn't empty, process
36 			while ((line = br.readLine()) != null) {
37 
38 				//if line is the header line, split and assign to variable.
39 				//may be used when header becomes more complex
40 				if(line.startsWith("#")) {header=line.split("\t");
41 				} else {
42 
43 					//If not a header line, split on tab.
44 					String[] data = line.split("\t");
45 
46 					//Query organism is column 0.
47 					String queryName = data[0];
48 					//String refName = data[1];
49 
50 				}
51 			}
52 		}
53 
54 		//Get the total number of organisms in the tree.
55 		orgCount = tree.getOrgCount();
56 
57 		//Initialize the matrix with the appropriate size of all nodes.
58 		sparseMatrix = new ArrayList[orgCount + 1];
59 
60 		//Iterate over the matrix and add an ArrayList<Comparison> to each ArrayList.
61 		for(int i=0; i<sparseMatrix.length; i++) {
62 
63 			sparseMatrix[i] = new ArrayList<Comparison>();
64 
65 		}
66 
67 		//Begin reading the file a second time.
68 		try (BufferedReader br = new BufferedReader(new FileReader(in))) {
69 			String line;
70 
71 			//while line isn't empty, process
72 			while ((line = br.readLine()) != null) {
73 
74 				//If line is the header line, split and assign to variable.
75 				//may be used when header becomes more complex
76 				if(line.startsWith("#")) {assert true;
77 				} else {
78 
79 					//If not a header line, split on tab.
80 					String[] data = line.split("\t");
81 
82 					//Column 0 is query name.
83 					String queryName = data[0];
84 
85 					//Column 1 is reference name.
86 					String refName = data[1];
87 
88 					//Column 2 is the similarity percentage.
89 					double similarity = Double.parseDouble(data[2]);
90 
91 					//Check that both names are in the HashMap (too slow?)
92 					if(tree.containsName(queryName)==true && tree.containsName(refName)) {
93 
94 						//Get the positions assigned to both organisms.
95 						int queryPos = nameToNodeId(queryName);
96 						int refPos = nameToNodeId(refName);
97 
98 						Comparison currentComparison = new Comparison(queryPos, refPos, similarity);
99 
100 						//Add the similarity percentage to the appropriate matrix position.
101 						sparseMatrix[queryPos].add(currentComparison);
102 					}
103 				}
104 			}
105 		}
106 	}
107 
108 	/**
109 	 * Method for taking the node name and returning the node ID value
110 	 * @param orgName the organism node name (String).
111 	 * @return int The node ID of the organism name taken as input.
112 	 */
nameToNodeId(String orgName)113 	public int nameToNodeId(String orgName) {
114 
115 		//Get the node associated with the input name.
116 		TreeNode org = tree.getNode(orgName);
117 
118 		//Asserts the org nod is in the tree.
119 		assert(org != null) : orgName;
120 
121 		//Return the int node ID.
122 		return org.nodeId;
123 	}
124 
125 
126 	/**
127 	 * Prints out the entire matrix.
128 	 * Impractical in cases of large input datasets.
129 	 *
130 	 */
toString()131 	public String toString() {
132 		StringBuilder sb=new StringBuilder();
133 		for (int i = 0; i < sparseMatrix.length; i++) {
134 		    for (int j = 0; j < sparseMatrix[i].size(); j++) {
135 		        sb.append(sparseMatrix[i].get(j) + " ");
136 		    }
137 		    sb.append('\n');
138 		}
139 		return sb.toString();
140 	}
141 
142 
143 //TODO: This method is slow and doesnt work, need something better.
144 //	/**
145 //	 * Returns the similarity of two specified organisms.
146 //	 * Both organisms must have been compared using SketchCompare.
147 //	 *
148 //	 * @param org1 The Name of an organism.
149 //	 * @param org2 The name of a second organism.
150 //	 * @return similarity The Double percentage similarity between the two sketches.
151 //	 */
152 //	public Comparison getComparison(String org1, String org2) {
153 //		int orgName1 = nameToNodeId(org1);
154 //		int orgName2 = nameToNodeId(org2);
155 //
156 //		return sparseMatrix[orgName1].get(orgName2);
157 //	}
158 
159 
getSize()160 	public int getSize() {
161 		return orgCount;
162 	}
163 
164 
getOrgRow(String orgName)165 	public ArrayList<Comparison> getOrgRow(String orgName) {
166 		int rowNum = tree.nodeMap.get(orgName).nodeId;
167 		return sparseMatrix[rowNum];
168 	}
169 
170 
171 
172 	/*--------------------------------------------------------------*/
173 	/*----------------            Fields            ----------------*/
174 	/*--------------------------------------------------------------*/
175 
176 	/**
177 	 * A SparseTree object that contains taxonomic information relevant to this matrix.
178 	 */
179 	final SparseTree tree;
180 
181 	/**
182 	 * An arraylist containing comparisons between nodes in the tree.
183 	 */
184 	private final ArrayList<Comparison>[] sparseMatrix;
185 
186 	/**
187 	 * The number of sketches being analyzed.
188 	 */
189 	private int orgCount;
190 
191 	/**
192 	 * ArrayList that will hold the lines of the input file.
193 	 */
194 	ArrayList<String> lines = new ArrayList<String>();
195 
196 	/**
197 	 * Header line of the comparison input file.
198 	 */
199 	private String[] header;
200 
201 	/**
202 	 * Input file name.
203 	 */
204 	private String in=null;
205 
206 	/**
207 	 * Number of lines processed from the sketch comparison file.
208 	 */
209 	private long linesProcessed=0;
210 
211 }
212