[21] | 1 | /* |
---|
| 2 | * Cloud9: A MapReduce Library for Hadoop |
---|
| 3 | * |
---|
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you |
---|
| 5 | * may not use this file except in compliance with the License. You may |
---|
| 6 | * obtain a copy of the License at |
---|
| 7 | * |
---|
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
| 9 | * |
---|
| 10 | * Unless required by applicable law or agreed to in writing, software |
---|
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
---|
| 13 | * implied. See the License for the specific language governing |
---|
| 14 | * permissions and limitations under the License. |
---|
| 15 | */ |
---|
| 16 | |
---|
| 17 | package tw.org.nchc.demo; |
---|
| 18 | |
---|
| 19 | import java.io.BufferedReader; |
---|
| 20 | import java.io.FileInputStream; |
---|
| 21 | import java.io.IOException; |
---|
| 22 | import java.io.InputStreamReader; |
---|
| 23 | import java.util.Collection; |
---|
| 24 | import java.util.List; |
---|
| 25 | |
---|
| 26 | import edu.uci.ics.jung.algorithms.cluster.WeakComponentGraphClusterer; |
---|
| 27 | import edu.uci.ics.jung.algorithms.importance.PageRank; |
---|
| 28 | import edu.uci.ics.jung.algorithms.importance.Ranking; |
---|
| 29 | import edu.uci.ics.jung.graph.DirectedSparseGraph; |
---|
| 30 | import edu.uci.ics.jung.graph.Graph; |
---|
| 31 | |
---|
| 32 | /** |
---|
| 33 | * <p> |
---|
| 34 | * Program that computes PageRank for a graph using the <a |
---|
| 35 | * href="http://jung.sourceforge.net/">JUNG</a> package (2.0 alpha1). Program |
---|
| 36 | * takes two command-line arguments: the first is a file containing the graph |
---|
| 37 | * data, and the second is the random jump factor (a typical setting is 0.15). |
---|
| 38 | * </p> |
---|
| 39 | * |
---|
| 40 | * <p> |
---|
| 41 | * The graph should be represented as an adjacency list. Each line should have |
---|
| 42 | * at least one token; tokens should be tab delimited. The first token |
---|
| 43 | * represents the unique id of the source node; subsequent tokens represent its |
---|
| 44 | * link targets (i.e., outlinks from the source node). For completeness, there |
---|
| 45 | * should be a line representing all nodes, even nodes without outlinks (those |
---|
| 46 | * lines will simply contain one token, the source node id). |
---|
| 47 | * </p> |
---|
| 48 | * |
---|
| 49 | */ |
---|
| 50 | public class SequentialPageRank { |
---|
| 51 | |
---|
| 52 | private SequentialPageRank() { |
---|
| 53 | } |
---|
| 54 | |
---|
| 55 | /** |
---|
| 56 | * Runs the program |
---|
| 57 | */ |
---|
| 58 | public static void main(String[] args) throws IOException { |
---|
| 59 | if (args.length != 2) { |
---|
| 60 | System.err |
---|
| 61 | .println("usage: SequentialPageRage [graph-adjacency-list] [random-jump-factor]"); |
---|
| 62 | System.exit(-1); |
---|
| 63 | } |
---|
| 64 | String infile = args[0]; |
---|
| 65 | float alpha = Float.parseFloat(args[1]); |
---|
| 66 | |
---|
| 67 | int edgeCnt = 0; |
---|
| 68 | DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<String, Integer>(); |
---|
| 69 | |
---|
| 70 | BufferedReader data = new BufferedReader(new InputStreamReader( |
---|
| 71 | new FileInputStream(infile))); |
---|
| 72 | |
---|
| 73 | String line; |
---|
| 74 | while ((line = data.readLine()) != null) { |
---|
| 75 | line.trim(); |
---|
| 76 | String[] arr = line.split("\\t"); |
---|
| 77 | |
---|
| 78 | for (int i = 1; i < arr.length; i++) { |
---|
| 79 | graph.addEdge(new Integer(edgeCnt++), arr[0], arr[i]); |
---|
| 80 | } |
---|
| 81 | } |
---|
| 82 | |
---|
| 83 | data.close(); |
---|
| 84 | |
---|
| 85 | WeakComponentGraphClusterer<String, Integer> clusterer = new WeakComponentGraphClusterer<String, Integer>(); |
---|
| 86 | |
---|
| 87 | Collection<Graph<String, Integer>> components = clusterer |
---|
| 88 | .transform(graph); |
---|
| 89 | int numComponents = components.size(); |
---|
| 90 | System.out.println("Number of components: " + numComponents); |
---|
| 91 | System.out.println("Number of edges: " + graph.getEdgeCount()); |
---|
| 92 | System.out.println("Number of nodes: " + graph.getVertexCount()); |
---|
| 93 | System.out.println("Random jump factor: " + alpha); |
---|
| 94 | |
---|
| 95 | PageRank<String, Integer> ranker = new PageRank<String, Integer>(graph, |
---|
| 96 | alpha); |
---|
| 97 | ranker.evaluate(); |
---|
| 98 | |
---|
| 99 | System.out.println("\nPageRank of nodes, in descending order:"); |
---|
| 100 | for (Ranking<?> s : (List<Ranking<?>>) ranker.getRankings()) { |
---|
| 101 | String pmid = s.getRanked().toString(); |
---|
| 102 | |
---|
| 103 | System.out.println(pmid + " " + s.rankScore); |
---|
| 104 | } |
---|
| 105 | } |
---|
| 106 | |
---|
| 107 | } |
---|