1 | /* |
---|
2 | * Cloud9: A MapReduce Library for Hadoop |
---|
3 | * |
---|
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you |
---|
5 | * may not use this file except in compliance with the License. You may |
---|
6 | * obtain a copy of the License at |
---|
7 | * |
---|
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
9 | * |
---|
10 | * Unless required by applicable law or agreed to in writing, software |
---|
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
---|
13 | * implied. See the License for the specific language governing |
---|
14 | * permissions and limitations under the License. |
---|
15 | */ |
---|
16 | |
---|
17 | package tw.org.nchc.demo; |
---|
18 | |
---|
19 | import java.io.BufferedReader; |
---|
20 | import java.io.FileInputStream; |
---|
21 | import java.io.IOException; |
---|
22 | import java.io.InputStreamReader; |
---|
23 | import java.util.Collection; |
---|
24 | import java.util.List; |
---|
25 | |
---|
26 | import edu.uci.ics.jung.algorithms.cluster.WeakComponentGraphClusterer; |
---|
27 | import edu.uci.ics.jung.algorithms.importance.PageRank; |
---|
28 | import edu.uci.ics.jung.algorithms.importance.Ranking; |
---|
29 | import edu.uci.ics.jung.graph.DirectedSparseGraph; |
---|
30 | import edu.uci.ics.jung.graph.Graph; |
---|
31 | |
---|
32 | /** |
---|
33 | * <p> |
---|
34 | * Program that computes PageRank for a graph using the <a |
---|
35 | * href="http://jung.sourceforge.net/">JUNG</a> package (2.0 alpha1). Program |
---|
36 | * takes two command-line arguments: the first is a file containing the graph |
---|
37 | * data, and the second is the random jump factor (a typical setting is 0.15). |
---|
38 | * </p> |
---|
39 | * |
---|
40 | * <p> |
---|
41 | * The graph should be represented as an adjacency list. Each line should have |
---|
42 | * at least one token; tokens should be tab delimited. The first token |
---|
43 | * represents the unique id of the source node; subsequent tokens represent its |
---|
44 | * link targets (i.e., outlinks from the source node). For completeness, there |
---|
45 | * should be a line representing all nodes, even nodes without outlinks (those |
---|
46 | * lines will simply contain one token, the source node id). |
---|
47 | * </p> |
---|
48 | * |
---|
49 | */ |
---|
50 | public class SequentialPageRank { |
---|
51 | |
---|
52 | private SequentialPageRank() { |
---|
53 | } |
---|
54 | |
---|
55 | /** |
---|
56 | * Runs the program |
---|
57 | */ |
---|
58 | public static void main(String[] args) throws IOException { |
---|
59 | if (args.length != 2) { |
---|
60 | System.err |
---|
61 | .println("usage: SequentialPageRage [graph-adjacency-list] [random-jump-factor]"); |
---|
62 | System.exit(-1); |
---|
63 | } |
---|
64 | String infile = args[0]; |
---|
65 | float alpha = Float.parseFloat(args[1]); |
---|
66 | |
---|
67 | int edgeCnt = 0; |
---|
68 | DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<String, Integer>(); |
---|
69 | |
---|
70 | BufferedReader data = new BufferedReader(new InputStreamReader( |
---|
71 | new FileInputStream(infile))); |
---|
72 | |
---|
73 | String line; |
---|
74 | while ((line = data.readLine()) != null) { |
---|
75 | line.trim(); |
---|
76 | String[] arr = line.split("\\t"); |
---|
77 | |
---|
78 | for (int i = 1; i < arr.length; i++) { |
---|
79 | graph.addEdge(new Integer(edgeCnt++), arr[0], arr[i]); |
---|
80 | } |
---|
81 | } |
---|
82 | |
---|
83 | data.close(); |
---|
84 | |
---|
85 | WeakComponentGraphClusterer<String, Integer> clusterer = new WeakComponentGraphClusterer<String, Integer>(); |
---|
86 | |
---|
87 | Collection<Graph<String, Integer>> components = clusterer |
---|
88 | .transform(graph); |
---|
89 | int numComponents = components.size(); |
---|
90 | System.out.println("Number of components: " + numComponents); |
---|
91 | System.out.println("Number of edges: " + graph.getEdgeCount()); |
---|
92 | System.out.println("Number of nodes: " + graph.getVertexCount()); |
---|
93 | System.out.println("Random jump factor: " + alpha); |
---|
94 | |
---|
95 | PageRank<String, Integer> ranker = new PageRank<String, Integer>(graph, |
---|
96 | alpha); |
---|
97 | ranker.evaluate(); |
---|
98 | |
---|
99 | System.out.println("\nPageRank of nodes, in descending order:"); |
---|
100 | for (Ranking<?> s : (List<Ranking<?>>) ranker.getRankings()) { |
---|
101 | String pmid = s.getRanked().toString(); |
---|
102 | |
---|
103 | System.out.println(pmid + " " + s.rankScore); |
---|
104 | } |
---|
105 | } |
---|
106 | |
---|
107 | } |
---|