--- /dev/null
+public class GlobalQuery {
+ GlobalString hostname;
+ GlobalString path;
+ int depth;
+
+ public GlobalQuery(GlobalString hostname) {
+ this.hostname = global new GlobalString(hostname);
+ this.path = global new GlobalString("");
+ this.depth = 0;
+ }
+
+ public GlobalQuery(GlobalString hostname, GlobalString path, int depth) {
+ this.hostname = global new GlobalString(hostname);
+ this.path = global new GlobalString(path);
+ this.depth = depth;
+ }
+
+ public int getDepth() {
+ return depth;
+ }
+
+ public GlobalString getHostName() {
+ return hostname;
+ }
+
+ public GlobalString getPath() {
+ return path;
+ }
+
+ public GlobalString makewebcanonical(GlobalString page) {
+ GlobalStringBuffer b = global new GlobalStringBuffer(getHostName(page));
+ b.append("/");
+ b.append(getPathName(page));
+ return b.toGlobalString();
+ }
+
+ public GlobalString getHostName(GlobalString page) {
+ GlobalString http = global new GlobalString("http://");
+ GlobalString https = global new GlobalString("https://");
+ int beginindex;
+ int endindex;
+
+ if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
+ return getHostName();
+ }
+ else if (page.indexOf(https) != -1) {
+ beginindex = page.indexOf(https) + https.length();
+ }
+ else {
+ beginindex = page.indexOf(http) + http.length();
+ }
+ endindex = page.indexOf('/',beginindex+1);
+
+ if ((beginindex == -1)) {
+ System.printString("ERROR");
+ }
+ if (endindex == -1)
+ endindex = page.length();
+
+ return page.subString(beginindex, endindex);
+ }
+
+
+ public GlobalString getPathName(GlobalString page) {
+ GlobalString http = global new GlobalString("http://");
+ GlobalString https = global new GlobalString("https://");
+ int beginindex;
+ int nextindex;
+
+ if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
+ GlobalString path = getPath();
+ int lastindex = path.lastindexOf('/');
+ if (lastindex == -1)
+ return page;
+
+ GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1));
+ sb.append(page);
+ return sb.toGlobalString();
+ }
+ else if (page.indexOf(https) != -1) {
+ beginindex = page.indexOf(https) + https.length();
+ }
+ else {
+ beginindex = page.indexOf(http) + http.length();
+ }
+ nextindex = page.indexOf('/',beginindex+1);
+
+ if ((beginindex == -1) || (nextindex == -1))
+ return global new GlobalString("index.html");
+ return page.subString(nextindex+1, page.length());
+ }
+}
--- /dev/null
+/**
+ * Thread-local counterpart of a crawl request: host, path, link depth, and a
+ * buffer that accumulates the fetched response body.
+ */
+public class LocalQuery {
+  String hostname;
+  String path;
+  StringBuffer response;
+  int depth;
+
+  /** Creates a query for the given host, path and depth with an empty response buffer. */
+  public LocalQuery(String hostname, String path, int depth) {
+    this.hostname = new String(hostname);
+    this.path = new String(path);
+    response = new StringBuffer();
+    this.depth = depth;
+  }
+
+  /** Returns the link depth recorded for this query. */
+  public int getDepth() {
+    return depth;
+  }
+
+  /** Returns the host this query targets. */
+  public String getHostName() {
+    return hostname;
+  }
+
+  /** Returns the path this query targets (no leading slash). */
+  public String getPath() {
+    return path;
+  }
+
+  /**
+   * Writes the accumulated response to a file named after host+path, with every
+   * '/' replaced by '#'.
+   */
+  public void outputFile() {
+    StringBuffer sb = new StringBuffer(hostname);
+    sb.append(path);
+    FileOutputStream fos = new FileOutputStream(sb.toString().replace('/','#'));
+    fos.write(response.toString().getBytes());
+    fos.close();
+  }
+
+  /**
+   * Resolves a link found on this query's page into "host/path" form.
+   *
+   * @param page the raw link text (absolute URL or relative reference)
+   * @return host, a single '/', and the resolved path
+   */
+  public String makewebcanonical(String page) {
+    StringBuffer b = new StringBuffer(getHostName(page));
+    b.append("/");
+    b.append(getPathName(page));
+    return b.toString();
+  }
+
+  /**
+   * Extracts the host part of a link.
+   *
+   * @param page the raw link text
+   * @return the text between the scheme prefix and the next '/', or this
+   *         query's own host when the link carries no http/https prefix
+   */
+  public String getHostName(String page) {
+    String http = new String("http://");
+    String https = new String("https://");
+    // Scan for each scheme once instead of re-running indexOf per branch.
+    int httpsAt = page.indexOf(https);
+    int httpAt = page.indexOf(http);
+    int beginindex;
+
+    if (httpsAt != -1) {
+      beginindex = httpsAt + https.length();
+    } else if (httpAt != -1) {
+      beginindex = httpAt + http.length();
+    } else {
+      // No scheme: the link lives on the current host.
+      return getHostName();
+    }
+
+    // beginindex is always past a matched prefix here, so it cannot be -1.
+    int endindex = page.indexOf('/', beginindex+1);
+    if (endindex == -1)
+      endindex = page.length();
+
+    return page.subString(beginindex, endindex);
+  }
+
+  /**
+   * Extracts the path part of a link, resolving relative links against this
+   * query's own path.
+   *
+   * @param page the raw link text
+   * @return the path without a leading '/'; "index.html" when an absolute URL
+   *         has no path component
+   */
+  public String getPathName(String page) {
+    String http = new String("http://");
+    String https = new String("https://");
+    int httpsAt = page.indexOf(https);
+    int httpAt = page.indexOf(http);
+    int beginindex;
+
+    if ((httpsAt == -1) && (httpAt == -1)) {
+      // A link starting with '/' is relative to the host root, not to the
+      // current directory; joining it to the directory produced "dir//x".
+      if (page.indexOf('/', 0) == 0)
+        return page.subString(1, page.length());
+
+      String path = getPath();
+      int lastindex = path.lastindexOf('/');
+      if (lastindex == -1)
+        return page;
+
+      // Keep the current directory (up to and including its last '/').
+      StringBuffer sb = new StringBuffer(path.subString(0,lastindex+1));
+      sb.append(page);
+      return sb.toString();
+    }
+
+    if (httpsAt != -1) {
+      beginindex = httpsAt + https.length();
+    } else {
+      beginindex = httpAt + http.length();
+    }
+
+    int nextindex = page.indexOf('/', beginindex+1);
+    if (nextindex == -1)
+      return new String("index.html");
+    return page.subString(nextindex+1, page.length());
+  }
+}
--- /dev/null
+/**
+ * A queue of GlobalString entries with a membership test.
+ *
+ * Entries can arrive two ways: pushed directly onto this object (it extends
+ * Queue) or through addQuery, which stores them in the separate {@code queries}
+ * queue. checkQuery consults both.
+ */
+public class QueryList extends Queue {
+  Queue queries;
+
+  /** Creates an empty list backed by a freshly allocated global queue. */
+  public QueryList() {
+    queries = global new Queue();
+  }
+
+  /**
+   * Reports whether an equal string is already stored.
+   *
+   * Previously only the inherited storage was scanned, so nothing added via
+   * addQuery could ever be found.
+   *
+   * NOTE(review): like the original scan, this assumes a Queue keeps its live
+   * entries in elements[0..size) - confirm against the Queue implementation.
+   *
+   * @param x the string to look for
+   * @return true if an equal entry is present in either store
+   */
+  public boolean checkQuery(GlobalString x) {
+    for (int i = 0; i < size; i++) {
+      if (x.equals((GlobalString)elements[i]))
+        return true;
+    }
+
+    int added = queries.size();
+    for (int i = 0; i < added; i++) {
+      if (x.equals((GlobalString)queries.elements[i]))
+        return true;
+    }
+    return false;
+  }
+
+  /** Appends a string to the {@code queries} store. */
+  public void addQuery(GlobalString x) {
+    queries.push(x);
+  }
+}
--- /dev/null
+/**
+ * An unordered work set of LocalQuery objects with an explicit element count.
+ */
+public class QueryQueue {
+  HashSet queries;
+  int size;
+
+  /** Creates an empty queue. */
+  public QueryQueue() {
+    queries = new HashSet();
+    size = 0;
+  }
+
+  /**
+   * Removes and returns an arbitrary query.
+   *
+   * @return a stored query, or null when nothing is stored
+   */
+  public LocalQuery pop() {
+    if (queries.isEmpty())
+      return null;
+    LocalQuery q = (LocalQuery) queries.iterator().next();
+    queries.remove(q);
+    size--;
+    return q;
+  }
+
+  /**
+   * Stores a query.
+   *
+   * The count only grows when the set actually accepts the element. Counting
+   * rejected duplicates left size above the real element count, so isEmpty()
+   * could report false while pop() returned null.
+   */
+  public void push(LocalQuery x) {
+    if (queries.add(x))
+      size++;
+  }
+
+  /** Returns the number of stored queries. */
+  public int size() {
+    return size;
+  }
+
+  /** Returns true when no queries are stored. */
+  public boolean isEmpty() {
+    return size == 0;
+  }
+}
--- /dev/null
+/**
+ * Crawl task: fetches the page named by the current work item over HTTP,
+ * indexes the words of its title into {@code results}, and collects the links
+ * it contains as follow-up queries.
+ *
+ * NOTE(review): todoList, doneList and myWork are not declared here; they are
+ * presumably fields inherited from Task - confirm.
+ */
+public class QueryTask extends Task {
+  int maxDepth;
+  Queue toprocess;
+  DistributedHashMap results;
+  GlobalString workingURL;
+
+  /**
+   * @param todoList queue of queries still to be fetched
+   * @param doneList map keyed by "host/path" of pages already fetched
+   * @param maxDepth queries at this depth or deeper are not fetched
+   * @param results  map from title word to the queue of URLs containing it
+   */
+  public QueryTask(Queue todoList, DistributedHashMap doneList, int maxDepth, DistributedHashMap results) {
+    this.todoList = todoList;
+    this.doneList = doneList;
+    this.maxDepth = maxDepth;
+    this.results = results;
+  }
+
+  /**
+   * Fetches and processes the current work item, unless its depth has reached
+   * maxDepth. On success workingURL and toprocess are set for done().
+   */
+  public void execute() {
+    int depth;
+    int max;
+
+    atomic {
+      depth = ((GlobalQuery)myWork).getDepth();
+      max = this.maxDepth;
+    }
+
+    if (depth >= max)
+      return;
+
+    /* global variables */
+    GlobalQuery gq;
+
+    /* local variables */
+    LocalQuery lq;
+    String hostname;
+    String path;
+
+    atomic {
+      gq = (GlobalQuery)myWork;
+      hostname = new String(GlobalString.toLocalCharArray(gq.getHostName()));
+      path = new String(GlobalString.toLocalCharArray(gq.getPath()));
+
+      GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
+      gsb.append("/");
+      gsb.append(path);
+      workingURL = global new GlobalString(gsb.toGlobalString());
+    }
+    lq = new LocalQuery(hostname, path, depth);
+
+    System.printString(lq.getDepth()+" ");
+    System.printString("Processing - Hostname : ");
+    System.printString(hostname);
+    System.printString(", Path : ");
+    System.printString(path);
+    System.printString("\n");
+
+    Socket s = new Socket(hostname, 80);
+
+    requestQuery(hostname, path, s);
+    readResponse(lq, s);
+    // The response is fully buffered in lq; release the socket before the
+    // transactional work instead of holding it across both atomic blocks.
+    s.close();
+
+    atomic {
+      processList(lq, workingURL, results);
+    }
+
+    atomic {
+      toprocess = processPage(lq);
+    }
+  }
+
+  /**
+   * Records the fetched URL as done and moves every not-yet-done follow-up
+   * query onto the todo list.
+   *
+   * Both fields are null when execute() skipped the fetch before ever having
+   * processed a page, so each is checked before use.
+   */
+  public void done(Object obj) {
+    if (workingURL != null) {
+      GlobalString str = global new GlobalString("true");
+      doneList.put(workingURL, str);
+    }
+
+    if (toprocess == null)
+      return;
+
+    while(!toprocess.isEmpty()) {
+      GlobalQuery q = (GlobalQuery)toprocess.pop();
+
+      GlobalString hostname = global new GlobalString(q.getHostName());
+      GlobalString path = global new GlobalString(q.getPath());
+
+      // Key format matches workingURL: host + "/" + path.
+      GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
+      gsb.append("/");
+      gsb.append(path);
+
+      if (!doneList.containsKey(gsb.toGlobalString())) {
+        todoList.push(q);
+      }
+    }
+  }
+
+  /**
+   * Sends a GET request for "/" + path to the socket.
+   *
+   * "Connection: close" is sent because readResponse() reads until the peer
+   * closes the connection; an HTTP/1.1 server otherwise keeps it open.
+   */
+  public static void requestQuery(String hostname, String path, Socket sock) {
+    StringBuffer req = new StringBuffer("GET ");
+    req.append("/");
+    req.append(path);
+    req.append(" HTTP/1.1\r\nHost:");
+    req.append(hostname);
+    req.append("\r\nConnection: close\r\n\r\n");
+    sock.write(req.toString().getBytes());
+  }
+
+  /**
+   * Skips the response header and appends everything after the blank line to
+   * lq.response.
+   *
+   * @param lq   query whose response buffer receives the body
+   * @param sock socket to read from
+   */
+  public static void readResponse(LocalQuery lq, Socket sock) {
+    // state = number of bytes of the terminator "\r\n\r\n" matched so far.
+    int state = 0;
+    byte[] b = new byte[1];
+
+    while (state < 4) {
+      if (sock.read(b) != 1)
+        return;
+
+      if ((state == 0) || (state == 2)) {
+        // Expecting '\r'.
+        if (b[0] == '\r')
+          state++;
+        else
+          state = 0;
+      } else {
+        // Expecting '\n'.
+        if (b[0] == '\n')
+          state++;
+        else if (b[0] == '\r')
+          // A stray '\r' can itself begin the terminator; resetting to 0
+          // here missed sequences such as "\r\r\n\r\n".
+          state = 1;
+        else
+          state = 0;
+      }
+    }
+
+    while (true) {
+      byte[] buffer = new byte[1024];
+      int numchars = sock.read(buffer);
+      // Stop on end of stream; a negative count must not reach subString.
+      if (numchars <= 0)
+        return;
+      String curr = (new String(buffer)).subString(0, numchars);
+      lq.response.append(curr);
+    }
+  }
+
+  /**
+   * Indexes the page title: every space-separated word of the text between
+   * "<title>" and "</title>" gets url appended to its queue in results.
+   * Pages without a complete title element are ignored.
+   */
+  public static void processList(LocalQuery lq, GlobalString url, DistributedHashMap results) {
+    String sTitle = new String("<title>");
+    String eTitle = new String("</title>");
+    String searchstr = lq.response.toString();
+
+    int sIndex = searchstr.indexOf(sTitle);
+    if (sIndex == -1)
+      return;
+
+    int titleStart = sIndex + sTitle.length();
+    int eIndex = searchstr.indexOf(eTitle, titleStart);
+    // An unterminated title previously led to subString(start, -1).
+    if (eIndex == -1)
+      return;
+
+    String title = new String(searchstr.subString(titleStart, eIndex));
+    LinkedList ll = tokenize(title);
+
+    Queue q;
+    while (!ll.isEmpty()) {
+      GlobalString word = global new GlobalString(ll.pop().toString());
+
+      if (!results.containsKey(word)) {
+        q = global new Queue();
+      }
+      else {
+        q = (Queue)(results.get(word));
+      }
+      q.push(url);
+      results.put(word, q);
+
+      System.out.println("Key : ["+word.toLocalString()+"],["+q.size()+"]");
+    }
+  }
+
+  /**
+   * Splits str on single spaces.
+   *
+   * @return the non-empty tokens in order; runs of spaces and leading or
+   *         trailing spaces no longer yield empty tokens
+   */
+  public static LinkedList tokenize(String str) {
+    LinkedList ll = new LinkedList();
+    int sIndex = 0;
+    int eIndex;
+    String token;
+
+    while (true) {
+      eIndex = str.indexOf(' ', sIndex);
+      if (eIndex == -1) {
+        token = str.subString(sIndex);
+        if (token.length() > 0)
+          ll.add(token);
+        break;
+      }
+
+      token = str.subString(sIndex, eIndex);
+      if (token.length() > 0)
+        ll.add(token);
+      sIndex = eIndex+1;
+    }
+
+    return ll;
+  }
+
+  /**
+   * Collects every href="..." target in the response as a GlobalQuery one level
+   * deeper than lq.
+   *
+   * @return a global queue of follow-up queries (empty when no links are found)
+   */
+  public static Queue processPage(LocalQuery lq) {
+    int index = 0;
+    String href = new String("href=\"");
+    String searchstr = lq.response.toString();
+    int depth = lq.getDepth() + 1;
+    Queue toprocess = global new Queue();
+
+    while (true) {
+      int mindex = searchstr.indexOf(href, index);
+      if (mindex == -1)
+        break;
+
+      int linkStart = mindex + href.length();
+      int endquote = searchstr.indexOf('"', linkStart);
+      if (endquote == -1)
+        break;
+
+      String match = searchstr.subString(linkStart, endquote);
+
+      // Host and path are resolved separately; the combined canonical string
+      // the original also built was never used.
+      GlobalString ghostname = global new GlobalString(lq.getHostName(match));
+      GlobalString gpath = global new GlobalString(lq.getPathName(match));
+
+      GlobalQuery gq = global new GlobalQuery(ghostname, gpath, depth);
+      toprocess.push(gq);
+
+      index = endquote;
+    }
+    return toprocess;
+  }
+}
--- /dev/null
+/**
+ * Entry point of the crawler.
+ *
+ * Arguments: thread count, start host, and an optional maximum depth
+ * (default 3). One Work/QueryTask pair is created per thread and each Work is
+ * started with the machine id at the same index.
+ */
+public class Spider {
+  public static void main(String[] args) {
+    int NUM_THREADS;
+    int maxDepth = 3;
+    int i;
+    Work[] works;
+    QueryTask[] qt;
+    GlobalQuery[] currentWorkList;
+
+    // args[0] and args[1] are both required below.
+    if (args.length < 2) {
+      System.printString("usage: Spider <num_threads> <start_host> [max_depth]\n");
+      return;
+    }
+
+    NUM_THREADS = Integer.parseInt(args[0]);
+    if (NUM_THREADS < 1) {
+      System.printString("num_threads must be at least 1\n");
+      return;
+    }
+
+    if (args.length == 3) {
+      maxDepth = Integer.parseInt(args[2]);
+    }
+
+    GlobalString firstmachine;
+
+    // Machine ids are IPv4 addresses packed into an int (128.195.180.x).
+    // Only the slots that exist are filled: writing mid[1] and mid[2]
+    // unconditionally overran the array for fewer than three threads.
+    // NOTE(review): threads past the third still get machine id 0, as before.
+    int mid[] = new int[NUM_THREADS];
+    mid[0] = (128<<24)|(195<<16)|(180<<8)|21;
+    if (NUM_THREADS > 1)
+      mid[1] = (128<<24)|(195<<16)|(180<<8)|24;
+    if (NUM_THREADS > 2)
+      mid[2] = (128<<24)|(195<<16)|(180<<8)|26;
+
+    atomic {
+      firstmachine = global new GlobalString(args[1]);
+
+      works = global new Work[NUM_THREADS];
+      qt = global new QueryTask[NUM_THREADS];
+      currentWorkList = global new GlobalQuery[NUM_THREADS];
+
+      GlobalQuery firstquery = global new GlobalQuery(firstmachine);
+
+      Queue todoList = global new Queue();
+      DistributedHashMap doneList = global new DistributedHashMap(500, 500, 0.75f);
+      DistributedHashMap results = global new DistributedHashMap(100, 100, 0.75f);
+
+      // Seed the crawl with the start host.
+      todoList.push(firstquery);
+
+      for (i = 0; i < NUM_THREADS; i++) {
+        qt[i] = global new QueryTask(todoList, doneList, maxDepth, results);
+        works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
+      }
+    }
+    System.printString("Finished to create Objects\n");
+
+    Work tmp;
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      Thread.myStart(tmp, mid[i]);
+    }
+
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      tmp.join();
+    }
+  }
+}
--- /dev/null
+128.195.180.21
+128.195.180.24
+128.195.180.26