From 00644dca5130d965953974c0625ed09114cbb57c Mon Sep 17 00:00:00 2001 From: hkhang Date: Mon, 2 Nov 2009 21:48:27 +0000 Subject: [PATCH] *** empty log message *** --- .../Spider/recovery/GlobalQuery.java | 92 +++++++ .../Spider/recovery/LocalQuery.java | 53 ++-- .../Spider/recovery/QueryQueue.java | 1 - .../Benchmarks/Spider/recovery/QueryTask.java | 257 ++++++++++++++++++ .../Benchmarks/Spider/recovery/Spider.java | 82 ++---- .../src/Benchmarks/Spider/recovery/dstm.conf | 12 +- .../src/Benchmarks/Spider/recovery/makefile | 9 +- .../JavaDSM/DistributedHashMap.java | 46 ++-- Robust/src/ClassLibrary/JavaDSM/Task.java | 3 +- 9 files changed, 436 insertions(+), 119 deletions(-) create mode 100644 Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java create mode 100644 Robust/src/Benchmarks/Spider/recovery/QueryTask.java diff --git a/Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java b/Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java new file mode 100644 index 00000000..0a9d1630 --- /dev/null +++ b/Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java @@ -0,0 +1,92 @@ +public class GlobalQuery { + GlobalString hostname; + GlobalString path; + int depth; + + public GlobalQuery(GlobalString hostname) { + this.hostname = global new GlobalString(hostname); + this.path = global new GlobalString(""); + this.depth = 0; + } + + public GlobalQuery(GlobalString hostname, GlobalString path, int depth) { + this.hostname = global new GlobalString(hostname); + this.path = global new GlobalString(path); + this.depth = depth; + } + + public int getDepth() { + return depth; + } + + public GlobalString getHostName() { + return hostname; + } + + public GlobalString getPath() { + return path; + } + + public GlobalString makewebcanonical(GlobalString page) { + GlobalStringBuffer b = global new GlobalStringBuffer(getHostName(page)); + b.append("/"); + b.append(getPathName(page)); + return b.toGlobalString(); + } + + public GlobalString getHostName(GlobalString page) { + GlobalString http = global new GlobalString("http://"); + GlobalString https = global new GlobalString("https://"); + int beginindex; + int endindex; + + if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) { + return getHostName(); + } + else if (page.indexOf(https) != -1) { + beginindex = page.indexOf(https) + https.length(); + } + else { + beginindex = page.indexOf(http) + http.length(); + } + endindex = page.indexOf('/',beginindex+1); + + if ((beginindex == -1)) { + System.printString("ERROR"); + } + if (endindex == -1) + endindex = page.length(); + + return page.subString(beginindex, endindex); + } + + + public GlobalString getPathName(GlobalString page) { + GlobalString http = global new GlobalString("http://"); + GlobalString https = global new GlobalString("https://"); + int beginindex; + int nextindex; + + if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) { + GlobalString path = getPath(); + int lastindex = path.lastindexOf('/'); + if (lastindex == -1) + return page; + + GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1)); + sb.append(page); + return sb.toGlobalString(); + } + else if (page.indexOf(https) != -1) { + beginindex = page.indexOf(https) + https.length(); + } + else { + beginindex = page.indexOf(http) + http.length(); + } + nextindex = page.indexOf('/',beginindex+1); + + if ((beginindex == -1) || (nextindex == -1)) + return global new GlobalString("index.html"); + return page.subString(nextindex+1, page.length()); + } +} diff --git a/Robust/src/Benchmarks/Spider/recovery/LocalQuery.java b/Robust/src/Benchmarks/Spider/recovery/LocalQuery.java index 2315b1e5..1beeadbe 100644 --- a/Robust/src/Benchmarks/Spider/recovery/LocalQuery.java +++ b/Robust/src/Benchmarks/Spider/recovery/LocalQuery.java @@ -40,23 +40,37 @@ public class LocalQuery { public String getHostName(String page) { String http = new String("http://"); - if (page.indexOf(http) == -1) { + String https = new String("https://"); + int beginindex; + int endindex; + + if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) { return getHostName(); - } else { - int beginindex = page.indexOf(http) + http.length(); - int endindex = page.indexOf('/',beginindex+1); - if ((beginindex == -1)) { - System.printString("ERROR"); - } - if (endindex == -1) - endindex=page.length(); - return page.subString(beginindex, endindex); + } + else if (page.indexOf(https) != -1) { + beginindex = page.indexOf(https) + https.length(); + } + else { + beginindex = page.indexOf(http) + http.length(); } + endindex = page.indexOf('/',beginindex+1); + + if ((beginindex == -1)) { + System.printString("ERROR"); + } + if (endindex == -1) + endindex = page.length(); + + return page.subString(beginindex, endindex); } public String getPathName(String page) { String http = new String("http://"); - if (page.indexOf(http) == -1) { + String https = new String("https://"); + int beginindex; + int nextindex; + + if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) { String path = getPath(); int lastindex = path.lastindexOf('/'); if (lastindex == -1) @@ -65,12 +79,17 @@ public class LocalQuery { StringBuffer sb = new StringBuffer(path.subString(0,lastindex+1)); sb.append(page); return sb.toString(); - } else { - int beginindex = page.indexOf(http) + http.length(); - int nextindex = page.indexOf('/',beginindex+1); - if ((beginindex==-1) || (nextindex==-1)) - return new String("index.html"); - return page.subString(nextindex+1, page.length()); } + else if (page.indexOf(https) != -1) { + beginindex = page.indexOf(https) + https.length(); + } + else { + beginindex = page.indexOf(http) + http.length(); + } + nextindex = page.indexOf('/',beginindex+1); + + if ((beginindex==-1) || (nextindex==-1)) + return new String("index.html"); + return page.subString(nextindex+1, page.length()); } } diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryQueue.java b/Robust/src/Benchmarks/Spider/recovery/QueryQueue.java index da9cc72b..915bb4b9 100644 --- a/Robust/src/Benchmarks/Spider/recovery/QueryQueue.java +++ b/Robust/src/Benchmarks/Spider/recovery/QueryQueue.java @@ -1,7 +1,6 @@ public class QueryQueue { HashSet queries; int size; - int ddddddddddd; public QueryQueue() { queries = new HashSet(); diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryTask.java b/Robust/src/Benchmarks/Spider/recovery/QueryTask.java new file mode 100644 index 00000000..e3339846 --- /dev/null +++ b/Robust/src/Benchmarks/Spider/recovery/QueryTask.java @@ -0,0 +1,257 @@ +public class QueryTask extends Task { + int maxDepth; + Queue toprocess; + DistributedHashMap results; + GlobalString workingURL; + + public QueryTask(Queue todoList, DistributedHashMap doneList, int maxDepth, DistributedHashMap results) { + this.todoList = todoList; + this.doneList = doneList; + this.maxDepth = maxDepth; + this.results = results; + } + + public void execute() { + int depth; + int max; + + atomic { + depth = ((GlobalQuery)myWork).getDepth(); + max = this.maxDepth; + } + + if (depth < max) { + /* global variables */ + GlobalQuery gq; + + /* local variables */ + LocalQuery lq; + String hostname; + String path; + + atomic { + gq = (GlobalQuery)myWork; + hostname = new String(GlobalString.toLocalCharArray(gq.getHostName())); + path = new String(GlobalString.toLocalCharArray(gq.getPath())); + + GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname); + gsb.append("/"); + gsb.append(path); + workingURL = global new GlobalString(gsb.toGlobalString()); + } + lq = new LocalQuery(hostname, path, depth); + + System.printString(lq.getDepth()+" "); + System.printString("Processing - Hostname : "); + System.printString(hostname); + System.printString(", Path : "); + System.printString(path); + System.printString("\n"); + + Socket s = new Socket(hostname, 80); + + requestQuery(hostname, path, s); + readResponse(lq, s); + + atomic { + processList(lq, workingURL, results); + } + + atomic { + toprocess = processPage(lq); + } + + s.close(); + } + } + + public void done(Object obj) { + GlobalString str = global new GlobalString("true"); + doneList.put(workingURL, str); + + while(!toprocess.isEmpty()) { + GlobalQuery q = (GlobalQuery)toprocess.pop(); + + GlobalString hostname = global new GlobalString(q.getHostName()); + GlobalString path = global new GlobalString(q.getPath()); + + GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname); + gsb.append("/"); + gsb.append(path); + + if (!doneList.containsKey(gsb.toGlobalString())) { + todoList.push(q); + } + } + } + + public static void requestQuery(String hostname, String path, Socket sock) { + StringBuffer req = new StringBuffer("GET "); + req.append("/"); + req.append(path); + req.append(" HTTP/1.1\r\nHost:"); + req.append(hostname); + req.append("\r\n\r\n"); + sock.write(req.toString().getBytes()); + } + + public static void readResponse(LocalQuery lq, Socket sock) { + // state 0 - nothing + // state 1 - \r + // state 2 - \r\n + // state 3 - \r\n\r + // state 4 - \r\n\r\n + int state=0; + while(true) { + if (state<4) { + if (state==0) { + byte[] b=new byte[1]; + int numchars=sock.read(b); + if ((numchars==1)) { + if (b[0]=='\r') { + state++; + } + } else + return; + } else if (state==1) { + byte[] b=new byte[1]; + int numchars=sock.read(b); + if (numchars==1) { + if (b[0]=='\n') + state++; + else + state=0; + } else return; + } else if (state==2) { + byte[] b=new byte[1]; + int numchars=sock.read(b); + if (numchars==1) { + if (b[0]=='\r') + state++; + else + state=0; + } else return; + } else if (state==3) { + byte[] b=new byte[1]; + int numchars=sock.read(b); + if (numchars==1) { + if (b[0]=='\n') + state++; + else + state=0; + } else return; + } + } else { + byte[] buffer=new byte[1024]; + int numchars=sock.read(buffer); + if (numchars==0) + return; + else { + String curr=(new String(buffer)).subString(0,numchars); + lq.response.append(curr); + } + } + } + } + + public static void processList(LocalQuery lq, GlobalString url, DistributedHashMap results) { + String sTitle = new String(""); + String eTitle = new String(""); + String searchstr = lq.response.toString(); + LinkedList ll; + + int sIndex = searchstr.indexOf(sTitle); + if (sIndex != -1) { + int eIndex = searchstr.indexOf(eTitle, sIndex+sTitle.length()); + String title = new String(searchstr.subString(sIndex+sTitle.length(), eIndex)); + ll = tokenize(title); + + Queue q; + while (!ll.isEmpty()) { + GlobalString word = global new GlobalString(ll.pop().toString()); +// q = (Queue)(results.get(word)); + +// if (q == null) { + if (!results.containsKey(word)) { + q = global new Queue(); + } + else { + q = (Queue)(results.get(word)); + } + q.push(url); + results.put(word, q); + + System.out.println("Key : ["+word.toLocalString()+"],["+q.size()+"]"); +/* + for (int i = 0; i < q.size(); i++) { + Object obj = q.elements[i]; + GlobalString str = global new GlobalString((GlobalString)obj); + System.out.println("\t["+i+"] : "+str.toLocalString()); + }*/ + } + } + } + + public static LinkedList tokenize(String str) { + LinkedList ll; + int sIndex = 0; + int eIndex = 0; + String token; + + ll = new LinkedList(); + + // and, or, of, at, but, '.', ',', ':' ';', '"', ' ', '-', '=' + while (true) { + eIndex = str.indexOf(' ', sIndex); + if (eIndex == -1) { + token = str.subString(sIndex); + ll.add(token); + break; + } + else { + token = str.subString(sIndex, eIndex); + ll.add(token); + sIndex = eIndex+1; + } + } + + return ll; + } + + public static Queue processPage(LocalQuery lq) { + int index = 0; + String href = new String("href=\""); + String searchstr = lq.response.toString(); + int depth; + boolean cont = true; + Queue toprocess; + + depth = lq.getDepth() + 1; + + toprocess = global new Queue(); + + while(cont) { + int mindex = searchstr.indexOf(href,index); + if (mindex != -1) { + int endquote = searchstr.indexOf('"', mindex+href.length()); + if (endquote != -1) { + String match = searchstr.subString(mindex+href.length(), endquote); + String match2 = lq.makewebcanonical(match); + + GlobalString ghostname; + GlobalString gpath; + + ghostname = global new GlobalString(lq.getHostName(match)); + gpath = global new GlobalString(lq.getPathName(match)); + + if (match2 != null) { + GlobalQuery gq = global new GlobalQuery(ghostname, gpath, depth); + toprocess.push(gq); + } + index = endquote; + } else cont = false; + } else cont = false; + } + return toprocess; + } +} diff --git a/Robust/src/Benchmarks/Spider/recovery/Spider.java b/Robust/src/Benchmarks/Spider/recovery/Spider.java index d69179ec..9335ef33 100644 --- a/Robust/src/Benchmarks/Spider/recovery/Spider.java +++ b/Robust/src/Benchmarks/Spider/recovery/Spider.java @@ -1,46 +1,42 @@ public class Spider { public static void main(String[] args) { - int NUM_THREADS = 4; - int maxDepth = 5; - int searchDepth = 10; + int NUM_THREADS = 3; + int maxDepth = 3; int i, j; Work[] works; - QueryThread[] qt; - Query[] currentWorkList; + QueryTask[] qt; + GlobalQuery[] currentWorkList; NUM_THREADS = Integer.parseInt(args[0]); + + if (args.length == 3) { + maxDepth = Integer.parseInt(args[2]); + } + GlobalString firstmachine; - GlobalString firstpage; -// int[] mid = getMID(NUM_THREADS); int mid[] = new int[NUM_THREADS]; -/* mid[0] = (128<<24)|(195<<16)|(180<<8)|21; //dc-4 - mid[1] = (128<<24)|(195<<16)|(180<<8)|24; //dc-5 - mid[2] = (128<<24)|(195<<16)|(180<<8)|26; //dc-6 - */ - mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1 - mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2 - mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3 - mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-3 - mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-3 - mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-3 + mid[0] = (128<<24)|(195<<16)|(180<<8)|21; + mid[1] = (128<<24)|(195<<16)|(180<<8)|24; + mid[2] = (128<<24)|(195<<16)|(180<<8)|26; atomic { firstmachine = global new GlobalString(args[1]); - firstpage = global new GlobalString(args[2]); works = global new Work[NUM_THREADS]; - qt = global new QueryThread[NUM_THREADS]; - currentWorkList = global new Query[NUM_THREADS]; + qt = global new QueryTask[NUM_THREADS]; + currentWorkList = global new GlobalQuery[NUM_THREADS]; - Query firstquery = global new Query(firstmachine, firstpage, 0); + GlobalQuery firstquery = global new GlobalQuery(firstmachine); Queue todoList = global new Queue(); - Queue doneList = global new Queue(); + DistributedHashMap doneList = global new DistributedHashMap(500, 500, 0.75f); + DistributedHashMap results = global new DistributedHashMap(100, 100, 0.75f); + todoList.push(firstquery); for (i = 0; i < NUM_THREADS; i++) { - qt[i] = global new QueryThread(todoList, doneList, maxDepth, searchDepth); + qt[i] = global new QueryTask(todoList, doneList, maxDepth, results); works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList); } } @@ -61,44 +57,4 @@ public class Spider { tmp.join(); } } - - public static int[] getMID (int num_threads) { - int[] mid = new int[num_threads]; - - FileInputStream ifs = new FileInputStream("dstm.conf"); - String str; - String sub; - int fromIndex; - int endIndex; - double num; - - for (int i = 0; i < num_threads; i++) { - int power = 3 - i; - fromIndex = 0; - num = 0; - - str = ifs.readLine(); - - endIndex = str.indexOf('.', fromIndex); - sub = str.subString(fromIndex, endIndex); - num += (Integer.parseInt(sub) << 24); - - fromIndex = endIndex + 1; - endIndex = str.indexOf('.', fromIndex); - sub = str.subString(fromIndex, endIndex); - num += (Integer.parseInt(sub) << 16); - - fromIndex = endIndex + 1; - endIndex = str.indexOf('.', fromIndex); - sub = str.subString(fromIndex, endIndex); - num += (Integer.parseInt(sub) << 8); - - fromIndex = endIndex + 1; - sub = str.subString(fromIndex); - num += Integer.parseInt(sub); - - mid[i] = (int)num; - } - return mid; - } } diff --git a/Robust/src/Benchmarks/Spider/recovery/dstm.conf b/Robust/src/Benchmarks/Spider/recovery/dstm.conf index 1ecc1322..935ef319 100644 --- a/Robust/src/Benchmarks/Spider/recovery/dstm.conf +++ b/Robust/src/Benchmarks/Spider/recovery/dstm.conf @@ -1,9 +1,3 @@ -#128.195.180.21 -#128.195.180.24 -#128.195.180.26 -128.195.136.162 -128.195.136.163 -128.195.136.164 -128.195.136.165 -128.195.136.166 -128.195.136.167 +128.195.180.21 +128.195.180.24 +128.195.180.26 diff --git a/Robust/src/Benchmarks/Spider/recovery/makefile b/Robust/src/Benchmarks/Spider/recovery/makefile index c9ab7801..841242a0 100644 --- a/Robust/src/Benchmarks/Spider/recovery/makefile +++ b/Robust/src/Benchmarks/Spider/recovery/makefile @@ -1,18 +1,13 @@ MAINCLASS=Spider SUBCLASS=Query SRC1=${MAINCLASS}.java -SRC2=${SUBCLASS}.java +SRC2=Global${SUBCLASS}.java SRC3=${SUBCLASS}Queue.java SRC4=${SUBCLASS}Task.java -FLAGS= -dsm -dsmtask -32bit -nooptimize -debug -recovery -mainclass ${MAINCLASS} +FLAGS= -recovery -dsmtask -dsm -dsmtask -32bit -nooptimize -debug -mainclass ${MAINCLASS} default: ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1} clean: rm -rf tmpbuilddirectory rm *.bin - rm *.php - rm *.css - rm www* - rm eee* - rm web* diff --git a/Robust/src/ClassLibrary/JavaDSM/DistributedHashMap.java b/Robust/src/ClassLibrary/JavaDSM/DistributedHashMap.java index 76d16fb4..b81ca777 100644 --- a/Robust/src/ClassLibrary/JavaDSM/DistributedHashMap.java +++ b/Robust/src/ClassLibrary/JavaDSM/DistributedHashMap.java @@ -38,11 +38,11 @@ public class DistributedHashMap { for(int i=0; i