From 75f3b2d8931cacbfa4807acc33b1b0509314307e Mon Sep 17 00:00:00 2001 From: hkhang Date: Tue, 3 Nov 2009 02:28:15 +0000 Subject: [PATCH] *** empty log message *** --- .../src/Benchmarks/Spider/recovery/Query.java | 60 ------ .../Benchmarks/Spider/recovery/QueryList.java | 22 --- .../Spider/recovery/QueryQueue.java | 34 ---- .../Benchmarks/Spider/recovery/QueryTask.java | 141 ++++++++------ .../Spider/recovery/QueryThread.java | 175 ------------------ .../Benchmarks/Spider/recovery/Spider.java | 9 +- .../src/Benchmarks/Spider/recovery/dstm.conf | 9 +- .../src/Benchmarks/Spider/recovery/makefile | 5 +- 8 files changed, 101 insertions(+), 354 deletions(-) delete mode 100644 Robust/src/Benchmarks/Spider/recovery/Query.java delete mode 100644 Robust/src/Benchmarks/Spider/recovery/QueryList.java delete mode 100644 Robust/src/Benchmarks/Spider/recovery/QueryQueue.java delete mode 100644 Robust/src/Benchmarks/Spider/recovery/QueryThread.java diff --git a/Robust/src/Benchmarks/Spider/recovery/Query.java b/Robust/src/Benchmarks/Spider/recovery/Query.java deleted file mode 100644 index 7812fff7..00000000 --- a/Robust/src/Benchmarks/Spider/recovery/Query.java +++ /dev/null @@ -1,60 +0,0 @@ -public class Query { - GlobalString hostname; - GlobalString path; - int depth; - - public Query(GlobalString hostname, GlobalString path, int depth) { - this.hostname = global new GlobalString(hostname); - this.path = global new GlobalString(path); - this.depth = depth; - } - - public int getDepth() { - return depth; - } - - public GlobalString getHostName() { - return hostname; - } - - public GlobalString getPath() { - return path; - } - - public GlobalString getHostName(GlobalString page) { - GlobalString http = global new GlobalString("http://"); - if (page.indexOf(http) == -1) { - return getHostName(); - } else { - int beginindex = page.indexOf(http) + http.length(); - int endindex = page.indexOf('/',beginindex+1); - if ((beginindex == -1)) { - System.printString("ERROR"); - } - if (endindex == -1) - endindex = page.length(); - return page.subString(beginindex, endindex); - } - } - - - public GlobalString getPathName(GlobalString page) { - GlobalString http = global new GlobalString("http://"); - if (page.indexOf(http) == -1) { - GlobalString path = getPath(); - int lastindex = path.lastindexOf('/'); - if (lastindex == -1) - return page; - - GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1)); - sb.append(page); - return sb.toGlobalString(); - } else { - int beginindex = page.indexOf(http)+http.length(); - int nextindex = page.indexOf('/',beginindex+1); - if ((beginindex == -1) || (nextindex == -1)) - return global new GlobalString("index.html"); - return page.subString(nextindex+1, page.length()); - } - } -} diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryList.java b/Robust/src/Benchmarks/Spider/recovery/QueryList.java deleted file mode 100644 index d09167b0..00000000 --- a/Robust/src/Benchmarks/Spider/recovery/QueryList.java +++ /dev/null @@ -1,22 +0,0 @@ -public class QueryList extends Queue { - Queue queries; - - public QueryList() { - queries = global new Queue(); - } - - public boolean checkQuery(GlobalString x) { - boolean set = false;; - for (int i = 0 ; i < size; i++) { - if (x.equals((GlobalString)elements[i])) { - set = true; - break; - } - } - return set; - } - - public void addQuery(GlobalString x) { - queries.push(x); - } -} diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryQueue.java b/Robust/src/Benchmarks/Spider/recovery/QueryQueue.java deleted file mode 100644 index 915bb4b9..00000000 --- a/Robust/src/Benchmarks/Spider/recovery/QueryQueue.java +++ /dev/null @@ -1,34 +0,0 @@ -public class QueryQueue { - HashSet queries; - int size; - - public QueryQueue() { - queries = new HashSet(); - size = 0; - } - - public LocalQuery pop() { - if (queries.isEmpty()) - return null; - LocalQuery q = (LocalQuery) queries.iterator().next(); - queries.remove(q); - size--; - return q; - } - - public void push(LocalQuery x) { - queries.add(x); - size++; - } - - public int size() { - return size; - } - - public boolean isEmpty() { - if (size == 0) - return true; - else - return false; - } -} diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryTask.java b/Robust/src/Benchmarks/Spider/recovery/QueryTask.java index e3339846..fa90e0fc 100644 --- a/Robust/src/Benchmarks/Spider/recovery/QueryTask.java +++ b/Robust/src/Benchmarks/Spider/recovery/QueryTask.java @@ -2,6 +2,7 @@ public class QueryTask extends Task { int maxDepth; Queue toprocess; DistributedHashMap results; + GlobalString gTitle; GlobalString workingURL; public QueryTask(Queue todoList, DistributedHashMap doneList, int maxDepth, DistributedHashMap results) { @@ -28,6 +29,7 @@ public class QueryTask extends Task { LocalQuery lq; String hostname; String path; + String title; atomic { gq = (GlobalQuery)myWork; @@ -38,10 +40,11 @@ public class QueryTask extends Task { gsb.append("/"); gsb.append(path); workingURL = global new GlobalString(gsb.toGlobalString()); + gTitle = null; } lq = new LocalQuery(hostname, path, depth); - System.printString(lq.getDepth()+" "); + System.printString("["+lq.getDepth()+"] "); System.printString("Processing - Hostname : "); System.printString(hostname); System.printString(", Path : "); @@ -53,8 +56,10 @@ public class QueryTask extends Task { requestQuery(hostname, path, s); readResponse(lq, s); - atomic { - processList(lq, workingURL, results); + if ((title = grabTitle(lq)) != null) { + atomic { + gTitle = global new GlobalString(title); + } } atomic { @@ -66,7 +71,11 @@ public class QueryTask extends Task { } public void done(Object obj) { + if (gTitle != null) + processList(); + GlobalString str = global new GlobalString("true"); + doneList.put(workingURL, str); while(!toprocess.isEmpty()) { @@ -85,6 +94,21 @@ public class QueryTask extends Task { } } + public static String grabTitle(LocalQuery lq) { + String sTitle = new String(""); + String eTitle = new String(""); + String searchstr = lq.response.toString(); + String title = null; + + int mindex = searchstr.indexOf(sTitle); + if (mindex != -1) { + int endquote = searchstr.indexOf(eTitle, mindex+sTitle.length()); + title = new String(searchstr.subString(mindex+sTitle.length(), endquote)); + } + + return title; + } + public static void requestQuery(String hostname, String path, Socket sock) { StringBuffer req = new StringBuffer("GET "); req.append("/"); @@ -154,68 +178,78 @@ public class QueryTask extends Task { } } - public static void processList(LocalQuery lq, GlobalString url, DistributedHashMap results) { - String sTitle = new String(""); - String eTitle = new String(""); - String searchstr = lq.response.toString(); + public void processList() { LinkedList ll; + GlobalString token = null; + int mindex = 0; + int endquote = 0; - int sIndex = searchstr.indexOf(sTitle); - if (sIndex != -1) { - int eIndex = searchstr.indexOf(eTitle, sIndex+sTitle.length()); - String title = new String(searchstr.subString(sIndex+sTitle.length(), eIndex)); - ll = tokenize(title); - - Queue q; - while (!ll.isEmpty()) { - GlobalString word = global new GlobalString(ll.pop().toString()); -// q = (Queue)(results.get(word)); + while (endquote != -1) { + endquote = gTitle.indexOf(' ', mindex); -// if (q == null) { - if (!results.containsKey(word)) { - q = global new Queue(); - } - else { - q = (Queue)(results.get(word)); + if (endquote != -1) { + token = gTitle.subString(mindex, endquote); + mindex = endquote + 1; + if (censor(token)) { + continue; } - q.push(url); - results.put(word, q); + token = refinement(token); + } + else { + token = gTitle.subString(mindex); + token = refinement(token); + } - System.out.println("Key : ["+word.toLocalString()+"],["+q.size()+"]"); /* - for (int i = 0; i < q.size(); i++) { - Object obj = q.elements[i]; - GlobalString str = global new GlobalString((GlobalString)obj); - System.out.println("\t["+i+"] : "+str.toLocalString()); - }*/ + Queue q; + if ((q = (Queue)(results.remove(token))) == null) { + q = global new Queue(); } + else { + q = (Queue)(results.get(token)); + } + // bug here <- object id changed?? + q.push(workingURL); + results.put(token, q); + + System.out.println("Key : ["+token.toLocalString()+"],["+q.size()+"]"); + */ } } - public static LinkedList tokenize(String str) { - LinkedList ll; - int sIndex = 0; - int eIndex = 0; - String token; + public boolean censor(GlobalString str) { + if (str.equals("of")) return true; + else if (str.equals("for")) return true; + else if (str.equals("a")) return true; + else if (str.equals("an")) return true; + else if (str.equals("the")) return true; + else if (str.equals("at")) return true; + else if (str.equals("and")) return true; + else if (str.equals("or")) return true; + else if (str.equals("but")) return true; + else if (str.equals(".")) return true; + else if (str.equals("=")) return true; + else if (str.equals("-")) return true; + else if (str.equals(":")) return true; + else if (str.equals(";")) return true; + else if (str.equals("\'")) return true; + else if (str.equals("\"")) return true; + else if (str.equals("@")) return true; + else return false; + } - ll = new LinkedList(); - - // and, or, of, at, but, '.', ',', ':' ';', '"', ' ', '-', '=' - while (true) { - eIndex = str.indexOf(' ', sIndex); - if (eIndex == -1) { - token = str.subString(sIndex); - ll.add(token); - break; - } - else { - token = str.subString(sIndex, eIndex); - ll.add(token); - sIndex = eIndex+1; - } + public GlobalString refinement(GlobalString str) { + if (str.charAt(str.length()-1) == ',') { + return str.subString(0, str.length()-1); } - - return ll; + else if (str.charAt(str.length()-1) == ':') { + return str.subString(0, str.length()-1); + } + else if (str.charAt(str.length()-1) == 's') { + if (str.charAt(str.length()-2) == '\'') + return str.subString(0, str.length()-2); + } + return str; } public static Queue processPage(LocalQuery lq) { @@ -229,7 +263,6 @@ public class QueryTask extends Task { depth = lq.getDepth() + 1; toprocess = global new Queue(); - while(cont) { int mindex = searchstr.indexOf(href,index); if (mindex != -1) { diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryThread.java b/Robust/src/Benchmarks/Spider/recovery/QueryThread.java deleted file mode 100644 index 7d6e3530..00000000 --- a/Robust/src/Benchmarks/Spider/recovery/QueryThread.java +++ /dev/null @@ -1,175 +0,0 @@ -public class QueryThread extends Task { - int maxDepth; - int maxSearchDepth; - - public QueryThread(Queue todoList, Queue doneList, int maxDepth, int maxSearchDepth) { - this.todoList = todoList; - this.doneList = doneList; - this.maxDepth = maxDepth; - this.maxSearchDepth = maxSearchDepth; - } - - public void execute() { - int depth; - int max; - int maxSearch; - - atomic { - depth = ((Query)myWork).getDepth(); - max = this.maxDepth; - maxSearch = this.maxSearchDepth; - } - - if (depth < max) { - /* global variables */ - Query q; - GlobalString ghostname; - GlobalString gpath; - - /* local variables */ - QueryQueue toprocess; - LocalQuery lq; - String hostname; - String path; - - atomic { - q = (Query)myWork; - ghostname = q.getHostName(); - gpath = q.getPath(); - hostname = new String(GlobalString.toLocalCharArray(ghostname)); - path = new String(GlobalString.toLocalCharArray(gpath)); - } - lq = new LocalQuery(hostname, path, depth); - - System.printString("Processing - Hostname : "); - System.printString(hostname); - System.printString(", Path : "); - System.printString(path); - System.printString("\n"); - - Socket s = new Socket(hostname, 80); - - requestQuery(hostname, path, s); - readResponse(lq, s); - toprocess = processPage(lq,maxSearch); - s.close(); - - atomic { - while(!toprocess.isEmpty()) { - lq = toprocess.pop(); - ghostname = global new GlobalString(lq.getHostName()); - gpath = global new GlobalString(lq.getPath()); - - q = global new Query(ghostname, gpath, lq.getDepth()); - todoList.push(q); - } - } - } - } - - public static void requestQuery(String hostname, String path, Socket sock) { - StringBuffer req = new StringBuffer("GET "); - req.append("/"); - req.append(path); - req.append(" HTTP/1.1\r\nHost:"); - req.append(hostname); - req.append("\r\n\r\n"); - sock.write(req.toString().getBytes()); - } - - public static void readResponse(LocalQuery lq, Socket sock) { - // state 0 - nothing - // state 1 - \r - // state 2 - \r\n - // state 3 - \r\n\r - // state 4 - \r\n\r\n - int state=0; - while(true) { - if (state<4) { - if (state==0) { - byte[] b=new byte[1]; - int numchars=sock.read(b); - if ((numchars==1)) { - if (b[0]=='\r') { - state++; - } - } else - return; - } else if (state==1) { - byte[] b=new byte[1]; - int numchars=sock.read(b); - if (numchars==1) { - if (b[0]=='\n') - state++; - else - state=0; - } else return; - } else if (state==2) { - byte[] b=new byte[1]; - int numchars=sock.read(b); - if (numchars==1) { - if (b[0]=='\r') - state++; - else - state=0; - } else return; - } else if (state==3) { - byte[] b=new byte[1]; - int numchars=sock.read(b); - if (numchars==1) { - if (b[0]=='\n') - state++; - else - state=0; - } else return; - } - } else { - byte[] buffer=new byte[1024]; - int numchars=sock.read(buffer); - if (numchars==0) - return; - else { - String curr=(new String(buffer)).subString(0,numchars); - lq.response.append(curr); - } - } - } - } - - public void done(Object obj) { - doneList.push(obj); - } - - public static QueryQueue processPage(LocalQuery lq,int maxSearchDepth) { - int index = 0; - String href = new String("href=\""); - String searchstr = lq.response.toString(); - int depth; - boolean cont = true; - - QueryQueue toprocess = new QueryQueue(); - depth = lq.getDepth() + 1; - - int searchDepthCnt = 0; - while(cont && (searchDepthCnt < maxSearchDepth)) { - int mindex = searchstr.indexOf(href,index); - if (mindex != -1) { - int endquote = searchstr.indexOf('"', mindex+href.length()); - if (endquote != -1) { - String match = searchstr.subString(mindex+href.length(), endquote); - String match2 = lq.makewebcanonical(match); - - if (match2 != null) { - LocalQuery newlq = new LocalQuery(lq.getHostName(match), lq.getPathName(match), depth); - - toprocess.push(newlq); - searchDepthCnt++; - } - index = endquote; - } else cont = false; - } else cont = false; - } - - return toprocess; - } -} diff --git a/Robust/src/Benchmarks/Spider/recovery/Spider.java b/Robust/src/Benchmarks/Spider/recovery/Spider.java index 9335ef33..356d6fa8 100644 --- a/Robust/src/Benchmarks/Spider/recovery/Spider.java +++ b/Robust/src/Benchmarks/Spider/recovery/Spider.java @@ -16,9 +16,12 @@ public class Spider { GlobalString firstmachine; int mid[] = new int[NUM_THREADS]; - mid[0] = (128<<24)|(195<<16)|(180<<8)|21; - mid[1] = (128<<24)|(195<<16)|(180<<8)|24; - mid[2] = (128<<24)|(195<<16)|(180<<8)|26; +// mid[0] = (128<<24)|(195<<16)|(180<<8)|21; +// mid[1] = (128<<24)|(195<<16)|(180<<8)|24; +// mid[2] = (128<<24)|(195<<16)|(180<<8)|26; + mid[0] = (128<<24)|(195<<16)|(136<<8)|162; + mid[1] = (128<<24)|(195<<16)|(136<<8)|163; + mid[2] = (128<<24)|(195<<16)|(136<<8)|164; atomic { firstmachine = global new GlobalString(args[1]); diff --git a/Robust/src/Benchmarks/Spider/recovery/dstm.conf b/Robust/src/Benchmarks/Spider/recovery/dstm.conf index 935ef319..eff447b8 100644 --- a/Robust/src/Benchmarks/Spider/recovery/dstm.conf +++ b/Robust/src/Benchmarks/Spider/recovery/dstm.conf @@ -1,3 +1,6 @@ -128.195.180.21 -128.195.180.24 -128.195.180.26 +#128.195.180.21 +#128.195.180.24 +#128.195.180.26 +128.195.136.162 +128.195.136.163 +128.195.136.164 diff --git a/Robust/src/Benchmarks/Spider/recovery/makefile b/Robust/src/Benchmarks/Spider/recovery/makefile index 841242a0..eafdf663 100644 --- a/Robust/src/Benchmarks/Spider/recovery/makefile +++ b/Robust/src/Benchmarks/Spider/recovery/makefile @@ -2,11 +2,10 @@ MAINCLASS=Spider SUBCLASS=Query SRC1=${MAINCLASS}.java SRC2=Global${SUBCLASS}.java -SRC3=${SUBCLASS}Queue.java -SRC4=${SUBCLASS}Task.java +SRC3=${SUBCLASS}Task.java FLAGS= -recovery -dsmtask -dsm -dsmtask -32bit -nooptimize -debug -mainclass ${MAINCLASS} default: - ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1} + ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC1} clean: rm -rf tmpbuilddirectory -- 2.34.1