From: adash Date: Tue, 18 May 2010 22:29:16 +0000 (+0000) Subject: changes to Spider benchmark X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=b9295c18606cdbf5d976a6ac6201c3199149cb88;p=IRC.git changes to Spider benchmark --- diff --git a/Robust/src/Benchmarks/Recovery/Spider/java/QueryTask.java b/Robust/src/Benchmarks/Recovery/Spider/java/QueryTask.java index a00b4d29..9b44deb6 100644 --- a/Robust/src/Benchmarks/Recovery/Spider/java/QueryTask.java +++ b/Robust/src/Benchmarks/Recovery/Spider/java/QueryTask.java @@ -49,31 +49,23 @@ public class QueryTask { //System.printString(path); //System.printString("\n"); - if (isDocument(path)) { - lq = (LocalQuery)(todoList.pop()); - depth = lq.getDepth(); - continue; - } - Socket s = new Socket(); if(s.connect(hostname, 80) == -1) { - lq = (LocalQuery)(todoList.pop()); - depth = lq.getDepth(); - continue; - } - -// System.out.println("AAA"); - requestQuery(hostname, path, s); -// System.out.println("BBB"); - readResponse(lq, s); - -// System.out.println("CCC"); - if ((title = grabTitle(lq)) != null) { - toprocess = processPage(lq); + //lq = (LocalQuery)(todoList.pop()); + //depth = lq.getDepth(); + //continue; + return; } -// System.out.println("DDD"); + if(requestQuery(hostname, path, s) == 0) { + readResponse(lq, s); + if ((title = grabTitle(lq)) != null) { + toprocess = processPage(lq); + } + } else { + ; + } s.close(); done(toprocess); lq = (LocalQuery)(todoList.pop()); @@ -102,10 +94,11 @@ public class QueryTask { } public void done(Queue toprocess) { + /* if ((title != null) && (title.length() > 0)) { processedList(); } - + */ int searchCnt = 0; while(!toprocess.isEmpty()) { LocalQuery q = (LocalQuery)toprocess.pop(); @@ -134,7 +127,6 @@ public class QueryTask { while (iter.hasNext() == true) { str = ((String)(iter.next())); - //System.printString(str + "\n"); } } @@ -212,16 +204,20 @@ public class QueryTask { else return false; } - - public static void requestQuery(String hostname, String path, Socket sock) { - StringBuffer req = new StringBuffer("GET "); - req.append("/"); - req.append(path); - req.append(" HTTP/1.0\r\nHost: "); - req.append(hostname); - req.append("\r\n\r\n"); - sock.write(req.toString().getBytes()); - } + + public static int requestQuery(String hostname, String path, Socket sock) { + StringBuffer req = new StringBuffer("GET "); + req.append("/"); + req.append(path); + req.append(" HTTP/1.0\r\nHost: "); + req.append(hostname); + req.append("\r\n\r\n"); + if(sock.write(req.toString().getBytes()) == -1) { + return -1; + } else { + return 0; + } + } public static void readResponse(LocalQuery lq, Socket sock) { // state 0 - nothing @@ -344,39 +340,38 @@ public class QueryTask { return str; } - public static Queue processPage(LocalQuery lq) { - int index = 0; - String href = new String("href=\""); - String searchstr = lq.response.toString(); - int depth; - boolean cont = true; - Queue toprocess; - - depth = lq.getDepth() + 1; - - toprocess = new Queue(); - while(cont) { - int mindex = searchstr.indexOf(href,index); - if (mindex != -1) { - int endquote = searchstr.indexOf('"', mindex+href.length()); - if (endquote != -1) { - String match = searchstr.subString(mindex+href.length(), endquote); - String match2 = lq.makewebcanonical(match); - - String hostname; - String path; - - hostname = new String(lq.getHostName(match)); - path = new String(lq.getPathName(match)); - - if (match2 != null) { - LocalQuery gq = new LocalQuery(hostname, path, depth); - toprocess.push(gq); - } - index = endquote; - } else cont = false; - } else cont = false; + public static Queue processPage(LocalQuery lq) { + int index = 0; + String href = new String("href=\""); + String searchstr = lq.response.toString(); + int depth; + Queue toprocess; + + depth = lq.getDepth() + 1; + + toprocess = new Queue(); + while(true) { + int mindex = searchstr.indexOf(href,index); + if (mindex != -1) { + int endquote = searchstr.indexOf('"', mindex+href.length()); + if (endquote != -1) { + String match = searchstr.subString(mindex+href.length(), endquote); + String match2 = lq.makewebcanonical(match); + + String hostname; + String path; + + hostname = new String(lq.getHostName(match)); + path = new String(lq.getPathName(match)); + + if (match2 != null) { + LocalQuery gq = new LocalQuery(hostname, path, depth); + toprocess.push(gq); + } + index = endquote; + } else break; + } else break; + } + return toprocess; } - return toprocess; - } } diff --git a/Robust/src/Benchmarks/Recovery/Spider/java/Spider.java b/Robust/src/Benchmarks/Recovery/Spider/java/Spider.java index b65a9588..63d17e29 100644 --- a/Robust/src/Benchmarks/Recovery/Spider/java/Spider.java +++ b/Robust/src/Benchmarks/Recovery/Spider/java/Spider.java @@ -2,10 +2,11 @@ public class Spider { public static void main(String[] args) { int NUM_THREADS = 3; int maxDepth = 3; - int maxSearchDepth = 10; + int maxSearchDepth = 20; int i, j; QueryTask qt; - String fm = "www.uci.edu"; + //String fm = "www.uci.edu"; + String fm = "dc-11.calit2.uci.edu"; String firstmachine; String firstpage; @@ -20,7 +21,8 @@ public class Spider { } firstmachine = new String(fm); - firstpage = new String("");; + //firstpage = new String("");; + firstpage = new String("1.html"); HashMap visitedList = new HashMap(500, 0.75f); HashMap results = new HashMap(100, 0.75f); diff --git a/Robust/src/Benchmarks/Recovery/Spider/java/makefile b/Robust/src/Benchmarks/Recovery/Spider/java/makefile index cff29434..a2d79c15 100644 --- a/Robust/src/Benchmarks/Recovery/Spider/java/makefile +++ b/Robust/src/Benchmarks/Recovery/Spider/java/makefile @@ -3,7 +3,7 @@ SUBCLASS=Query SRC1=${MAINCLASS}.java SRC2=Local${SUBCLASS}.java SRC3=${SUBCLASS}Task.java -FLAGS= -optimize -thread -mainclass ${MAINCLASS} +FLAGS=-debug -optimize -thread -mainclass ${MAINCLASS} default: ../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC1} diff --git a/Robust/src/Benchmarks/Recovery/Spider/recovery/QueryTask.java b/Robust/src/Benchmarks/Recovery/Spider/recovery/QueryTask.java index 79011c4d..a7d57002 100644 --- a/Robust/src/Benchmarks/Recovery/Spider/recovery/QueryTask.java +++ b/Robust/src/Benchmarks/Recovery/Spider/recovery/QueryTask.java @@ -27,58 +27,90 @@ public class QueryTask extends Task { int ldepth; atomic { - System.out.println("trans 2"); max = this.maxDepth; maxSearch = this.maxSearchDepth; ldepth=this.depth; } - + if (ldepth < max) { /* local variables */ - String hostname; - String path; - String title; - + String hostname=null; + String path=null; + String title=null; + atomic { - System.out.println("trans 3"); - hostname = new String(GlobalString.toLocalCharArray(getHostName())); - path = new String(GlobalString.toLocalCharArray(getPath())); - - GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname); - gsb.append("/"); - gsb.append(path); - workingURL = global new GlobalString(gsb.toGlobalString()); - gTitle = null; + hostname = new String(GlobalString.toLocalCharArray(getHostName())); + path = new String(GlobalString.toLocalCharArray(getPath())); + System.out.println("hostname= " + hostname + " path= " + path); + GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname); + gsb.append("/"); + gsb.append(path); + workingURL = global new GlobalString(gsb.toGlobalString()); + gTitle = null; } LocalQuery lq = new LocalQuery(hostname, path, ldepth); + /* if (isDocument(path)) { - return; + return; } - + */ + Socket s = new Socket(); if(s.connect(hostname, 80) == -1) { - return; + return; } - + + if(requestQuery(hostname, path, s) == 0) { + readResponse(lq, s); + if ((title = grabTitle(lq)) != null) { + atomic { + //commits everything...either works or fails + gTitle = global new GlobalString(title); + processPage(lq); + dequeueTask(); + } + } + } else { + atomic { + dequeueTask(); + } + } + + /* + if(requestQuery(hostname, path, s) == -1) { + atomic { + dequeueTask(); + } + } else { + readResponse(lq, s); + if ((title = grabTitle(lq)) != null) { + atomic { + //commits everything...either works or fails + gTitle = global new GlobalString(title); + processPage(lq); + dequeueTask(); + } + } + } + */ + /* requestQuery(hostname, path, s); readResponse(lq, s); - if ((title = grabTitle(lq)) != null) { - atomic { - System.out.println("trans 4"); - //commits everything...either works or fails - gTitle = global new GlobalString(title); - processPage(lq); - dequeueTask(); - } + atomic { + //commits everything...either works or fails + gTitle = global new GlobalString(title); + processPage(lq); + dequeueTask(); + } } + */ s.close(); } else { atomic { - System.out.println("trans 5"); - dequeueTask(); + dequeueTask(); } } } @@ -130,8 +162,8 @@ public class QueryTask extends Task { public GlobalString getPathName(GlobalString page) { GlobalString http = global new GlobalString("http://"); GlobalString https = global new GlobalString("https://"); - int beginindex; - int nextindex; + int beginindex=0; + int nextindex=0; if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) { GlobalString path = getPath(); @@ -148,7 +180,6 @@ public class QueryTask extends Task { beginindex = page.indexOf(http) + http.length(); } nextindex = page.indexOf('/',beginindex+1); - if ((beginindex == -1) || (nextindex == -1)) return global new GlobalString("index.html"); return page.subString(nextindex+1, page.length()); @@ -156,7 +187,6 @@ public class QueryTask extends Task { public static boolean isDocument(String str) { int index = str.lastindexOf('.'); - if (index != -1) { if ((str.subString(index+1)).equals("pdf")) return true; else if ((str.subString(index+1)).equals("ps")) return true; @@ -176,12 +206,14 @@ public class QueryTask extends Task { return false; } + /* public void output() { String str; Iterator iter = results_list.iterator(); System.out.println("Size = " + results_list.size()); } + */ public static String grabTitle(LocalQuery lq) { String sBrace = new String("<"); @@ -220,26 +252,28 @@ public class QueryTask extends Task { if (mindex != -1) { title = searchstr.subString(mindex, endquote); if (Character.isWhitespace(title.charAt(0))){ - mindex=0; - while (Character.isWhitespace(title.charAt(mindex++))); - mindex--; - if (mindex >= title.length()) return null; - title = new String(title.subString(mindex)); + mindex=0; + while (Character.isWhitespace(title.charAt(mindex++))); + mindex--; + if (mindex >= title.length()) return null; + title = new String(title.subString(mindex)); } - + if (Character.isWhitespace(title.charAt(title.length()-1))) { - endquote=title.length()-1; - while (Character.isWhitespace(title.charAt(endquote--))); - endquote += 2; - if (mindex >= endquote) return null; - title = new String(title.subString(0, endquote)); + endquote=title.length()-1; + while (Character.isWhitespace(title.charAt(endquote--))); + endquote += 2; + if (mindex >= endquote) { + return null; + } + title = new String(title.subString(0, endquote)); } - + if (isErrorPage(title)) { - return null; + return null; } } - + return title; } @@ -258,14 +292,18 @@ public class QueryTask extends Task { return false; } - public static void requestQuery(String hostname, String path, Socket sock) { + public static int requestQuery(String hostname, String path, Socket sock) { StringBuffer req = new StringBuffer("GET "); req.append("/"); req.append(path); req.append(" HTTP/1.0\r\nHost: "); req.append(hostname); req.append("\r\n\r\n"); - sock.write(req.toString().getBytes()); + if(sock.write(req.toString().getBytes()) == -1) { + return -1; //error in openning this webpage + } else { + return 0; + } } public static void readResponse(LocalQuery lq, Socket sock) { @@ -290,25 +328,25 @@ public class QueryTask extends Task { GlobalString token = null; int mindex = 0; int endquote = 0; - + while (endquote != -1) { endquote = gTitle.indexOf(' ', mindex); - + if (endquote != -1) { - token = gTitle.subString(mindex, endquote); - mindex = endquote + 1; - if (filter(token)) { - continue; - } - token = refine(token); + token = gTitle.subString(mindex, endquote); + mindex = endquote + 1; + if (filter(token)) { + continue; + } + token = refine(token); } else { - token = gTitle.subString(mindex); - token = refine(token); + token = gTitle.subString(mindex); + token = refine(token); } - + GlobalQueue q = (GlobalQueue)results.get(token); if (q == null) { - q = global new GlobalQueue(); + q = global new GlobalQueue(); } q.push(workingURL); results.put(token, q); @@ -380,11 +418,14 @@ public class QueryTask extends Task { return str; } - + public void processPage(LocalQuery lq) { + //System.out.println("Inside processPage"); + /* if ((gTitle != null) && (gTitle.length() > 0)) { processedList(); } + */ int index = 0; String href = new String("href=\""); @@ -393,36 +434,45 @@ public class QueryTask extends Task { while(true) { int mindex = searchstr.indexOf(href,index); if (mindex != -1) { - int endquote = searchstr.indexOf('"', mindex+href.length()); - if (endquote != -1) { - String match = searchstr.subString(mindex+href.length(), endquote); - String match2 = lq.makewebcanonical(match); - - GlobalString ghostname; - GlobalString gpath; - - ghostname = global new GlobalString(lq.getHostName(match)); - gpath = global new GlobalString(lq.getPathName(match)); - - GlobalStringBuffer gsb = global new GlobalStringBuffer(ghostname); - gsb.append("/"); - gsb.append(gpath); + int endquote = searchstr.indexOf('"', mindex+href.length()); + if (endquote != -1) { + String match = searchstr.subString(mindex+href.length(), endquote); + String match2 = lq.makewebcanonical(match); + //System.out.println("match= " + match + " match2= " + match2); - if (match2 != null) { - if (!visitedList.containsKey(gsb.toGlobalString()) && (searchCnt < maxSearchDepth)) { - GlobalString str = global new GlobalString("1"); - visitedList.put(gsb.toGlobalString(), str); - results_list.add(gsb.toGlobalString()); - searchCnt++; - QueryTask gq = global new QueryTask(visitedList, maxDepth, maxSearchDepth, results, results_list, ghostname, gpath, lq.getDepth()+1); - enqueueTask(gq); - } - } - index = endquote; - } else - break; - } else - break; + GlobalString ghostname; + GlobalString gpath; + + ghostname = global new GlobalString(lq.getHostName(match)); + gpath = global new GlobalString(lq.getPathName(match)); + + GlobalStringBuffer gsb = global new GlobalStringBuffer(ghostname); + gsb.append("/"); + gsb.append(gpath); + //System.out.println("match2=" + match2 + lq.getHostName(match)+"/"+lq.getPathName(match)); + + if (match2 != null) { + if (!visitedList.containsKey(gsb.toGlobalString()) && (searchCnt < maxSearchDepth)) { + //System.out.println("I am here"); + GlobalString str = global new GlobalString("1"); + visitedList.put(gsb.toGlobalString(), str); + //results_list.add(gsb.toGlobalString()); + searchCnt++; + QueryTask gq = global new QueryTask(visitedList, maxDepth, maxSearchDepth, results, results_list, ghostname, gpath, lq.getDepth()+1); + enqueueTask(gq); + } + } + index = endquote; + } else { + //System.out.println("mindex= " + mindex + " index= " + index + " endquote= " + endquote + " href.length()= " + href.length()); + break; + } + } else { + //System.out.println("mindex= " + mindex + " index= " + index); + break; + } } + //System.out.println("End of processPage"); + //System.out.println("\n"); } } diff --git a/Robust/src/Benchmarks/Recovery/Spider/recovery/Spider.java b/Robust/src/Benchmarks/Recovery/Spider/recovery/Spider.java index 5a290186..88a659e3 100644 --- a/Robust/src/Benchmarks/Recovery/Spider/recovery/Spider.java +++ b/Robust/src/Benchmarks/Recovery/Spider/recovery/Spider.java @@ -11,7 +11,7 @@ public class Spider { int maxDepth = 3; int maxSearchDepth = 10; int i, j; - String fm = "www.uci.edu"; + String fm = ""; String fp = ""; if(args.length != 3) { @@ -19,10 +19,11 @@ public class Spider { System.exit(0); } else { NUM_THREADS = Integer.parseInt(args[0]); - fm = args[1]; + fm = "dc-11.calit2.uci.edu"; maxDepth = Integer.parseInt(args[2]); } + int nQueue = 3; int mid[] = new int[8]; mid[0] = (128<<24)|(195<<16)|(136<<8)|162; @@ -38,17 +39,17 @@ public class Spider { atomic { //set up workers ts=global new TaskSet(NUM_THREADS); - for (i = 0; i < NUM_THREADS; i++) { - ts.threads[i] = global new Worker(ts,i,(NUM_THREADS/2)); - } - for (i = 0; i < NUM_THREADS/2; i++) { + for (i = 0; i < nQueue; i++) { ts.todo[i] = global new GlobalQueue(); } + for (i = 0; i < NUM_THREADS; i++) { + ts.threads[i] = global new Worker(ts,i,nQueue); + } } atomic { GlobalString firstmachine = global new GlobalString(fm); - GlobalString firstpage = global new GlobalString(""); + GlobalString firstpage = global new GlobalString("1.html"); DistributedHashMap visitedList = global new DistributedHashMap(500, 500, 0.75f); DistributedHashMap results = global new DistributedHashMap(100, 100, 0.75f); DistributedLinkedList results_list = global new DistributedLinkedList(); diff --git a/Robust/src/Benchmarks/Recovery/Spider/recovery/Task.java b/Robust/src/Benchmarks/Recovery/Spider/recovery/Task.java index c45c25e8..9d5cdfe6 100644 --- a/Robust/src/Benchmarks/Recovery/Spider/recovery/Task.java +++ b/Robust/src/Benchmarks/Recovery/Spider/recovery/Task.java @@ -12,7 +12,6 @@ public class Task { w.workingtask=null; } public void enqueueTask(Task t) { - //System.out.println("queueid= " + queueid); w.tasks.todo[queueid].push(t); } public native void execution(); diff --git a/Robust/src/Benchmarks/Recovery/Spider/recovery/TaskSet.java b/Robust/src/Benchmarks/Recovery/Spider/recovery/TaskSet.java index 1904a89e..27a8da14 100644 --- a/Robust/src/Benchmarks/Recovery/Spider/recovery/TaskSet.java +++ b/Robust/src/Benchmarks/Recovery/Spider/recovery/TaskSet.java @@ -2,7 +2,7 @@ public class TaskSet { public TaskSet(int nt) { numthreads=nt; threads=global new Worker[nt]; - todo=global new GlobalQueue[(nt/2)]; + todo=global new GlobalQueue[3]; } //Tasks to be executed diff --git a/Robust/src/Benchmarks/Recovery/Spider/recovery/Worker.java b/Robust/src/Benchmarks/Recovery/Spider/recovery/Worker.java index 0c8991e4..b6add810 100644 --- a/Robust/src/Benchmarks/Recovery/Spider/recovery/Worker.java +++ b/Robust/src/Benchmarks/Recovery/Spider/recovery/Worker.java @@ -7,7 +7,7 @@ public class Worker extends Thread { Worker(TaskSet tasks, int id, int numQueue) { this.tasks = tasks; this.id = id; - this.numQueue = 3; // Correct this 3 should be hash defined + this.numQueue = numQueue; } public void run() { @@ -17,9 +17,7 @@ public class Worker extends Thread { while(notdone) { Task t=null; atomic { - System.out.println("Transacion 1"); int qindex = (id%numQueue); - //System.out.println("id= " + id + " numQueue= " + numQueue); if (!tasks.todo[qindex].isEmpty()) { //grab segment from todo list t=workingtask=(Task) tasks.todo[qindex].pop(); diff --git a/Robust/src/Benchmarks/Recovery/Spider/recovery/dstm.conf b/Robust/src/Benchmarks/Recovery/Spider/recovery/dstm.conf deleted file mode 100644 index cbce2d59..00000000 --- a/Robust/src/Benchmarks/Recovery/Spider/recovery/dstm.conf +++ /dev/null @@ -1,8 +0,0 @@ -128.195.136.162 -128.195.136.163 -128.195.136.164 -128.195.136.165 -128.195.136.166 -128.195.136.167 -128.195.136.168 -128.195.136.169 diff --git a/Robust/src/Benchmarks/Recovery/Spider/recovery/makefile b/Robust/src/Benchmarks/Recovery/Spider/recovery/makefile index c98fd3f0..358d3a80 100644 --- a/Robust/src/Benchmarks/Recovery/Spider/recovery/makefile +++ b/Robust/src/Benchmarks/Recovery/Spider/recovery/makefile @@ -3,10 +3,11 @@ SUBCLASS=Query SRC1=${MAINCLASS}.java SRC2=Global${SUBCLASS}.java SRC3=${SUBCLASS}Task.java -FLAGS=-recoverystats -recovery -transstats -dsmcaching -dsm -dsmtask -optimize -mainclass ${MAINCLASS} -DSMFLAGS= -dsm -dsmtask -sandbox -transstats -optimize -mainclass ${MAINCLASS} +FLAGS=-recoverystats -recovery -transstats -dsmcaching -32bit -dsm -dsmtask -optimize -mainclass ${MAINCLASS} +DSMFLAGS=-dsm -dsmtask -transstats -dsmcaching -debug -optimize -mainclass ${MAINCLASS} default: # ../../../../buildscript ${DSMFLAGS} -o ${MAINCLASS}DSM GlobalQuery.java LocalQuery.java QueryTask.java Spider.java Task.java GlobalQueue.java Worker.java TaskSet.java +# ../../../../buildscript ${FLAGS} -o ${MAINCLASS} GlobalQuery.java LocalQuery.java QueryTask.java Spider.java Task.java GlobalQueue.java Worker.java TaskSet.java DistributedHashMap.java ../../../../buildscript ${FLAGS} -o ${MAINCLASS} GlobalQuery.java LocalQuery.java QueryTask.java Spider.java Task.java GlobalQueue.java Worker.java TaskSet.java clean: