From: hkhang
Date: Thu, 17 Sep 2009 23:04:22 +0000 (+0000)
Subject: new dsm web crawler benchmark
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=2dce9cc9eb8c6b1097f2b1959b5b0fdb19d8539c;p=IRC.git

new dsm web crawler benchmark
---

diff --git a/Robust/src/Benchmarks/Spider/dsm/Query.java b/Robust/src/Benchmarks/Spider/dsm/Query.java
new file mode 100644
index 00000000..6cf78ad0
--- /dev/null
+++ b/Robust/src/Benchmarks/Spider/dsm/Query.java
@@ -0,0 +1,84 @@
+// A single crawl request: the host and path of a page, plus the response
+// text collected for it. All three fields are allocated with "global new".
+public class Query {
+  GlobalString hostname;
+  GlobalString path;
+  GlobalStringBuffer response;
+
+  // Copies hostname and path into fresh global strings and creates an empty
+  // response buffer.
+  public Query(GlobalString hostname, GlobalString path) {
+    this.hostname = global new GlobalString(hostname);
+    this.path = global new GlobalString(path);
+    response = global new GlobalStringBuffer();
+  }
+
+  public GlobalString getHostName() {
+    return hostname;
+  }
+
+  public GlobalString getPath() {
+    return path;
+  }
+
+  // Writes the collected response to a local file named hostname + path,
+  // with every '/' in that name replaced by '#'.
+  public void outputFile() {
+    StringBuffer sb = new StringBuffer(hostname.toLocalString());
+    sb.append(path.toLocalString());
+    FileOutputStream fos = new FileOutputStream(sb.toString().replace('/','#'));
+    fos.write(response.toLocalString().getBytes());
+    fos.close();
+  }
+
+  // Builds "host/path" for a link found on this query's page.
+  public GlobalString makewebcanonical(GlobalString page) {
+    GlobalStringBuffer b = global new GlobalStringBuffer(getHostName(page));
+    b.append("/");
+    b.append(getPathName(page));
+    return b.toGlobalString();
+  }
+
+  // Returns the host part of page when page contains "http://"; otherwise
+  // the link is treated as relative and this query's own hostname is returned.
+  public GlobalString getHostName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    int schemeindex = page.indexOf(http);
+    if (schemeindex == -1) {
+      return getHostName();
+    } else {
+      // schemeindex is a valid index here, so beginindex cannot be -1; the
+      // former "ERROR" check on it could never fire and has been removed.
+      int beginindex = schemeindex + http.length();
+      int endindex = page.indexOf('/',beginindex+1);
+      if (endindex == -1)
+        endindex = page.length();
+      return page.subString(beginindex, endindex);
+    }
+  }
+
+  // Returns the path part of a link. Relative links are resolved against the
+  // directory of this query's path; absolute links with no path component
+  // map to "index.html".
+  public GlobalString getPathName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    int schemeindex = page.indexOf(http);
+    if (schemeindex == -1) {
+      GlobalString path = getPath();
+      int lastindex = path.lastindexOf('/');
+      if (lastindex == -1)
+        return page;
+
+      GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1));
+      sb.append(page);
+      return sb.toGlobalString();
+    } else {
+      int beginindex = schemeindex + http.length();
+      int nextindex = page.indexOf('/',beginindex+1);
+      // beginindex cannot be -1 in this branch, so only nextindex is tested.
+      if (nextindex == -1)
+        return global new GlobalString("index.html");
+      return page.subString(nextindex+1, page.length());
+    }
+  }
+}
diff --git a/Robust/src/Benchmarks/Spider/dsm/QueryList.java b/Robust/src/Benchmarks/Spider/dsm/QueryList.java
new file mode 100644
index 00000000..fa4a9fff
--- /dev/null
+++ b/Robust/src/Benchmarks/Spider/dsm/QueryList.java
@@ -0,0 +1,16 @@
+// Queue of already-seen links with a linear membership test.
+public class QueryList extends Queue {
+  public QueryList() {
+    Queue(); // ?? NOTE(review): confirm this is how the superclass constructor is meant to be invoked
+  }
+
+  // Returns true when some element of the queue equals x.
+  public boolean checkQuery(GlobalString x) {
+    for (int i = 0 ; i < size; i++) {
+      if (x.equals((GlobalString)elements[i])) {
+        return true;
+      }
+    }
+    return false;
+  }
+}
diff --git a/Robust/src/Benchmarks/Spider/dsm/QueryThread.java b/Robust/src/Benchmarks/Spider/dsm/QueryThread.java
new file mode 100644
index 00000000..d9dc3690
--- /dev/null
+++ b/Robust/src/Benchmarks/Spider/dsm/QueryThread.java
@@ -0,0 +1,149 @@
+// Task that fetches one Query over HTTP, saves the response to a file and
+// queues the links found in it.
+public class QueryThread extends Task {
+  int maxDepth;
+  int depthCnt;
+  int maxSearchDepth;
+  int searchDepthCnt;
+
+  public QueryThread(Queue qq, Queue ql, int depth, int searchDepth) {
+    this.todoList = qq;
+    this.doneList = ql;
+    this.maxDepth = depth;
+    this.maxSearchDepth = searchDepth;
+    depthCnt = 1;
+    searchDepthCnt = 0;
+  }
+
+  // Processes one Query: sends the request, reads the response, writes it to
+  // a file and extracts further links.
+  public void execute(Object mywork) {
+    Query q = (Query)mywork;
+    GlobalString ghostname;
+    GlobalString gpath;
+
+    atomic {
+      ghostname = q.getHostName();
+      gpath = q.getPath();
+    }
+
+    String hostname = new String(GlobalString.toLocalCharArray(ghostname));
+    String path = new String(GlobalString.toLocalCharArray(gpath));
+
+    System.printString("Processing ");
+    System.printString(hostname + "\n");
+    System.printString(" ");
+    System.printString(path);
+    System.printString("\n");
+
+    Socket s = new Socket(hostname, 80);
+
+    requestQuery(hostname, path, s);
+    readResponse(q, s);
+    // The socket is not used past this point, so release it before the file
+    // write and link extraction instead of holding it open through both.
+    s.close();
+
+    q.outputFile();
+    processPage(q, (QueryList)doneList);
+  }
+
+  // Sends a minimal HTTP/1.1 GET for path on hostname.
+  public void requestQuery(String hostname, String path, Socket sock) {
+    StringBuffer req = new StringBuffer("GET ");
+    req.append("/");
+    req.append(path);
+    req.append(" HTTP/1.1\r\nHost:");
+    req.append(hostname);
+    // readResponse() reads until the peer closes the connection. HTTP/1.1
+    // connections are persistent by default, so ask the server to close
+    // after this response rather than waiting for its idle timeout.
+    req.append("\r\nConnection: close");
+    req.append("\r\n\r\n");
+    System.printString("req : " + req + "\n");
+    sock.write(req.toString().getBytes());
+  }
+
+  // Skips the response header (everything up to the first "\r\n\r\n") and
+  // appends the rest of the stream to q.response.
+  public void readResponse(Query q, Socket sock) {
+    // state = number of characters of "\r\n\r\n" matched so far (0..4)
+    int state=0;
+    byte[] b=new byte[1];
+    while (state<4) {
+      int numchars=sock.read(b);
+      if (numchars!=1)
+        return;
+      boolean wantcr = (state==0) || (state==2);
+      if ((wantcr && b[0]=='\r') || (!wantcr && b[0]=='\n')) {
+        state++;
+      } else if (b[0]=='\r') {
+        // A '\r' that breaks a partial match starts a new one; resetting to 0
+        // here would miss the terminator in input such as "\r\r\n\r\n".
+        state=1;
+      } else {
+        state=0;
+      }
+    }
+
+    while (true) {
+      byte[] buffer=new byte[1024];
+      int numchars=sock.read(buffer);
+      // Stop on end of stream; also stop on a negative return, which would
+      // otherwise reach subString(0,numchars) and never leave the loop.
+      if (numchars<=0)
+        return;
+      String curr=(new String(buffer)).subString(0,numchars);
+      q.response.append(curr);
+    }
+  }
+
+  // Records obj in doneList.
+  public void done(Object obj) {
+    doneList.push(obj);
+  }
+
+  // Scans q's response for href="..." links. Each link not yet in doneList
+  // is recorded there and, while depthCnt < maxDepth, queued on todoList as
+  // a new Query (at most maxSearchDepth per page).
+  public void processPage(Query q, QueryList doneList) {
+    int index = 0;
+    String href = new String("href=\"");
+    String searchstr = q.response.toLocalString();
+    boolean cont = true;
+
+    while(cont && (searchDepthCnt < maxSearchDepth)) {
+      int mindex = searchstr.indexOf(href,index);
+      if (mindex != -1) {
+        int endquote = searchstr.indexOf('"', mindex+href.length());
+        if (endquote != -1) {
+          String match = searchstr.subString(mindex+href.length(), endquote);
+          GlobalString gmatch;
+          GlobalString gmatch2;
+
+          atomic {
+            gmatch = global new GlobalString(match);
+            gmatch2 = q.makewebcanonical(gmatch);
+          }
+          if (gmatch2 != null && !doneList.checkQuery(gmatch2)) {
+            done(gmatch2);
+            if (depthCnt < maxDepth) {
+              Query newq;
+              System.printString("Depth : " + depthCnt + "\n");
+              atomic {
+                newq = global new Query(q.getHostName(gmatch), q.getPathName(gmatch));
+                todoList.push(newq);
+                System.printString("Size of todoList : " + todoList.size() + "\n");
+                searchDepthCnt++;
+              }
+            }
+          }
+          index = endquote;
+        } else cont = false;
+      } else cont = false;
+    }
+    // Per-page counters: one more page processed, link budget reset.
+    depthCnt++;
+    searchDepthCnt = 0;
+  }
+}
diff --git a/Robust/src/Benchmarks/Spider/dsm/Spider.java b/Robust/src/Benchmarks/Spider/dsm/Spider.java
new file mode 100644
index 00000000..66504dc7
--- /dev/null
+++ b/Robust/src/Benchmarks/Spider/dsm/Spider.java
@@ -0,0 +1,99 @@
+// Entry point of the DSM web crawler benchmark.
+public class Spider {
+  // Reads the first num_threads lines of dstm.conf, each a dotted-quad IPv4
+  // address, and packs each one into an int as (a<<24)|(b<<16)|(c<<8)|d.
+  public static int[] getMID (int num_threads) {
+    int[] mid = new int[num_threads];
+
+    FileInputStream ifs = new FileInputStream("dstm.conf");
+    String str;
+    String sub;
+    int fromIndex = 0;
+    int endIndex = 0;
+    int[] tmp = new int[4];
+
+    for (int i = 0; i < num_threads; i++) {
+      str = ifs.readLine();
+      fromIndex = 0;
+
+      // First three octets are each terminated by a '.'.
+      for (int j = 0; j < 3; j++) {
+        endIndex = str.indexOf('.', fromIndex);
+        sub = str.subString(fromIndex, endIndex);
+        tmp[j] = Integer.parseInt(sub);
+        fromIndex = endIndex + 1;
+      }
+
+      // Last octet runs to the end of the line.
+      // NOTE(review): assumes readLine() strips the line terminator - confirm.
+      sub = str.subString(fromIndex);
+      tmp[3] = Integer.parseInt(sub);
+
+      // The octets were previously parsed and then discarded, so every
+      // entry of mid stayed 0. Store the packed address.
+      mid[i] = (tmp[0]<<24)|(tmp[1]<<16)|(tmp[2]<<8)|tmp[3];
+    }
+    return mid;
+  }
+
+  // Usage: Spider <num_threads> <hostname> <page>
+  public static void main(String[] args) {
+    int NUM_THREADS = 3;
+    int depth = 5;
+    int searchDepth = 5;
+    int i;
+    Work[] works;
+    QueryThread[] qt;
+    Query[] currentWorkList;
+
+    // args[0..2] are all read below; fail with a message instead of an
+    // out-of-bounds access when any is missing.
+    if (args.length < 3) {
+      System.printString("usage: Spider <num_threads> <hostname> <page>\n");
+      return;
+    }
+
+    NUM_THREADS = Integer.parseInt(args[0]);
+    GlobalString firstmachine;
+    GlobalString firstpage;
+
+    // One machine id per thread, packed from the addresses in dstm.conf.
+    int[] mid = getMID(NUM_THREADS);
+
+    atomic {
+      firstmachine = global new GlobalString(args[1]);
+      firstpage = global new GlobalString(args[2]);
+
+      works = global new Work[NUM_THREADS];
+      qt = global new QueryThread[NUM_THREADS];
+      currentWorkList = global new Query[NUM_THREADS];
+
+      Query firstquery = global new Query(firstmachine, firstpage);
+
+      Queue todoList = global new Queue();
+      todoList.push(firstquery);
+      QueryList doneList = global new QueryList();
+
+      for (i = 0; i < NUM_THREADS; i++) {
+        qt[i] = global new QueryThread(todoList, doneList, depth, searchDepth);
+        works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
+      }
+    }
+    System.printString("Finished to create Objects\n");
+
+    Work tmp;
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      tmp.start(mid[i]);
+    }
+
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      tmp.join();
+    }
+  }
+}
diff --git a/Robust/src/Benchmarks/Spider/dsm/dstm.conf b/Robust/src/Benchmarks/Spider/dsm/dstm.conf
new file mode 100644
index 00000000..6b3f3e96
--- /dev/null
+++ b/Robust/src/Benchmarks/Spider/dsm/dstm.conf
@@ -0,0 +1,3 @@
+128.195.136.166
+128.195.136.167
+128.195.136.168
diff --git a/Robust/src/Benchmarks/Spider/dsm/makefile b/Robust/src/Benchmarks/Spider/dsm/makefile
new file mode 100644
index 00000000..0e561faf
--- /dev/null
+++ b/Robust/src/Benchmarks/Spider/dsm/makefile
@@ -0,0 +1,13 @@
+MAINCLASS=Spider
+SUBCLASS=Query
+SRC1=${MAINCLASS}.java
+SRC2=${SUBCLASS}.java
+SRC3=${SUBCLASS}List.java
+SRC4=${SUBCLASS}Thread.java
+FLAGS= -dsm -32bit -nooptimize -debug -recovery -mainclass ${MAINCLASS}
+default:
+	../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1}
+
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin