new dsm web crawler benchmark
author hkhang <hkhang>
Thu, 17 Sep 2009 23:04:22 +0000 (23:04 +0000)
committer hkhang <hkhang>
Thu, 17 Sep 2009 23:04:22 +0000 (23:04 +0000)
Robust/src/Benchmarks/Spider/dsm/Query.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/dsm/QueryList.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/dsm/QueryThread.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/dsm/Spider.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/dsm/dstm.conf [new file with mode: 0644]
Robust/src/Benchmarks/Spider/dsm/makefile [new file with mode: 0644]

diff --git a/Robust/src/Benchmarks/Spider/dsm/Query.java b/Robust/src/Benchmarks/Spider/dsm/Query.java
new file mode 100644 (file)
index 0000000..6cf78ad
--- /dev/null
@@ -0,0 +1,72 @@
+public class Query {
+  GlobalString hostname;
+  GlobalString path;
+  GlobalStringBuffer response;
+  
+  public Query(GlobalString hostname, GlobalString path) {
+    this.hostname = global new GlobalString(hostname);
+    this.path = global new GlobalString(path);
+    response = global new GlobalStringBuffer();
+  }
+
+  public GlobalString getHostName() {
+    return hostname;
+  }
+  public GlobalString getPath() {
+    return path;
+  }
+   
+  public void outputFile() {
+               StringBuffer sb = new StringBuffer(hostname.toLocalString());
+               sb.append(path.toLocalString());
+    FileOutputStream fos = new FileOutputStream(sb.toString().replace('/','#'));
+    fos.write(response.toLocalString().getBytes());
+    fos.close();
+  }
+       
+
+  public GlobalString makewebcanonical(GlobalString page) {
+    GlobalStringBuffer b = global new GlobalStringBuffer(getHostName(page));
+    b.append("/");
+               b.append(getPathName(page));
+    return b.toGlobalString();
+  }
+
+  public GlobalString getHostName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    if (page.indexOf(http) == -1) {
+      return getHostName();
+    } else {
+      int beginindex = page.indexOf(http) + http.length();
+           int endindex = page.indexOf('/',beginindex+1);
+           if ((beginindex == -1)) {
+        System.printString("ERROR");
+           }
+           if (endindex == -1)
+        endindex = page.length();
+      return page.subString(beginindex, endindex);
+    }
+  }
+
+  
+       public GlobalString getPathName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    if (page.indexOf(http) == -1) {
+      GlobalString path = getPath();
+           int lastindex = path.lastindexOf('/');
+           if (lastindex == -1)
+        return page;
+           
+      GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1));
+           sb.append(page);
+      return sb.toGlobalString();
+    } else {
+      int beginindex = page.indexOf(http)+http.length();
+           int nextindex = page.indexOf('/',beginindex+1);
+           if ((beginindex == -1) || (nextindex == -1))
+        return global new GlobalString("index.html");
+      return page.subString(nextindex+1, page.length());
+    }
+  }
+}
diff --git a/Robust/src/Benchmarks/Spider/dsm/QueryList.java b/Robust/src/Benchmarks/Spider/dsm/QueryList.java
new file mode 100644 (file)
index 0000000..fa4a9ff
--- /dev/null
@@ -0,0 +1,16 @@
+public class QueryList extends Queue {
+  public QueryList() {
+               Queue();                        // ??
+  }
+
+  public boolean checkQuery(GlobalString x) {
+               boolean set = false;;
+               for (int i = 0 ; i < size; i++) {
+                       if (x.equals((GlobalString)elements[i])) {
+                               set = true;
+                               break;
+                       }
+               }
+               return set;
+  }
+}
diff --git a/Robust/src/Benchmarks/Spider/dsm/QueryThread.java b/Robust/src/Benchmarks/Spider/dsm/QueryThread.java
new file mode 100644 (file)
index 0000000..d9dc369
--- /dev/null
@@ -0,0 +1,169 @@
+public class QueryThread extends Task {
+       int maxDepth;
+       int depthCnt;
+       int maxSearchDepth;
+       int searchDepthCnt;
+
+  public QueryThread(Queue qq, Queue ql, int depth, int searchDepth) {
+    this.todoList = qq;
+               this.doneList = ql;
+               this.maxDepth = depth;
+               this.maxSearchDepth = searchDepth;
+               depthCnt = 1;
+               searchDepthCnt = 0;
+  }
+
+  public void execute(Object mywork) {
+               Query q = (Query)mywork;
+               GlobalString ghostname;
+               GlobalString gpath;
+
+               atomic {
+                       ghostname = q.getHostName();
+                       gpath = q.getPath();
+               }
+
+               String hostname = new String(GlobalString.toLocalCharArray(ghostname));
+               String path = new String(GlobalString.toLocalCharArray(gpath));
+
+               System.printString("Processing ");
+               System.printString(hostname + "\n");
+               System.printString(" ");
+               System.printString(path);
+               System.printString("\n");
+
+               Socket s = new Socket(hostname, 80);
+
+               requestQuery(hostname, path, s);
+//             System.printString("Wait for 5 secs\n");
+//             Thread.sleep(2000000);
+
+               readResponse(q, s);
+//             System.printString("Wait for 5 secs\n");
+//             Thread.sleep(2000000);
+
+               q.outputFile();
+//             System.printString("Wait for 5 secs\n");
+//             Thread.sleep(2000000);
+
+               processPage(q, (QueryList)doneList);
+               s.close();
+  }
+       
+       public void requestQuery(String hostname, String path, Socket sock) {
+    StringBuffer req = new StringBuffer("GET "); 
+    req.append("/");
+               req.append(path);
+    req.append(" HTTP/1.1\r\nHost:");
+    req.append(hostname);
+    req.append("\r\n\r\n");
+               System.printString("req : " + req + "\n");
+    sock.write(req.toString().getBytes());
+  }
+
+       public void readResponse(Query q, Socket sock) {
+       //    state 0 - nothing
+       //    state 1 - \r
+       //    state 2 - \r\n
+       //    state 3 - \r\n\r
+       //    state 4 - \r\n\r\n
+    int state=0;
+    while(true) {
+      if (state<4) {
+        if (state==0) {
+          byte[] b=new byte[1];
+          int numchars=sock.read(b);
+          if ((numchars==1)) {
+            if (b[0]=='\r') {
+              state++;
+            }
+          } else
+                                               return;
+        } else if (state==1) {
+          byte[] b=new byte[1];
+          int numchars=sock.read(b);
+          if (numchars==1) {
+            if (b[0]=='\n')
+              state++;
+            else
+              state=0;
+          } else return;
+        } else if (state==2) {
+          byte[] b=new byte[1];
+          int numchars=sock.read(b);
+          if (numchars==1) {
+            if (b[0]=='\r')
+              state++;
+            else
+              state=0;
+          } else return;
+        } else if (state==3) {
+          byte[] b=new byte[1];
+          int numchars=sock.read(b);
+          if (numchars==1) {
+            if (b[0]=='\n')
+              state++;
+            else
+              state=0;
+          } else return;
+        }
+      } else {
+                               byte[] buffer=new byte[1024];
+        int numchars=sock.read(buffer);
+        if (numchars==0)
+          return;
+        else {
+          String curr=(new String(buffer)).subString(0,numchars);
+                                       q.response.append(curr);
+        }
+      }
+    }
+  }
+       
+       public void done(Object obj) {
+               doneList.push(obj);
+//             System.printString("Size of todoList : " + todoList.size() + "\n");
+//             Thread.sleep(5000000);
+       }
+
+  public void processPage(Query q, QueryList doneList) {
+    int index = 0;
+       String href = new String("href=\"");
+       String searchstr = q.response.toLocalString();
+       boolean cont = true;
+
+               while(cont && (searchDepthCnt < maxSearchDepth)) {
+                       int mindex = searchstr.indexOf(href,index);
+                       if (mindex != -1) {     
+                               int endquote = searchstr.indexOf('"', mindex+href.length());
+               if (endquote != -1) {
+                     String match = searchstr.subString(mindex+href.length(), endquote);
+                                       GlobalString gmatch;
+                                       GlobalString gmatch2;
+
+                                       atomic {
+                                               gmatch = global new GlobalString(match);
+                                               gmatch2 = q.makewebcanonical(gmatch);
+                                       }
+                     if (gmatch2 != null && !doneList.checkQuery(gmatch2)) {
+//                                             doneList.push(gmatch2);
+                                               done(gmatch2);
+                                               if (depthCnt < maxDepth) {
+                                                       Query newq;
+                                                       System.printString("Depth : " + depthCnt + "\n");
+                                                       atomic {
+                                                               newq = global new Query(q.getHostName(gmatch), q.getPathName(gmatch));
+                                                               todoList.push(newq);
+                                                               System.printString("Size of todoList : " + todoList.size() + "\n");
+                                                               searchDepthCnt++;
+                                                       }
+                                               }
+                                       }
+                     index = endquote;
+        } else cont = false;
+      } else cont = false;
+    }
+               depthCnt++;
+               searchDepthCnt = 0;
+  }
+}
diff --git a/Robust/src/Benchmarks/Spider/dsm/Spider.java b/Robust/src/Benchmarks/Spider/dsm/Spider.java
new file mode 100644 (file)
index 0000000..66504dc
--- /dev/null
@@ -0,0 +1,93 @@
+public class Spider {
+       public static int[] getMID (int num_threads) {
+               int[] mid = new int[num_threads];
+
+               FileInputStream ifs = new FileInputStream("dstm.conf");
+               String str;
+               String sub;
+               int fromIndex = 0;
+               int endIndex = 0;
+               int[] tmp = new int[4];
+
+               for (int i = 0; i < num_threads; i++) { 
+                       str = ifs.readLine();
+                       endIndex = str.indexOf('.', fromIndex);
+                       sub = str.subString(fromIndex, endIndex);
+
+                       fromIndex = endIndex + 1;
+                       endIndex = str.indexOf('.', fromIndex);
+                       sub = str.subString(fromIndex, endIndex);
+
+                       fromIndex = endIndex + 1;
+                       endIndex = str.indexOf('.', fromIndex);
+                       sub = str.subString(fromIndex, endIndex);
+
+                       fromIndex = endIndex + 1;
+                       sub = str.subString(fromIndex);
+
+                       fromIndex = 0;
+               }
+               return mid;
+       }
+
+       public static void main(String[] args) {
+               int NUM_THREADS = 3;
+               int depth = 5;
+               int searchDepth = 5;
+               int i, j;
+               Work[] works;
+               QueryThread[] qt;
+               Query[] currentWorkList;
+
+               NUM_THREADS = Integer.parseInt(args[0]);
+               GlobalString firstmachine;
+               GlobalString firstpage;
+
+               int[] mid = getMID(NUM_THREADS);
+
+/*             int mid[] = new int[NUM_THREADS];
+               mid[0] = (128<<24)|(195<<16)|(136<<8)|166;       //dc-4
+               mid[1] = (128<<24)|(195<<16)|(136<<8)|167;       //dc-5
+               mid[2] = (128<<24)|(195<<16)|(136<<8)|168;       //dc-6
+*/
+               atomic {
+                       firstmachine = global new GlobalString(args[1]);
+                       firstpage = global new GlobalString(args[2]);
+
+                       works = global new Work[NUM_THREADS];
+                       qt = global new QueryThread[NUM_THREADS];
+                       currentWorkList = global new Query[NUM_THREADS];
+                       
+                       Query firstquery = global new Query(firstmachine, firstpage);
+
+                       Queue todoList = global new Queue();
+                       todoList.push(firstquery);
+                       QueryList doneList = global new QueryList();
+
+                       for (i = 0; i < NUM_THREADS; i++) {
+                               qt[i] = global new QueryThread(todoList, doneList, depth, searchDepth);
+                               works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
+                       }
+               }
+               System.printString("Finished to create Objects\n");
+
+               Work tmp;
+               for (i = 0; i < NUM_THREADS; i++) {
+                       atomic {
+                               tmp = works[i];
+                       }
+                       tmp.start(mid[i]);
+               }
+
+               for (i = 0; i < NUM_THREADS; i++) {
+                       atomic {
+                               tmp = works[i];
+                       }
+                       tmp.join();
+               }
+
+//             while(true)
+//                     Thread.sleep(1000000);
+
+       }
+}
diff --git a/Robust/src/Benchmarks/Spider/dsm/dstm.conf b/Robust/src/Benchmarks/Spider/dsm/dstm.conf
new file mode 100644 (file)
index 0000000..6b3f3e9
--- /dev/null
@@ -0,0 +1,3 @@
+128.195.136.166
+128.195.136.167
+128.195.136.168
diff --git a/Robust/src/Benchmarks/Spider/dsm/makefile b/Robust/src/Benchmarks/Spider/dsm/makefile
new file mode 100644 (file)
index 0000000..0e561fa
--- /dev/null
@@ -0,0 +1,13 @@
+# Builds the Spider benchmark from its four source files.
+MAINCLASS=Spider
+SUBCLASS=Query
+SRC1=${MAINCLASS}.java
+SRC2=${SUBCLASS}.java
+SRC3=${SUBCLASS}List.java
+SRC4=${SUBCLASS}Thread.java
+FLAGS= -dsm -32bit -nooptimize -debug -recovery -mainclass ${MAINCLASS}
+
+# Neither target produces a file of its own name.
+.PHONY: default clean
+
+default:
+       ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1}
+
+# -f keeps "make clean" from failing when there is nothing to remove.
+clean:
+       rm -rf tmpbuilddirectory
+       rm -f *.bin