java sources
author bdemsky <bdemsky>
Wed, 14 Mar 2007 00:34:15 +0000 (00:34 +0000)
committer bdemsky <bdemsky>
Wed, 14 Mar 2007 00:34:15 +0000 (00:34 +0000)
Robust/src/Benchmarks/Spider/Java/Query.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/Java/QueryList.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/Java/QueryQueue.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/Java/QueryThread.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/Java/Spider.java [new file with mode: 0644]

diff --git a/Robust/src/Benchmarks/Spider/Java/Query.java b/Robust/src/Benchmarks/Spider/Java/Query.java
new file mode 100644 (file)
index 0000000..cb4a727
--- /dev/null
@@ -0,0 +1,88 @@
+// One page to crawl: the host to contact and the path to request from it.
+// Also resolves links found on that page into host and path components.
+public class Query {
+    private String hostname;
+    // Path on the host.  QueryThread.requestQuery writes a "/" in front of it,
+    // so it is expected here without a leading slash.
+    private String path;
+
+    // Accumulated response body.  Not private because QueryThread appends to
+    // and reads this field directly.
+    StringBuffer response;
+
+    public Query(String hostname, String path) {
+        this.hostname=hostname;
+        this.path=path;
+        response=new StringBuffer();
+    }
+
+    // Host this query is sent to.
+    public String getHostName() {
+        return hostname;
+    }
+
+    // Path requested from the host.
+    public String getPath() {
+        return path;
+    }
+
+    // Builds the "host/path" string for the link `page`, resolving a relative
+    // link against this query.
+    public String makewebcanonical(String page) {
+        StringBuffer b=new StringBuffer(getHostName(page));
+        b.append("/");
+        b.append(getPathName(page));
+        return b.toString();
+    }
+
+    // Returns the host named by the link `page`: the text between "http://"
+    // and the next '/' (or the end of the link).  A link without "http://" is
+    // relative, so it inherits this query's host.
+    public String getHostName(String page) {
+        String http=new String("http://");
+        int schemeindex=page.indexOf(http);
+        if (schemeindex==-1)
+            return getHostName();
+
+        int beginindex=schemeindex+http.length();
+        int endindex=page.indexOf('/',beginindex+1);
+        if (endindex==-1)
+            endindex=page.length();
+        return page.subString(beginindex, endindex);
+    }
+
+    // Returns the path named by the link `page`:
+    //  - "http://host/some/path" gives "some/path"; with no '/' after the
+    //    host the result is "index.html";
+    //  - "/some/path" is relative to the root of the host;
+    //  - anything else is resolved against the directory of this query's path.
+    public String getPathName(String page) {
+        String http=new String("http://");
+        int schemeindex=page.indexOf(http);
+        if (schemeindex==-1) {
+            // Root-relative link: drop the slash rather than gluing the link
+            // onto the current directory.
+            if (page.indexOf("/")==0)
+                return page.subString(1, page.length());
+
+            String current=getPath();
+            int lastindex=current.lastindexOf('/');
+            if (lastindex==-1)
+                return page;
+
+            // Keep the directory part, up to and including the last '/'.
+            StringBuffer sb=new StringBuffer(current.subString(0,lastindex+1));
+            sb.append(page);
+            return sb.toString();
+        }
+
+        int beginindex=schemeindex+http.length();
+        int nextindex=page.indexOf('/',beginindex+1);
+        if (nextindex==-1)
+            return new String("index.html");
+        // The end index of subString is exclusive (getHostName relies on this
+        // when it passes page.length()), so use the full length here to keep
+        // the last character of the path.
+        return page.subString(nextindex+1, page.length());
+    }
+}
diff --git a/Robust/src/Benchmarks/Spider/Java/QueryList.java b/Robust/src/Benchmarks/Spider/Java/QueryList.java
new file mode 100644 (file)
index 0000000..00d66e3
--- /dev/null
@@ -0,0 +1,19 @@
+// Set of link strings that have already been recorded.
+public class QueryList {
+    HashSet queries;
+
+    public QueryList() {
+        this.queries=new HashSet();
+    }
+
+    // True when the set contains x.
+    public boolean checkQuery(String x) {
+        boolean known=queries.contains(x);
+        return known;
+    }
+
+    // Records x in the set.
+    public void addQuery(String x) {
+        this.queries.add(x);
+    }
+}
diff --git a/Robust/src/Benchmarks/Spider/Java/QueryQueue.java b/Robust/src/Benchmarks/Spider/Java/QueryQueue.java
new file mode 100644 (file)
index 0000000..f379e1d
--- /dev/null
@@ -0,0 +1,24 @@
+// Pool of queries waiting to be fetched.  Both accessors are synchronized.
+public class QueryQueue {
+    HashSet queries;
+
+    public QueryQueue() {
+        this.queries=new HashSet();
+    }
+
+    // Removes and returns one pending query (whichever the set's iterator
+    // yields first), or null when none is pending.
+    public synchronized Query getQuery() {
+        Query next=null;
+        if (!queries.isEmpty()) {
+            next=(Query) queries.iterator().next();
+            queries.remove(next);
+        }
+        return next;
+    }
+
+    // Adds x to the pending set.
+    public synchronized void addQuery(Query x) {
+        this.queries.add(x);
+    }
+}
diff --git a/Robust/src/Benchmarks/Spider/Java/QueryThread.java b/Robust/src/Benchmarks/Spider/Java/QueryThread.java
new file mode 100644 (file)
index 0000000..c334937
--- /dev/null
@@ -0,0 +1,128 @@
+// Worker that repeatedly takes a query from the shared queue, fetches the
+// page with an HTTP GET on port 80 and queues the links it has not seen yet.
+public class QueryThread extends Thread {
+    QueryQueue toprocess;
+    QueryList ql;
+
+    public QueryThread(QueryQueue qq, QueryList ql) {
+        toprocess=qq;
+        this.ql=ql;
+    }
+
+    // Loops forever: waits for a query, fetches it, then extracts its links.
+    public void run() {
+        while(true) {
+            // Poll until work is available, pausing between attempts.
+            Query q=toprocess.getQuery();
+            while(q==null) {
+                Thread.sleep(2);
+                q=toprocess.getQuery();
+            }
+            Socket s=new Socket(q.getHostName(), 80);
+            requestQuery(q, s);
+            readResponse(q, s);
+            // Link extraction works on the buffered response only, so the
+            // socket can be released first.
+            s.close();
+            processPage(q, ql);
+        }
+    }
+
+    // Writes the GET request for q's path and host to sock.
+    void requestQuery(Query q, Socket sock) {
+        StringBuffer req=new StringBuffer("GET ");
+        req.append("/");
+        req.append(q.getPath());
+        req.append(" HTTP/1.1\r\nHost:");
+        req.append(q.getHostName());
+        // readResponse reads until sock.read stops returning data, so ask the
+        // server to close the connection once the response has been sent.
+        req.append("\r\nConnection: close");
+        req.append("\r\n\r\n");
+        sock.write(req.toString().getBytes());
+    }
+
+    // Reads the response from sock: skips the header (everything up to and
+    // including the first "\r\n\r\n") and appends the rest to q.response.
+    // Returns when sock.read stops delivering data.
+    void readResponse(Query q, Socket sock) {
+        // state counts how much of "\r\n\r\n" has been matched:
+        //    state 0 - nothing
+        //    state 1 - \r
+        //    state 2 - \r\n
+        //    state 3 - \r\n\r
+        //    state 4 - \r\n\r\n
+        int state=0;
+        while(state<4) {
+            byte[] b=new byte[1];
+            if (sock.read(b)!=1)
+                return;
+            boolean iscr=(b[0]=='\r');
+            // Header bytes are echoed, except those skipped in state 0.
+            if (state!=0||iscr)
+                System.printString(new String(b));
+            if (state==0||state==2) {
+                // Waiting for '\r'.
+                if (iscr)
+                    state++;
+                else
+                    state=0;
+            } else if (b[0]=='\n') {
+                state++;
+            } else if (iscr) {
+                // A '\r' that breaks the match still starts a new one.
+                state=1;
+            } else {
+                state=0;
+            }
+        }
+
+        // Header finished: the remainder is the body.
+        while(true) {
+            byte[] buffer=new byte[1024];
+            int numchars=sock.read(buffer);
+            // Also stop on a negative count, which subString cannot take.
+            if (numchars<=0)
+                return;
+            String curr=(new String(buffer)).subString(0,numchars);
+            System.printString(curr);
+            q.response.append(curr);
+        }
+    }
+
+    // Scans q.response for href="..." attributes.  Every link whose
+    // canonical form is not yet in ql is recorded there, printed and queued
+    // on toprocess as a new Query.
+    void processPage(Query q, QueryList ql) {
+        String href=new String("href=\"");
+        String searchstr=q.response.toString();
+        int index=0;
+        while(true) {
+            int mindex=searchstr.indexOf(href,index);
+            if (mindex==-1)
+                return;
+            int start=mindex+href.length();
+            int endquote=searchstr.indexOf('"', start);
+            if (endquote==-1)
+                return;
+
+            String match=searchstr.subString(start, endquote);
+            String canonical=q.makewebcanonical(match);
+            // NOTE(review): checkQuery followed by addQuery is not atomic;
+            // confirm before running several QueryThreads on one QueryList.
+            if (!ql.checkQuery(canonical)) {
+                ql.addQuery(canonical);
+                // Resolve the link once and reuse it for output and queueing.
+                String host=q.getHostName(match);
+                String path=q.getPathName(match);
+                System.printString(host);
+                System.printString("        ");
+                System.printString(path);
+                System.printString("\n");
+                toprocess.addQuery(new Query(host, path));
+            }
+            // Resume the search after this link.
+            index=endquote;
+        }
+    }
+}
diff --git a/Robust/src/Benchmarks/Spider/Java/Spider.java b/Robust/src/Benchmarks/Spider/Java/Spider.java
new file mode 100644 (file)
index 0000000..e07b404
--- /dev/null
@@ -0,0 +1,27 @@
+// Entry point: seeds the queue with one page and crawls from there.
+public class Spider {
+
+    // parameters[0] is the host of the first page, parameters[1] its path.
+    public static void main(String[] parameters) {
+        if (parameters.length<2) {
+            System.printString("Spider: expected arguments <hostname> <path>\n");
+            return;
+        }
+        String firstmachine=parameters[0];
+        String firstpage=parameters[1];
+        QueryList ql=new QueryList();
+        QueryQueue toprocess=new QueryQueue();
+        toprocess.addQuery(new Query(firstmachine, firstpage));
+        QueryThread qt1=new QueryThread(toprocess, ql);
+        // run() is called directly, so the crawl loop executes on the calling
+        // thread; the start() calls below are left disabled.
+        qt1.run();
+        //      qt1.start();
+        //QueryThread qt2=new QueryThread(toprocess, ql);
+        //qt2.start();
+        //QueryThread qt3=new QueryThread(toprocess, ql);
+        //qt3.start();
+        //while(true)
+        //    Thread.sleep(1000000);
+    }
+}