From 25050ed28bb8182ae13892bdea9226c43c37f947 Mon Sep 17 00:00:00 2001 From: bdemsky Date: Tue, 13 Mar 2007 02:48:18 +0000 Subject: [PATCH] changes --- Robust/src/Benchmarks/Spider/BR/Query.java | 77 ++++++++++++- .../src/Benchmarks/Spider/BR/QueryList.java | 14 +++ Robust/src/Benchmarks/Spider/BR/Spider.java | 105 ++++++++++++++++++ 3 files changed, 191 insertions(+), 5 deletions(-) create mode 100644 Robust/src/Benchmarks/Spider/BR/QueryList.java create mode 100644 Robust/src/Benchmarks/Spider/BR/Spider.java diff --git a/Robust/src/Benchmarks/Spider/BR/Query.java b/Robust/src/Benchmarks/Spider/BR/Query.java index 627069a9..1eba8f81 100644 --- a/Robust/src/Benchmarks/Spider/BR/Query.java +++ b/Robust/src/Benchmarks/Spider/BR/Query.java @@ -1,11 +1,78 @@ -public class Query { +public class Query extends Socket { /* One page fetch: holds the target hostname and path, the scan state used by readResponse while reading the reply, and a StringBuffer that collects the response text. */ + flag requested; flag processed; + flag received; + public int state; - String webpage; - - public Query(String web) { - webpage=web; + private String hostname; + private String path; + + private Socket connection; + private StringBuffer response; + + public Query(String hostname, String path) { + this.hostname=hostname; + this.path=path; + response=new StringBuffer(); + state=0; + } + + public void makeConnection() { /* Looks up hostname and connects to port 80 through nativeBind and nativeConnect, assigning the result of nativeBind to fd. */ + InetAddress address=InetAddress.getByName(hostname); + int port=80; + fd=nativeBind(address.getAddress(), port); + nativeConnect(fd, address.getAddress(), port); + } + + public void setSocket(Socket s) { /* NOTE(review): connection is only assigned here; nothing in this patch reads it. */ + connection=s; + } + + public String getHostName() { + return hostname; + } + + public String getPath() { + return path; } + public String makewebcanonical(String page) { /* Builds host, a slash, then path for a link found on this page. */ + StringBuffer b=new StringBuffer(getHostName(page)); + b.append("/"); + b.append(getPathName(page)); + return b.toString(); + } + public String getHostName(String page) { /* Host part of a link: for links without the http prefix this query's own hostname is returned; otherwise the text between the prefix and the next slash. */ + String http=new String("http://"); + if (page.indexOf(http)==-1) { + return getHostName(); + } else { + int beginindex=page.indexOf(http)+http.length(); + int endindex=page.indexOf('/',beginindex+1); 
+ if ((beginindex==-1)||(endindex==-1)) + System.printString("ERROR"); /* NOTE(review): beginindex is indexOf(http)+http.length() inside the branch where indexOf(http) is not -1, so the beginindex==-1 test looks unreachable; when endindex is -1 (no slash after the host) ERROR is printed but subString is still called with endindex -1. */ + return page.subString(beginindex, endindex); + } + } + + public String getPathName(String page) { /* Path part of a link: a link without the http prefix is appended to this query's path up to and including its last slash (or returned unchanged if that path has no slash); otherwise the text after the first slash following the host is returned. */ + String http=new String("http://"); + if (page.indexOf(http)==-1) { + String path=getPath(); + int lastindex=path.lastindexOf('/'); + if (lastindex==-1) + return page; + + StringBuffer sb=new StringBuffer(path.subString(0,lastindex+1)); + sb.append(page); + return sb.toString(); + } else { + int beginindex=page.indexOf(http)+http.length(); + int nextindex=page.indexOf('/',beginindex+1); + if ((beginindex==-1)||(nextindex==-1)) + System.printString("ERROR"); /* NOTE(review): execution continues after ERROR with nextindex -1. Also, the end argument below is page.length()-1; if subString treats its end index as exclusive (as the subString(0,numchars) call in readResponse suggests; confirm against the class library), the last character of the path is dropped. */ + return page.subString(nextindex+1, page.length()-1); + } + } } diff --git a/Robust/src/Benchmarks/Spider/BR/QueryList.java b/Robust/src/Benchmarks/Spider/BR/QueryList.java new file mode 100644 index 00000000..90dc9dfe --- /dev/null +++ b/Robust/src/Benchmarks/Spider/BR/QueryList.java @@ -0,0 +1,14 @@ +public class QueryList { /* Set of canonical link strings that have already been queued; processPage consults it through checkQuery and addQuery. */ + flag initialized; + HashSet queries; + + public QueryList() { + queries=new HashSet(); + } + public boolean checkQuery(String x) { + return queries.contains(x); + } + public void addQuery(String x) { + queries.add(x); + } +} diff --git a/Robust/src/Benchmarks/Spider/BR/Spider.java b/Robust/src/Benchmarks/Spider/BR/Spider.java new file mode 100644 index 00000000..280f198b --- /dev/null +++ b/Robust/src/Benchmarks/Spider/BR/Spider.java @@ -0,0 +1,105 @@ +task Startup(StartupObject s {initialstate}) { /* Creates the QueryList and the first Query from parameters[0] (host) and parameters[1] (path), then clears initialstate on s. */ + String firstmachine=s.parameters[0]; + String firstpage=s.parameters[1]; + QueryList ql=new QueryList() {initialized}; + Query firstquery=new Query(firstmachine, firstpage){}; + taskexit(s{!initialstate}); +} + +task requestQuery(Query q{!requested}) { /* Connects q and writes an HTTP/1.1 GET for its path with a Host header, then sets requested. */ + String hostname=q.getHostName(); /* NOTE(review): hostname is not used below; the request calls q.getHostName() again. */ + q.makeConnection(); + StringBuffer req=new StringBuffer("GET "); + req.append("/"); + req.append(q.getPath()); + req.append(" HTTP/1.1\r\nHost:"); + req.append(q.getHostName()); + req.append("\r\n\r\n"); 
+ q.write(req.toString().getBytes()); + taskexit(q{requested}); +} + +task readResponse(Query q{requested && ! received && IOPending}) { /* Reads one byte per invocation while q.state is below 4, advancing q.state as the \r\n\r\n sequence is matched; once state is 4, reads up to 1024 bytes per invocation into q.response and sets received when read returns 0. NOTE(review): a \r received in state 1 or 3 resets to state 0 rather than state 1, so a sequence such as \r\r\n\r\n is not recognized. */ + // state 0 - nothing + // state 1 - \r + // state 2 - \r\n + // state 3 - \r\n\r + // state 4 - \r\n\r\n + if (q.state<4) { + if (q.state==0) { + byte[] b=new byte[1]; + int numchars=q.read(b); + if ((numchars==1) && (b[0]=='\r')) + q.state++; /* NOTE(review): unlike states 1-3, the print that follows is not guarded by numchars==1. */ + System.printString(new String(b)); + } else if (q.state==1) { + byte[] b=new byte[1]; + int numchars=q.read(b); + if (numchars==1) { + if (b[0]=='\n') + q.state++; + else + q.state=0; + System.printString(new String(b)); + } + } else if (q.state==2) { + byte[] b=new byte[1]; + int numchars=q.read(b); + if (numchars==1) { + if (b[0]=='\r') + q.state++; + else + q.state=0; + System.printString(new String(b)); + } + } else if (q.state==3) { + byte[] b=new byte[1]; + int numchars=q.read(b); + if (numchars==1) { + if (b[0]=='\n') + q.state++; + else + q.state=0; + System.printString(new String(b)); + } + } + } else { + byte[] buffer=new byte[1024]; + int numchars=q.read(buffer); + if (numchars==0) + taskexit(q{received}); + else { + String curr=(new String(buffer)).subString(0,numchars); + System.printString(curr); + q.response.append(curr); + } + } +} + +task processPage(Query q{received&&!processed}, QueryList ql{initialized}) { /* Walks q.response looking for href attributes; each link is canonicalized and, if ql has not seen it, recorded in ql, printed, and turned into a new Query. Sets processed once no further link is found. */ + int index=0; + String href=new String("href=\""); + String searchstr=q.response.toString(); + boolean cont=true; + while(cont) { + int mindex=searchstr.indexOf(href,index); + if (mindex!=-1) { + + int endquote=searchstr.indexOf('"', mindex+href.length()); + if (endquote!=-1) { + String match=searchstr.subString(mindex+href.length(), endquote); + String match2=q.makewebcanonical(match); /* NOTE(review): makewebcanonical returns b.toString(), so the match2!=null test below looks redundant. */ + if (match2!=null&&!ql.checkQuery(match2)) { + ql.addQuery(match2); + System.printString(q.getHostName(match)); + System.printString(" "); + System.printString(q.getPathName(match)); + System.printString("\n"); + Query newq=new Query(q.getHostName(match), q.getPathName(match)){}; + } 
+ index=endquote; + } else cont=false; + } else cont=false; + } + taskexit(q{processed}); +} -- 2.34.1