--- /dev/null
+// A page to fetch: the host that serves it, the path requested from that host,
+// and the response text accumulated for it.  Also splits links found in a page
+// into host and path parts, resolving relative links against this query.
+//
+// A link is treated as absolute only if it contains "http://"; any other link
+// is resolved relative to this query's host and path.
+public class Query {
+  private String hostname;
+  private String path;
+
+  // Not private: QueryThread appends to and reads this buffer directly.
+  StringBuffer response;
+
+  public Query(String hostname, String path) {
+    this.hostname=hostname;
+    this.path=path;
+    response=new StringBuffer();
+  }
+
+  // Host this query is addressed to.
+  public String getHostName() {
+    return hostname;
+  }
+
+  // Path requested from the host.
+  public String getPath() {
+    return path;
+  }
+
+  // Returns the link in "host/path" form, using getHostName(page) and
+  // getPathName(page) for the two parts.
+  public String makewebcanonical(String page) {
+    StringBuffer b=new StringBuffer(getHostName(page));
+    b.append("/");
+    b.append(getPathName(page));
+    return b.toString();
+  }
+
+  // Returns the host a link points at.  For a link containing "http://" this
+  // is the text between the scheme and the next '/' (or the end of the link);
+  // for any other link it is this query's own host.
+  public String getHostName(String page) {
+    String http=new String("http://");
+    int httpindex=page.indexOf(http);
+    if (httpindex==-1)
+      return getHostName();
+
+    int beginindex=httpindex+http.length();
+    // The search for the '/' that ends the host starts one character into
+    // the host name.
+    int endindex=page.indexOf('/',beginindex+1);
+    if (endindex==-1)
+      endindex=page.length();
+    return page.subString(beginindex, endindex);
+  }
+
+  // Returns the path a link points at, without a leading '/'.
+  //
+  // Relative link: the link is appended to the directory part of this query's
+  // path (everything up to and including its last '/'); if this query's path
+  // has no '/', the link is returned unchanged.
+  //
+  // Absolute link: everything after the '/' that follows the host name, or
+  // "index.html" when the link has no such '/'.
+  public String getPathName(String page) {
+    String http=new String("http://");
+    int httpindex=page.indexOf(http);
+    if (httpindex==-1) {
+      String path=getPath();
+      int lastindex=path.lastindexOf('/');
+      if (lastindex==-1)
+        return page;
+
+      StringBuffer sb=new StringBuffer(path.subString(0,lastindex+1));
+      sb.append(page);
+      return sb.toString();
+    }
+
+    int beginindex=httpindex+http.length();
+    int nextindex=page.indexOf('/',beginindex+1);
+    if (nextindex==-1)
+      return new String("index.html");
+    // The end index is exclusive, so page.length() keeps the final character
+    // and a link ending in '/' yields an empty path.
+    return page.subString(nextindex+1, page.length());
+  }
+}
--- /dev/null
+public class QueryList {
+ HashSet queries;
+
+ public QueryList() {
+ queries=new HashSet();
+ }
+ public boolean checkQuery(String x) {
+ return queries.contains(x);
+ }
+ public void addQuery(String x) {
+ queries.add(x);
+ }
+}
--- /dev/null
+// Pool of queries waiting to be processed.  Both methods are synchronized on
+// the queue object.  The pool is backed by a HashSet, so getQuery hands back
+// whichever element the set's iterator yields first rather than the oldest
+// one added.
+public class QueryQueue {
+  HashSet queries;
+
+  public QueryQueue() {
+    this.queries=new HashSet();
+  }
+
+  // Removes and returns one pending query, or returns null when none is
+  // pending.
+  public synchronized Query getQuery() {
+    Query next=null;
+    if (!this.queries.isEmpty()) {
+      next=(Query) this.queries.iterator().next();
+      this.queries.remove(next);
+    }
+    return next;
+  }
+
+  // Adds x to the pending queries.
+  public synchronized void addQuery(Query x) {
+    this.queries.add(x);
+  }
+}
--- /dev/null
+// Worker that repeatedly takes a query from the shared queue, fetches the page
+// over a socket on port 80, and queues every not-yet-seen link found in it.
+public class QueryThread extends Thread {
+  QueryQueue toprocess;
+  QueryList ql;
+
+  // qq supplies the queries to process and receives newly discovered ones;
+  // ql records which canonical links have already been queued.
+  public QueryThread(QueryQueue qq, QueryList ql) {
+    toprocess=qq;
+    this.ql=ql;
+  }
+
+  // Processes queries forever.  While the queue is empty it polls again after
+  // a Thread.sleep(2) call.
+  public void run() {
+    while(true) {
+      Query q=toprocess.getQuery();
+      while(q==null) {
+        Thread.sleep(2);
+        q=toprocess.getQuery();
+      }
+      Socket s=new Socket(q.getHostName(), 80);
+      requestQuery(q, s);
+      readResponse(q, s);
+      processPage(q, ql);
+      s.close();
+    }
+  }
+
+  // Writes a GET request for the query's path to sock.  A '/' is always
+  // placed in front of the path.
+  void requestQuery(Query q, Socket sock) {
+    StringBuffer req=new StringBuffer("GET ");
+    req.append("/");
+    req.append(q.getPath());
+    req.append(" HTTP/1.1\r\nHost:");
+    req.append(q.getHostName());
+    req.append("\r\n\r\n");
+    sock.write(req.toString().getBytes());
+  }
+
+  // Reads the response from sock.  Everything up to and including the first
+  // "\r\n\r\n" is consumed one byte at a time and not stored; everything
+  // after it is printed and appended to q.response.  Returns as soon as a
+  // read delivers no data.
+  void readResponse(Query q, Socket sock) {
+    // state counts how much of "\r\n\r\n" has been matched so far:
+    // state 0 - nothing
+    // state 1 - \r
+    // state 2 - \r\n
+    // state 3 - \r\n\r
+    // state 4 - \r\n\r\n
+    int state=0;
+    byte[] b=new byte[1];
+    while(state<4) {
+      if (sock.read(b)!=1)
+        return;
+      // States 0 and 2 wait for '\r', states 1 and 3 wait for '\n'.
+      boolean matched;
+      if ((state==0)||(state==2))
+        matched=(b[0]=='\r');
+      else
+        matched=(b[0]=='\n');
+      // Bytes read in state 0 are echoed only when they start a match;
+      // bytes read in any later state are always echoed.
+      if ((state!=0)||matched)
+        System.printString(new String(b));
+      if (matched)
+        state++;
+      else
+        state=0;
+    }
+
+    byte[] buffer=new byte[1024];
+    while(true) {
+      int numchars=sock.read(buffer);
+      // Stop on any non-positive count, not only on exactly 0, so that a
+      // negative result is never passed on as a length.  NOTE(review): the
+      // return convention of Socket.read is not visible here - confirm.
+      if (numchars<1)
+        return;
+      String curr=(new String(buffer)).subString(0,numchars);
+      System.printString(curr);
+      q.response.append(curr);
+    }
+  }
+
+  // Scans q.response for href="..." attributes.  Each link whose canonical
+  // "host/path" form is not yet in ql is recorded there, printed as
+  // "host path", and added to the work queue as a new Query.  Scanning stops
+  // at the first href=" that has no closing quote.
+  void processPage(Query q, QueryList ql) {
+    String href=new String("href=\"");
+    String searchstr=q.response.toString();
+    int index=0;
+    while(true) {
+      int mindex=searchstr.indexOf(href,index);
+      if (mindex==-1)
+        return;
+      int start=mindex+href.length();
+      int endquote=searchstr.indexOf('"', start);
+      if (endquote==-1)
+        return;
+
+      String match=searchstr.subString(start, endquote);
+      String match2=q.makewebcanonical(match);
+      if (match2!=null&&!ql.checkQuery(match2)) {
+        ql.addQuery(match2);
+        // Resolve the link once and reuse the result for both the log
+        // line and the new query.
+        String newhost=q.getHostName(match);
+        String newpath=q.getPathName(match);
+        System.printString(newhost);
+        System.printString(" ");
+        System.printString(newpath);
+        System.printString("\n");
+        toprocess.addQuery(new Query(newhost, newpath));
+      }
+      // Continue searching after this link.
+      index=endquote;
+    }
+  }
+
+}
--- /dev/null
+// Entry point of the spider: seeds the work queue with one page and processes
+// the queue with a single QueryThread.
+public class Spider {
+
+  // parameters[0] is the host name of the first machine to contact and
+  // parameters[1] is the path of the first page to request from it.  Prints a
+  // usage line and returns when fewer than two parameters are given.
+  public static void main(String[] parameters) {
+    if (parameters.length<2) {
+      System.printString("usage: Spider <hostname> <path>\n");
+      return;
+    }
+    String firstmachine=parameters[0];
+    String firstpage=parameters[1];
+    QueryList ql=new QueryList();
+    QueryQueue toprocess=new QueryQueue();
+    Query firstquery=new Query(firstmachine, firstpage);
+    toprocess.addQuery(firstquery);
+    QueryThread qt1=new QueryThread(toprocess, ql);
+    // run() is called directly, so the queue is processed on the calling
+    // thread.  The lines below are the disabled multi-threaded variant.
+    qt1.run();
+    // qt1.start();
+    //QueryThread qt2=new QueryThread(toprocess, ql);
+    //qt2.start();
+    //QueryThread qt3=new QueryThread(toprocess, ql);
+    //qt3.start();
+    //while(true)
+    //  Thread.sleep(1000000);
+  }
+
+}