--- /dev/null
+/**
+ * One page request of the spider: the host to contact, the path to fetch,
+ * and a buffer that holds the response text for that page.
+ *
+ * Also provides helpers that resolve a link found on this page (either an
+ * absolute "http://host/path" link or a link relative to this query's path)
+ * into a host name and a path.
+ */
+public class Query {
+  GlobalString hostname;
+  GlobalString path;
+  // Response text for this query; appended to by code outside this class.
+  GlobalStringBuffer response;
+
+  /**
+   * Creates a query for the given host and path. Both strings are copied
+   * into fresh GlobalString objects and the response buffer starts empty.
+   */
+  public Query(GlobalString hostname, GlobalString path) {
+    this.hostname = global new GlobalString(hostname);
+    this.path = global new GlobalString(path);
+    response = global new GlobalStringBuffer();
+  }
+
+  /** Returns the host name this query is addressed to. */
+  public GlobalString getHostName() {
+    return hostname;
+  }
+
+  /** Returns the path this query requests. */
+  public GlobalString getPath() {
+    return path;
+  }
+
+  /**
+   * Writes the response buffer to a file whose name is the host name
+   * followed by the path, with every '/' replaced by '#'.
+   */
+  public void outputFile() {
+    StringBuffer filename = new StringBuffer(hostname.toLocalString());
+    filename.append(path.toLocalString());
+    FileOutputStream fos = new FileOutputStream(filename.toString().replace('/','#'));
+    fos.write(response.toLocalString().getBytes());
+    fos.close();
+  }
+
+
+  /**
+   * Builds the canonical form of a link found on this page:
+   * resolved host name, then "/", then resolved path name.
+   */
+  public GlobalString makewebcanonical(GlobalString page) {
+    GlobalStringBuffer b = global new GlobalStringBuffer(getHostName(page));
+    b.append("/");
+    b.append(getPathName(page));
+    return b.toGlobalString();
+  }
+
+  /**
+   * Returns the host part of a link.
+   *
+   * If the link does not contain "http://", it is treated as relative and
+   * this query's own host name is returned. Otherwise the host is the text
+   * between the end of "http://" and the next '/', or the end of the link
+   * when there is no further '/'.
+   */
+  public GlobalString getHostName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    int schemeindex = page.indexOf(http);
+    if (schemeindex == -1) {
+      return getHostName();
+    }
+
+    // schemeindex is a valid position here, so beginindex can never be -1;
+    // the former "ERROR" check on it was unreachable and has been removed.
+    int beginindex = schemeindex + http.length();
+    int endindex = page.indexOf('/',beginindex+1);
+    if (endindex == -1)
+      endindex = page.length();
+    return page.subString(beginindex, endindex);
+  }
+
+
+  /**
+   * Returns the path part of a link.
+   *
+   * Relative link (no "http://"): the link is appended to the directory of
+   * this query's path (everything up to and including its last '/'); when
+   * this query's path has no '/', the link itself is returned.
+   *
+   * Absolute link: everything after the first '/' that follows the host is
+   * returned; when there is no such '/', "index.html" is returned.
+   */
+  public GlobalString getPathName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    int schemeindex = page.indexOf(http);
+    if (schemeindex == -1) {
+      GlobalString current = getPath();
+      int lastindex = current.lastindexOf('/');
+      if (lastindex == -1)
+        return page;
+
+      GlobalStringBuffer sb = global new GlobalStringBuffer(current.subString(0,lastindex+1));
+      sb.append(page);
+      return sb.toGlobalString();
+    }
+
+    // As in getHostName(page), beginindex cannot be -1 at this point, so only
+    // the missing-slash case needs the "index.html" fallback.
+    int beginindex = schemeindex + http.length();
+    int nextindex = page.indexOf('/',beginindex+1);
+    if (nextindex == -1)
+      return global new GlobalString("index.html");
+    return page.subString(nextindex+1, page.length());
+  }
+}
--- /dev/null
+/**
+ * A Queue of GlobalString entries that can be searched for an equal string.
+ */
+public class QueryList extends Queue {
+  public QueryList() {
+    // NOTE(review): this invokes Queue() as a plain call; presumably meant to
+    // run the superclass initialisation in this dialect -- confirm.
+    Queue(); // ??
+  }
+
+  /**
+   * Scans the first `size` entries of `elements` (neither is declared in
+   * this class) and reports whether any of them equals x.
+   *
+   * @param x the string to look for
+   * @return true if an equal entry is found, false otherwise
+   */
+  public boolean checkQuery(GlobalString x) {
+    int pos = 0;
+    while (pos < size) {
+      if (x.equals((GlobalString)elements[pos]))
+        return true;
+      pos++;
+    }
+    return false;
+  }
+}
--- /dev/null
+public class QueryThread extends Task {
+ int maxDepth;
+ int depthCnt;
+ int maxSearchDepth;
+ int searchDepthCnt;
+
+ public QueryThread(Queue qq, Queue ql, int depth, int searchDepth) {
+ this.todoList = qq;
+ this.doneList = ql;
+ this.maxDepth = depth;
+ this.maxSearchDepth = searchDepth;
+ depthCnt = 1;
+ searchDepthCnt = 0;
+ }
+
+ public void execute(Object mywork) {
+ Query q = (Query)mywork;
+ GlobalString ghostname;
+ GlobalString gpath;
+
+ atomic {
+ ghostname = q.getHostName();
+ gpath = q.getPath();
+ }
+
+ String hostname = new String(GlobalString.toLocalCharArray(ghostname));
+ String path = new String(GlobalString.toLocalCharArray(gpath));
+
+ System.printString("Processing ");
+ System.printString(hostname + "\n");
+ System.printString(" ");
+ System.printString(path);
+ System.printString("\n");
+
+ Socket s = new Socket(hostname, 80);
+
+ requestQuery(hostname, path, s);
+// System.printString("Wait for 5 secs\n");
+// Thread.sleep(2000000);
+
+ readResponse(q, s);
+// System.printString("Wait for 5 secs\n");
+// Thread.sleep(2000000);
+
+ q.outputFile();
+// System.printString("Wait for 5 secs\n");
+// Thread.sleep(2000000);
+
+ processPage(q, (QueryList)doneList);
+ s.close();
+ }
+
+ public void requestQuery(String hostname, String path, Socket sock) {
+ StringBuffer req = new StringBuffer("GET ");
+ req.append("/");
+ req.append(path);
+ req.append(" HTTP/1.1\r\nHost:");
+ req.append(hostname);
+ req.append("\r\n\r\n");
+ System.printString("req : " + req + "\n");
+ sock.write(req.toString().getBytes());
+ }
+
+ public void readResponse(Query q, Socket sock) {
+ // state 0 - nothing
+ // state 1 - \r
+ // state 2 - \r\n
+ // state 3 - \r\n\r
+ // state 4 - \r\n\r\n
+ int state=0;
+ while(true) {
+ if (state<4) {
+ if (state==0) {
+ byte[] b=new byte[1];
+ int numchars=sock.read(b);
+ if ((numchars==1)) {
+ if (b[0]=='\r') {
+ state++;
+ }
+ } else
+ return;
+ } else if (state==1) {
+ byte[] b=new byte[1];
+ int numchars=sock.read(b);
+ if (numchars==1) {
+ if (b[0]=='\n')
+ state++;
+ else
+ state=0;
+ } else return;
+ } else if (state==2) {
+ byte[] b=new byte[1];
+ int numchars=sock.read(b);
+ if (numchars==1) {
+ if (b[0]=='\r')
+ state++;
+ else
+ state=0;
+ } else return;
+ } else if (state==3) {
+ byte[] b=new byte[1];
+ int numchars=sock.read(b);
+ if (numchars==1) {
+ if (b[0]=='\n')
+ state++;
+ else
+ state=0;
+ } else return;
+ }
+ } else {
+ byte[] buffer=new byte[1024];
+ int numchars=sock.read(buffer);
+ if (numchars==0)
+ return;
+ else {
+ String curr=(new String(buffer)).subString(0,numchars);
+ q.response.append(curr);
+ }
+ }
+ }
+ }
+
+ public void done(Object obj) {
+ doneList.push(obj);
+// System.printString("Size of todoList : " + todoList.size() + "\n");
+// Thread.sleep(5000000);
+ }
+
+ public void processPage(Query q, QueryList doneList) {
+ int index = 0;
+ String href = new String("href=\"");
+ String searchstr = q.response.toLocalString();
+ boolean cont = true;
+
+ while(cont && (searchDepthCnt < maxSearchDepth)) {
+ int mindex = searchstr.indexOf(href,index);
+ if (mindex != -1) {
+ int endquote = searchstr.indexOf('"', mindex+href.length());
+ if (endquote != -1) {
+ String match = searchstr.subString(mindex+href.length(), endquote);
+ GlobalString gmatch;
+ GlobalString gmatch2;
+
+ atomic {
+ gmatch = global new GlobalString(match);
+ gmatch2 = q.makewebcanonical(gmatch);
+ }
+ if (gmatch2 != null && !doneList.checkQuery(gmatch2)) {
+// doneList.push(gmatch2);
+ done(gmatch2);
+ if (depthCnt < maxDepth) {
+ Query newq;
+ System.printString("Depth : " + depthCnt + "\n");
+ atomic {
+ newq = global new Query(q.getHostName(gmatch), q.getPathName(gmatch));
+ todoList.push(newq);
+ System.printString("Size of todoList : " + todoList.size() + "\n");
+ searchDepthCnt++;
+ }
+ }
+ }
+ index = endquote;
+ } else cont = false;
+ } else cont = false;
+ }
+ depthCnt++;
+ searchDepthCnt = 0;
+ }
+}
--- /dev/null
+/**
+ * Entry point of the spider: builds the shared work lists and one
+ * QueryThread/Work pair per thread, starts each Work on the machine id read
+ * from dstm.conf, and waits for all of them to finish.
+ */
+public class Spider {
+  /**
+   * Reads the first num_threads lines of "dstm.conf" and converts each
+   * dotted-quad address a.b.c.d into the int (a<<24)|(b<<16)|(c<<8)|d.
+   *
+   * Previously the octets were extracted but never stored, so the returned
+   * array contained only zeros.
+   *
+   * NOTE(review): assumes every line is exactly "a.b.c.d" with no surrounding
+   * whitespace and that the file has at least num_threads lines -- confirm
+   * against the readLine implementation of this runtime.
+   *
+   * @param num_threads number of addresses to read
+   * @return one packed address per thread, in file order
+   */
+  public static int[] getMID (int num_threads) {
+    int[] mid = new int[num_threads];
+
+    FileInputStream ifs = new FileInputStream("dstm.conf");
+
+    for (int i = 0; i < num_threads; i++) {
+      String str = ifs.readLine();
+      int fromIndex = 0;
+      int addr = 0;
+
+      // First three octets are each terminated by a '.'.
+      for (int octet = 0; octet < 3; octet++) {
+        int endIndex = str.indexOf('.', fromIndex);
+        addr = (addr << 8) | Integer.parseInt(str.subString(fromIndex, endIndex));
+        fromIndex = endIndex + 1;
+      }
+      // Last octet runs to the end of the line.
+      addr = (addr << 8) | Integer.parseInt(str.subString(fromIndex));
+
+      mid[i] = addr;
+    }
+    return mid;
+  }
+
+  /**
+   * @param args args[0] = number of threads, args[1] = host of the first
+   *             page, args[2] = path of the first page
+   */
+  public static void main(String[] args) {
+    int NUM_THREADS = 3;
+    int depth = 5;
+    int searchDepth = 5;
+    int i;
+    Work[] works;
+    QueryThread[] qt;
+    Query[] currentWorkList;
+
+    NUM_THREADS = Integer.parseInt(args[0]);
+    GlobalString firstmachine;
+    GlobalString firstpage;
+
+    int[] mid = getMID(NUM_THREADS);
+
+    atomic {
+      firstmachine = global new GlobalString(args[1]);
+      firstpage = global new GlobalString(args[2]);
+
+      works = global new Work[NUM_THREADS];
+      qt = global new QueryThread[NUM_THREADS];
+      currentWorkList = global new Query[NUM_THREADS];
+
+      Query firstquery = global new Query(firstmachine, firstpage);
+
+      // All threads share one todo queue (seeded with the first page) and
+      // one done list.
+      Queue todoList = global new Queue();
+      todoList.push(firstquery);
+      QueryList doneList = global new QueryList();
+
+      for (i = 0; i < NUM_THREADS; i++) {
+        qt[i] = global new QueryThread(todoList, doneList, depth, searchDepth);
+        works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
+      }
+    }
+    System.printString("Finished to create Objects\n");
+
+    Work tmp;
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      tmp.start(mid[i]);
+    }
+
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      tmp.join();
+    }
+  }
+}
--- /dev/null
+128.195.136.166
+128.195.136.167
+128.195.136.168
--- /dev/null
+# Builds the Spider benchmark with the project build script.
+MAINCLASS=Spider
+SUBCLASS=Query
+SRC1=${MAINCLASS}.java
+SRC2=${SUBCLASS}.java
+SRC3=${SUBCLASS}List.java
+SRC4=${SUBCLASS}Thread.java
+FLAGS= -dsm -32bit -nooptimize -debug -recovery -mainclass ${MAINCLASS}
+
+# Neither target creates a file named after itself, so mark both phony;
+# otherwise a stray file called "default" or "clean" would disable the rule.
+.PHONY: default clean
+
+default:
+	../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1}
+
+clean:
+	rm -rf tmpbuilddirectory
+	# -f keeps "make clean" from failing when no .bin files exist.
+	rm -f *.bin