public class Spider {
+ public static void main(String[] parameters) {
+ String firstmachine=parameters[0];
+ String firstpage=parameters[1];
+ QueryList ql=new QueryList();
+ QueryQueue toprocess=new QueryQueue();
+ Query firstquery=new Query(firstmachine, firstpage);
+ toprocess.addQuery(firstquery);
+ QueryThread qt1=new QueryThread(toprocess, ql);
+ qt1.start();
+ QueryThread qt2=new QueryThread(toprocess, ql);
+ qt2.start();
+ QueryThread qt3=new QueryThread(toprocess, ql);
+ qt3.start();
- public static void main(String[] parameters) {
- String firstmachine=parameters[0];
- String firstpage=parameters[1];
- QueryList ql=new QueryList();
- QueryQueue toprocess=new QueryQueue();
- Query firstquery=new Query(firstmachine, firstpage);
- toprocess.addQuery(firstquery);
- QueryThread qt1=new QueryThread(toprocess, ql);
- qt1.start();
- QueryThread qt2=new QueryThread(toprocess, ql);
- qt2.start();
- QueryThread qt3=new QueryThread(toprocess, ql);
- qt3.start();
- while(true)
- Thread.sleep(1000000);
+ while(true)
+ Thread.sleep(1000000);
}
-
-
}
-public class QueryThread extends Task {
+public class QueryThread extends Thread {
int maxDepth;
int maxSearchDepth;
+ int MY_MID;
+ int NUM_THREADS;
+ Queue todoList;
+ Queue doneList;
+ Query myWork;
+ Query[] currentWorkList;
- public QueryThread(Queue todoList, Queue doneList, int maxDepth, int maxSearchDepth) {
+ public QueryThread(Queue todoList, Queue doneList, int maxDepth, int maxSearchDepth,int mid,int NUM_THREADS,Query[] currentWorkList) {
this.todoList = todoList;
this.doneList = doneList;
this.maxDepth = maxDepth;
this.maxSearchDepth = maxSearchDepth;
+ this.currentWorkList = currentWorkList;
+ this.MY_MID = mid;
+ this.NUM_THREADS = NUM_THREADS;
}
- public void execute() {
+ public void run()
+ {
+ int workMID;
+
+ atomic {
+ workMID = MY_MID;
+ }
+
+ System.out.println("Thread " + workMID + " has started");
+
+ int chk;
+
+ while(true) {
+ atomic {
+ myWork = (Query)todoList.pop();
+
+ if(null == myWork) // no work in todolist
+ {
+ chk = checkCurrentWorkList(this);
+ }
+ else {
+ currentWorkList[workMID] = myWork;
+ chk = 1;
+ }
+ }
+
+ if(chk == 1) { // it has query
+ execute(this);
+
+ atomic {
+ doneWork(myWork);
+ currentWorkList[workMID] = null;
+ }
+ }
+ else if(chk == -1) { // finished all work
+ break;
+ }
+ else { // wait for other thread
+ sleep(5000000);
+ }
+
+ }
+
+ atomic {
+ System.out.println("\n\nDoneSize = " + doneList.size());
+ }
+
+ System.out.println("\n\n\n I'm done");
+ }
+
+ public static int checkCurrentWorkList(QueryThread qt) {
+ int i;
+ int myID;
+ int num_threads;
+ boolean chk = false;
+ Object s;
+
+ atomic {
+ myID = qt.MY_MID;
+ num_threads = qt.NUM_THREADS;
+
+ for(i = 0 ; (i < num_threads); i++) {
+ if(myID == i) {
+ continue;
+ }
+
+ s = qt.currentWorkList[i];
+
+ if(null != s) {
+ chk = true;
+ break;
+ }
+ }
+
+ }
+
+ if(chk == false) // wait for other machine's work
+ return -1;
+ else
+ return 0; // others are still working wait until they finish work
+ }
+
+ public static void execute(QueryThread qt) {
int depth;
int max;
int maxSearch;
-
- atomic {
- depth = ((Query)myWork).getDepth();
- max = this.maxDepth;
- maxSearch = this.maxSearchDepth;
+
+ atomic {
+ if(qt.myWork == null) {
+ System.out.println("What!!!!!!!!!!!!!!!");
+ System.exit(0);
+ }
+ depth = ((Query)qt.myWork).getDepth();
+ max = qt.maxDepth;
+ maxSearch = qt.maxSearchDepth;
}
if (depth < max) {
String path;
atomic {
- q = (Query)myWork;
+ q = (Query)(qt.myWork);
ghostname = q.getHostName();
gpath = q.getPath();
hostname = new String(GlobalString.toLocalCharArray(ghostname));
gpath = global new GlobalString(lq.getPath());
q = global new Query(ghostname, gpath, lq.getDepth());
- todoList.push(q);
+ qt.todoList.push(q);
}
}
}
}
}
- public void done(Object obj) {
+ public void doneWork(Object obj) {
doneList.push(obj);
}
int maxDepth = 5;
int searchDepth = 10;
int i, j;
- Work[] works;
QueryThread[] qt;
Query[] currentWorkList;
mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1
mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2
mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3
- mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-3
+ mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-4
+// mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-5
+// mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-6
atomic {
firstmachine = global new GlobalString(args[1]);
firstpage = global new GlobalString(args[2]);
- works = global new Work[NUM_THREADS];
qt = global new QueryThread[NUM_THREADS];
currentWorkList = global new Query[NUM_THREADS];
todoList.push(firstquery);
for (i = 0; i < NUM_THREADS; i++) {
- qt[i] = global new QueryThread(todoList, doneList, maxDepth, searchDepth);
- works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
+ qt[i] = global new QueryThread(todoList, doneList, maxDepth, searchDepth,i,NUM_THREADS,currentWorkList);
}
}
System.printString("Finished to create Objects\n");
- Work tmp;
+ QueryThread tmp;
for (i = 0; i < NUM_THREADS; i++) {
atomic {
- tmp = works[i];
+ tmp = qt[i];
}
- Thread.myStart(tmp, mid[i]);
+ tmp.start(mid[i]);
}
for (i = 0; i < NUM_THREADS; i++) {
atomic {
- tmp = works[i];
+ tmp = qt[i];
}
tmp.join();
}
SRC2=${SUBCLASS}.java
SRC3=${SUBCLASS}Queue.java
SRC4=${SUBCLASS}Thread.java
-FLAGS= -dsm -32bit -nooptimize -debug -recovery -mainclass ${MAINCLASS}
+FLAGS= -dsm -32bit -nooptimize -debug -mainclass ${MAINCLASS}
default:
../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1}
--- /dev/null
+/**
+ * Process-local description of one page to fetch: host name, path, crawl depth
+ * and the buffer that accumulates the response text.
+ */
+public class LocalQuery {
+  String hostname;
+  String path;
+  StringBuffer response;
+  int depth;
+
+  /** Copies hostname and path and starts with an empty response buffer. */
+  public LocalQuery(String hostname, String path, int depth) {
+    this.hostname = new String(hostname);
+    this.path = new String(path);
+    response = new StringBuffer();
+    this.depth = depth;
+  }
+
+  /** Returns the depth value given to the constructor. */
+  public int getDepth() {
+    return depth;
+  }
+
+  /** Returns the host name of this query. */
+  public String getHostName() {
+    return hostname;
+  }
+
+  /** Returns the path of this query. */
+  public String getPath() {
+    return path;
+  }
+
+  /**
+   * Writes the accumulated response to a file named hostname + path, with every
+   * '/' in that name replaced by '#'.
+   */
+  public void outputFile() {
+    StringBuffer sb = new StringBuffer(hostname);
+    sb.append(path);
+    FileOutputStream fos = new FileOutputStream(sb.toString().replace('/','#'));
+    fos.write(response.toString().getBytes());
+    fos.close();
+  }
+
+  /** Returns host + "/" + path for the link text in page, resolved against this query. */
+  public String makewebcanonical(String page) {
+    StringBuffer b = new StringBuffer(getHostName(page));
+    b.append("/");
+    b.append(getPathName(page));
+    return b.toString();
+  }
+
+  /**
+   * Extracts the host part of a link.
+   *
+   * @param page link text; may or may not contain "http://"
+   * @return the text between "http://" and the next '/' (or the end of the
+   *         string); this query's own host name when page has no "http://"
+   */
+  public String getHostName(String page) {
+    String http = new String("http://");
+    int schemeindex = page.indexOf(http);
+    if (schemeindex == -1) {
+      return getHostName();
+    }
+    // beginindex is schemeindex + 7 and therefore can never be -1; the old
+    // "ERROR" branch that tested for that was unreachable and has been dropped.
+    int beginindex = schemeindex + http.length();
+    int endindex = page.indexOf('/',beginindex+1);
+    if (endindex == -1) {
+      endindex = page.length();
+    }
+    return page.subString(beginindex, endindex);
+  }
+
+  /**
+   * Extracts the path part of a link.
+   *
+   * @param page link text; may or may not contain "http://"
+   * @return for links without "http://": page appended to this query's path up
+   *         to and including its last '/', or page itself when the path has no
+   *         '/'. For links with "http://": everything after the first '/' that
+   *         follows the host, or "index.html" when there is no such '/'.
+   */
+  public String getPathName(String page) {
+    String http = new String("http://");
+    int schemeindex = page.indexOf(http);
+    if (schemeindex == -1) {
+      String path = getPath();
+      int lastindex = path.lastindexOf('/');
+      if (lastindex == -1) {
+        return page;
+      }
+
+      StringBuffer sb = new StringBuffer(path.subString(0,lastindex+1));
+      sb.append(page);
+      return sb.toString();
+    }
+    int beginindex = schemeindex + http.length();
+    int nextindex = page.indexOf('/',beginindex+1);
+    if (nextindex == -1) {
+      return new String("index.html");
+    }
+    return page.subString(nextindex+1, page.length());
+  }
+}
--- /dev/null
+/**
+ * One page to fetch (host name, path, crawl depth), held in GlobalString fields
+ * allocated with "global new".
+ */
+public class Query {
+  GlobalString hostname;
+  GlobalString path;
+  int depth;
+
+  /** Stores global copies of hostname and path together with the depth. */
+  public Query(GlobalString hostname, GlobalString path, int depth) {
+    this.hostname = global new GlobalString(hostname);
+    this.path = global new GlobalString(path);
+    this.depth = depth;
+  }
+
+  /** Returns the depth value given to the constructor. */
+  public int getDepth() {
+    return depth;
+  }
+
+  /** Returns the host name of this query. */
+  public GlobalString getHostName() {
+    return hostname;
+  }
+
+  /** Returns the path of this query. */
+  public GlobalString getPath() {
+    return path;
+  }
+
+  /**
+   * Extracts the host part of a link.
+   *
+   * @param page link text; may or may not contain "http://"
+   * @return the text between "http://" and the next '/' (or the end of the
+   *         string); this query's own host name when page has no "http://"
+   */
+  public GlobalString getHostName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    int schemeindex = page.indexOf(http);
+    if (schemeindex == -1) {
+      return getHostName();
+    }
+    // beginindex is schemeindex + http.length() and so can never be -1; the
+    // unreachable "ERROR" branch that tested for that has been removed.
+    int beginindex = schemeindex + http.length();
+    int endindex = page.indexOf('/',beginindex+1);
+    if (endindex == -1) {
+      endindex = page.length();
+    }
+    return page.subString(beginindex, endindex);
+  }
+
+
+  /**
+   * Extracts the path part of a link.
+   *
+   * @param page link text; may or may not contain "http://"
+   * @return for links without "http://": page appended to this query's path up
+   *         to and including its last '/', or page itself when the path has no
+   *         '/'. For links with "http://": everything after the first '/' that
+   *         follows the host, or "index.html" when there is no such '/'.
+   */
+  public GlobalString getPathName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    int schemeindex = page.indexOf(http);
+    if (schemeindex == -1) {
+      GlobalString path = getPath();
+      int lastindex = path.lastindexOf('/');
+      if (lastindex == -1) {
+        return page;
+      }
+
+      GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1));
+      sb.append(page);
+      return sb.toGlobalString();
+    }
+    int beginindex = schemeindex + http.length();
+    int nextindex = page.indexOf('/',beginindex+1);
+    if (nextindex == -1) {
+      return global new GlobalString("index.html");
+    }
+    return page.subString(nextindex+1, page.length());
+  }
+}
--- /dev/null
+/**
+ * Queue of GlobalString entries with a linear membership test.
+ *
+ * NOTE(review): checkQuery scans the storage inherited from Queue (elements /
+ * size) while addQuery used to store only into the separate "queries" queue, so
+ * an entry added through addQuery could never be found. addQuery now also
+ * pushes onto this queue. The "queries" field is kept and still filled in case
+ * other code reads it - confirm whether it can be removed.
+ */
+public class QueryList extends Queue {
+  Queue queries;
+
+  /** Creates the list with an empty secondary queue. */
+  public QueryList() {
+    queries = global new Queue();
+  }
+
+  /**
+   * Returns true when an entry equal to x is stored in this queue.
+   * Assumes Queue keeps its live items in elements[0..size) - the original loop
+   * relied on the same layout; TODO confirm against Queue.
+   */
+  public boolean checkQuery(GlobalString x) {
+    for (int i = 0 ; i < size; i++) {
+      if (x.equals((GlobalString)elements[i])) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Records x so that a later checkQuery(x) can find it. */
+  public void addQuery(GlobalString x) {
+    queries.push(x);
+    // Also store into the inherited storage that checkQuery searches.
+    push(x);
+  }
+}
--- /dev/null
+/**
+ * Unordered work set of LocalQuery objects backed by a HashSet, with a separate
+ * element counter.
+ */
+public class QueryQueue {
+  HashSet queries;
+  int size;
+
+  /** Creates an empty queue. */
+  public QueryQueue() {
+    queries = new HashSet();
+    size = 0;
+  }
+
+  /**
+   * Removes and returns one element (whichever the set's iterator yields
+   * first), or null when the set is empty.
+   */
+  public LocalQuery pop() {
+    if (queries.isEmpty())
+      return null;
+    LocalQuery q = (LocalQuery) queries.iterator().next();
+    queries.remove(q);
+    size--;
+    return q;
+  }
+
+  /**
+   * Adds x to the set. The counter is only advanced when the set actually
+   * accepted the element; previously a rejected duplicate still bumped size,
+   * after which isEmpty() reported false while pop() returned null.
+   * Relies on HashSet.add returning whether the element was new.
+   */
+  public void push(LocalQuery x) {
+    if (queries.add(x)) {
+      size++;
+    }
+  }
+
+  /** Returns the number of stored elements. */
+  public int size() {
+    return size;
+  }
+
+  /**
+   * Returns true when there is nothing to pop. Asks the set directly - the same
+   * test pop() uses - so the two can never disagree.
+   */
+  public boolean isEmpty() {
+    return queries.isEmpty();
+  }
+}
--- /dev/null
+/**
+ * Task that fetches the page described by the current work item (myWork),
+ * extracts its href links and pushes them onto the shared todo list.
+ * todoList, doneList and myWork are not declared here; they are presumably
+ * fields of Task - confirm.
+ */
+public class QueryThread extends Task {
+  int maxDepth;
+  int maxSearchDepth;
+
+  /**
+   * @param todoList       queue new Query objects are pushed onto
+   * @param doneList       queue finished work items are pushed onto by done()
+   * @param maxDepth       work items at this depth or deeper are not fetched
+   * @param maxSearchDepth upper bound on links taken from one page
+   */
+  public QueryThread(Queue todoList, Queue doneList, int maxDepth, int maxSearchDepth) {
+    this.todoList = todoList;
+    this.doneList = doneList;
+    this.maxDepth = maxDepth;
+    this.maxSearchDepth = maxSearchDepth;
+  }
+
+  /**
+   * Processes myWork: if its depth is below maxDepth, downloads the page over a
+   * socket to port 80, collects its links and pushes one global Query per link
+   * onto todoList.
+   */
+  public void execute() {
+    int depth;
+    int max;
+    int maxSearch;
+
+    // Read the shared fields inside a transaction and work on local copies.
+    atomic {
+      depth = ((Query)myWork).getDepth();
+      max = this.maxDepth;
+      maxSearch = this.maxSearchDepth;
+    }
+
+    if (depth < max) {
+      /* global variables */
+      Query q;
+      GlobalString ghostname;
+      GlobalString gpath;
+
+      /* local variables */
+      QueryQueue toprocess;
+      LocalQuery lq;
+      String hostname;
+      String path;
+
+      // Convert the global strings of the work item into local Strings.
+      atomic {
+        q = (Query)myWork;
+        ghostname = q.getHostName();
+        gpath = q.getPath();
+        hostname = new String(GlobalString.toLocalCharArray(ghostname));
+        path = new String(GlobalString.toLocalCharArray(gpath));
+      }
+      lq = new LocalQuery(hostname, path, depth);
+
+      System.printString("Processing - Hostname : ");
+      System.printString(hostname);
+      System.printString(", Path : ");
+      System.printString(path);
+      System.printString("\n");
+
+      // Network I/O is done outside any atomic block.
+      Socket s = new Socket(hostname, 80);
+
+      requestQuery(hostname, path, s);
+      readResponse(lq, s);
+      toprocess = processPage(lq,maxSearch);
+      s.close();
+
+      // Publish every discovered link as a global Query.
+      atomic {
+        while(!toprocess.isEmpty()) {
+          lq = toprocess.pop();
+          ghostname = global new GlobalString(lq.getHostName());
+          gpath = global new GlobalString(lq.getPath());
+
+          q = global new Query(ghostname, gpath, lq.getDepth());
+          todoList.push(q);
+        }
+      }
+    }
+  }
+
+  /**
+   * Writes "GET /<path> HTTP/1.1" with a Host header to the socket.
+   * NOTE(review): readResponse reads until the peer stops sending; with
+   * HTTP/1.1 and no "Connection: close" header a server may keep the connection
+   * open, which would delay the end of the read - consider sending that header.
+   */
+  public static void requestQuery(String hostname, String path, Socket sock) {
+    StringBuffer req = new StringBuffer("GET ");
+    req.append("/");
+    req.append(path);
+    req.append(" HTTP/1.1\r\nHost:");
+    req.append(hostname);
+    req.append("\r\n\r\n");
+    sock.write(req.toString().getBytes());
+  }
+
+  /**
+   * Skips the response header (everything up to and including the first
+   * "\r\n\r\n") one byte at a time, then appends the remaining data to
+   * lq.response until a read delivers no bytes.
+   *
+   * Header scanner states:
+   *   0 - nothing matched, 1 - "\r", 2 - "\r\n", 3 - "\r\n\r", 4 - "\r\n\r\n"
+   */
+  public static void readResponse(LocalQuery lq, Socket sock) {
+    int state=0;
+    byte[] b=new byte[1];
+
+    while (state<4) {
+      if (sock.read(b)!=1)
+        return;
+      if (b[0]=='\r') {
+        // A '\r' continues "\r\n" (state 2 -> 3). In every other state it
+        // starts a new candidate terminator; previously a '\r' following a
+        // '\r' reset the scanner to 0 and the terminator could be missed.
+        if (state==2)
+          state=3;
+        else
+          state=1;
+      } else if ((b[0]=='\n') && ((state==1) || (state==3))) {
+        state++;
+      } else {
+        state=0;
+      }
+    }
+
+    while(true) {
+      byte[] buffer=new byte[1024];
+      int numchars=sock.read(buffer);
+      // Stop on a negative result as well, so that a failed read cannot reach
+      // subString(0, numchars) with a negative length.
+      if (numchars<=0)
+        return;
+      String curr=(new String(buffer)).subString(0,numchars);
+      lq.response.append(curr);
+    }
+  }
+
+  /** Pushes obj onto doneList. */
+  public void done(Object obj) {
+    doneList.push(obj);
+  }
+
+  /**
+   * Scans lq.response for href="..." attributes and builds one LocalQuery per
+   * link, at depth lq.getDepth() + 1.
+   *
+   * @param lq             fetched page; links are resolved against it
+   * @param maxSearchDepth maximum number of links to collect
+   * @return queue of the collected links (possibly empty)
+   */
+  public static QueryQueue processPage(LocalQuery lq,int maxSearchDepth) {
+    int index = 0;
+    String href = new String("href=\"");
+    String searchstr = lq.response.toString();
+    int depth;
+    boolean cont = true;
+
+    QueryQueue toprocess = new QueryQueue();
+    depth = lq.getDepth() + 1;
+
+    int searchDepthCnt = 0;
+    while(cont && (searchDepthCnt < maxSearchDepth)) {
+      int mindex = searchstr.indexOf(href,index);
+      if (mindex != -1) {
+        int endquote = searchstr.indexOf('"', mindex+href.length());
+        if (endquote != -1) {
+          String match = searchstr.subString(mindex+href.length(), endquote);
+          String match2 = lq.makewebcanonical(match);
+
+          if (match2 != null) {
+            LocalQuery newlq = new LocalQuery(lq.getHostName(match), lq.getPathName(match), depth);
+
+            toprocess.push(newlq);
+            searchDepthCnt++;
+          }
+          // Continue searching after the closing quote of this link.
+          index = endquote;
+        } else cont = false;
+      } else cont = false;
+    }
+
+    return toprocess;
+  }
+}
--- /dev/null
+/**
+ * Entry point of the crawler: builds the shared queues and one QueryThread/Work
+ * pair per requested thread, starts each Work on its machine and waits for all
+ * of them.
+ */
+public class Spider {
+  /**
+   * @param args args[0] = number of threads, args[1] = first host name,
+   *             args[2] = first page
+   */
+  public static void main(String[] args) {
+    int NUM_THREADS = 4;
+    int maxDepth = 5;
+    int searchDepth = 10;
+    int i, j;
+    Work[] works;
+    QueryThread[] qt;
+    Query[] currentWorkList;
+
+    NUM_THREADS = Integer.parseInt(args[0]);
+    GlobalString firstmachine;
+    GlobalString firstpage;
+
+//  int[] mid = getMID(NUM_THREADS);
+    int mid[] = new int[NUM_THREADS];
+/*  known[0] = (128<<24)|(195<<16)|(180<<8)|21; //dc-4
+    known[1] = (128<<24)|(195<<16)|(180<<8)|24; //dc-5
+    known[2] = (128<<24)|(195<<16)|(180<<8)|26; //dc-6
+ */
+    // Machine ids are IPv4 addresses packed into an int. They are kept in a
+    // table of their own and only the first NUM_THREADS entries are copied:
+    // writing mid[0..5] directly overran the array whenever fewer than six
+    // threads were requested.
+    int[] known = new int[6];
+    known[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1
+    known[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2
+    known[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3
+    known[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-4
+    known[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-5
+    known[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-6
+    // NOTE(review): with more than six threads the extra entries stay 0, as
+    // they did before; add machines to the table if that is ever needed.
+    for (i = 0; (i < NUM_THREADS) && (i < 6); i++) {
+      mid[i] = known[i];
+    }
+
+    atomic {
+      firstmachine = global new GlobalString(args[1]);
+      firstpage = global new GlobalString(args[2]);
+
+      works = global new Work[NUM_THREADS];
+      qt = global new QueryThread[NUM_THREADS];
+      currentWorkList = global new Query[NUM_THREADS];
+
+      Query firstquery = global new Query(firstmachine, firstpage, 0);
+
+      Queue todoList = global new Queue();
+      Queue doneList = global new Queue();
+      todoList.push(firstquery);
+
+      for (i = 0; i < NUM_THREADS; i++) {
+        qt[i] = global new QueryThread(todoList, doneList, maxDepth, searchDepth);
+        works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
+      }
+    }
+    System.printString("Finished to create Objects\n");
+
+    // Start every Work on its machine ...
+    Work tmp;
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      Thread.myStart(tmp, mid[i]);
+    }
+
+    // ... and wait for all of them.
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      tmp.join();
+    }
+  }
+
+  /**
+   * Reads num_threads dotted-quad addresses from "dstm.conf" and packs each into
+   * an int (first octet in the top byte).
+   *
+   * Lines beginning with '#' are skipped. The file is expected to hold at least
+   * num_threads address lines; behaviour at end of file depends on what
+   * readLine returns there - TODO confirm.
+   *
+   * @param num_threads number of addresses to read
+   * @return array of num_threads packed addresses, in file order
+   */
+  public static int[] getMID (int num_threads) {
+    int[] mid = new int[num_threads];
+
+    FileInputStream ifs = new FileInputStream("dstm.conf");
+    String str;
+    String sub;
+    int fromIndex;
+    int endIndex;
+    int num;
+
+    for (int i = 0; i < num_threads; i++) {
+      str = ifs.readLine();
+      // Skip commented-out machines; indexOf(..) == 0 means "starts with '#'".
+      while (str.indexOf('#', 0) == 0) {
+        str = ifs.readLine();
+      }
+
+      fromIndex = 0;
+      endIndex = str.indexOf('.', fromIndex);
+      sub = str.subString(fromIndex, endIndex);
+      num = (Integer.parseInt(sub) << 24);
+
+      fromIndex = endIndex + 1;
+      endIndex = str.indexOf('.', fromIndex);
+      sub = str.subString(fromIndex, endIndex);
+      num += (Integer.parseInt(sub) << 16);
+
+      fromIndex = endIndex + 1;
+      endIndex = str.indexOf('.', fromIndex);
+      sub = str.subString(fromIndex, endIndex);
+      num += (Integer.parseInt(sub) << 8);
+
+      fromIndex = endIndex + 1;
+      sub = str.subString(fromIndex);
+      num += Integer.parseInt(sub);
+
+      mid[i] = num;
+    }
+    ifs.close();
+    return mid;
+  }
+}
--- /dev/null
+#128.195.180.21
+#128.195.180.24
+#128.195.180.26
+128.195.136.162
+128.195.136.163
+128.195.136.164
+128.195.136.165
+128.195.136.166
+128.195.136.167
--- /dev/null
+MAINCLASS=Spider
+SUBCLASS=Query
+SRC1=${MAINCLASS}.java
+SRC2=${SUBCLASS}.java
+SRC3=${SUBCLASS}Queue.java
+SRC4=${SUBCLASS}Thread.java
+FLAGS= -dsm -32bit -nooptimize -debug -recovery -mainclass ${MAINCLASS}
+default:
+ ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1}
+
+# Remove build output and downloaded pages. -f keeps the recipe going when a
+# pattern matches nothing (plain rm fails in that case and make stops).
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
+	rm -f *.php
+	rm -f *.css
+	rm -f www*
+	rm -f eee*
+	rm -f web*