public class Query {
GlobalString hostname;
GlobalString path;
- GlobalStringBuffer response;
+ int depth;
// Creates a query for the given host and path. Both strings are copied into new
// GlobalString objects created with `global new`.
// This change adds the `depth` argument (stored unchanged in the `depth` field) and
// removes the allocation of the `response` buffer that the old constructor performed.
- public Query(GlobalString hostname, GlobalString path) {
+ public Query(GlobalString hostname, GlobalString path, int depth) {
this.hostname = global new GlobalString(hostname);
this.path = global new GlobalString(path);
- response = global new GlobalStringBuffer();
+ this.depth = depth;
}
// Returns the depth value that was passed to the constructor.
+ public int getDepth() {
+ return depth;
+ }
+
// Returns the GlobalString holding this query's host name.
public GlobalString getHostName() {
return hostname;
}
// Returns the GlobalString holding this query's path.
public GlobalString getPath() {
return path;
}
-
- public void outputFile() {
- StringBuffer sb = new StringBuffer(hostname.toLocalString());
- sb.append(path.toLocalString());
- FileOutputStream fos = new FileOutputStream(sb.toString().replace('/','#'));
- fos.write(response.toLocalString().getBytes());
- fos.close();
- }
-
-
- public GlobalString makewebcanonical(GlobalString page) {
- GlobalStringBuffer b = global new GlobalStringBuffer(getHostName(page));
- b.append("/");
- b.append(getPathName(page));
- return b.toGlobalString();
- }
public GlobalString getHostName(GlobalString page) {
GlobalString http = global new GlobalString("http://");
public class QueryList extends Queue {
+ Queue queries;
+
// Constructs the list. The old body (`Queue(); // ??`) is replaced by allocating a
// separate Queue with `global new` and storing it in the `queries` field.
// NOTE(review): the class is still declared `extends Queue` while also holding a
// Queue field — confirm which of the two is meant to carry the elements.
public QueryList() {
- Queue(); // ??
+ queries = global new Queue();
}
public boolean checkQuery(GlobalString x) {
}
return set;
}
+
// Pushes x onto the `queries` queue held by this object.
+ public void addQuery(GlobalString x) {
+ queries.push(x);
+ }
}
public class QueryThread extends Task {
int maxDepth;
- int depthCnt;
int maxSearchDepth;
- int searchDepthCnt;
// Stores the todo queue, the done queue and the two limits (maxDepth, maxSearchDepth).
// The new signature only renames the parameters to match the fields they initialise;
// the counters depthCnt and searchDepthCnt are no longer set here because the fields
// are removed by this change.
- public QueryThread(Queue qq, Queue ql, int depth, int searchDepth) {
- this.todoList = qq;
- this.doneList = ql;
- this.maxDepth = depth;
- this.maxSearchDepth = searchDepth;
- depthCnt = 1;
- searchDepthCnt = 0;
+ public QueryThread(Queue todoList, Queue doneList, int maxDepth, int maxSearchDepth) {
+ this.todoList = todoList;
+ this.doneList = doneList;
+ this.maxDepth = maxDepth;
+ this.maxSearchDepth = maxSearchDepth;
}
// Processes the current work item. In the new version the item is read from `myWork`
// (cast to Query) instead of being passed as a parameter.
// NOTE(review): `myWork` and `todoList` are not declared in the visible code —
// presumably inherited from Task; confirm.
// Flow of the new version:
//   1. read the query depth and both limits inside one atomic block;
//   2. if depth < maxDepth, copy host name and path into local Strings and wrap them
//      in a LocalQuery;
//   3. open a socket to port 80, send the request, read the response, extract links
//      with processPage, close the socket;
//   4. inside an atomic block, turn every LocalQuery returned by processPage into a
//      global Query and push it onto todoList.
// The old version wrote the response to a file (q.outputFile()) and let processPage
// push onto todoList itself; both are removed here.
- public void execute(Object mywork) {
- Query q = (Query)mywork;
- GlobalString ghostname;
- GlobalString gpath;
-
+ public void execute() {
+ int depth;
+ int max;
+ int maxSearch;
+
// Snapshot depth and limits into locals so they can be used outside the atomic block.
atomic {
- ghostname = q.getHostName();
- gpath = q.getPath();
+ depth = ((Query)myWork).getDepth();
+ max = this.maxDepth;
+ maxSearch = this.maxSearchDepth;
}
- String hostname = new String(GlobalString.toLocalCharArray(ghostname));
- String path = new String(GlobalString.toLocalCharArray(gpath));
// Queries at or beyond maxDepth are not fetched at all.
+ if (depth < max) {
+ /* global variables */
+ Query q;
+ GlobalString ghostname;
+ GlobalString gpath;
- System.printString("Processing ");
- System.printString(hostname + "\n");
- System.printString(" ");
- System.printString(path);
- System.printString("\n");
+ /* local variables */
+ QueryQueue toprocess;
+ LocalQuery lq;
+ String hostname;
+ String path;
- Socket s = new Socket(hostname, 80);
// Convert the global host/path strings to local Strings via toLocalCharArray.
+ atomic {
+ q = (Query)myWork;
+ ghostname = q.getHostName();
+ gpath = q.getPath();
+ hostname = new String(GlobalString.toLocalCharArray(ghostname));
+ path = new String(GlobalString.toLocalCharArray(gpath));
+ }
+ lq = new LocalQuery(hostname, path, depth);
- requestQuery(hostname, path, s);
-// System.printString("Wait for 5 secs\n");
-// Thread.sleep(2000000);
+ System.printString("Processing - Hostname : ");
+ System.printString(hostname);
+ System.printString(", Path : ");
+ System.printString(path);
+ System.printString("\n");
- readResponse(q, s);
-// System.printString("Wait for 5 secs\n");
-// Thread.sleep(2000000);
// Network I/O and link extraction happen outside any atomic block, on local objects only.
+ Socket s = new Socket(hostname, 80);
+
+ requestQuery(hostname, path, s);
+ readResponse(lq, s);
+ toprocess = processPage(lq,maxSearch);
+ s.close();
- q.outputFile();
-// System.printString("Wait for 5 secs\n");
-// Thread.sleep(2000000);
// Publish the discovered links: one new global Query per LocalQuery, pushed onto todoList.
+ atomic {
+ while(!toprocess.isEmpty()) {
+ lq = toprocess.pop();
+ ghostname = global new GlobalString(lq.getHostName());
+ gpath = global new GlobalString(lq.getPath());
- processPage(q, (QueryList)doneList);
- s.close();
+ q = global new Query(ghostname, gpath, lq.getDepth());
+ todoList.push(q);
+ }
+ }
+ }
}
// Builds the request text "GET /<path> HTTP/1.1\r\nHost:<hostname>\r\n\r\n" and writes
// its bytes to sock. A "/" is always inserted before path.
// This change makes the method static and removes the debug print of the request.
- public void requestQuery(String hostname, String path, Socket sock) {
+ public static void requestQuery(String hostname, String path, Socket sock) {
StringBuffer req = new StringBuffer("GET ");
req.append("/");
req.append(path);
req.append(" HTTP/1.1\r\nHost:");
req.append(hostname);
req.append("\r\n\r\n");
- System.printString("req : " + req + "\n");
sock.write(req.toString().getBytes());
}
- public void readResponse(Query q, Socket sock) {
+ public static void readResponse(LocalQuery lq, Socket sock) {
// state 0 - nothing
// state 1 - \r
// state 2 - \r\n
return;
else {
String curr=(new String(buffer)).subString(0,numchars);
- q.response.append(curr);
+ lq.response.append(curr);
}
}
}
// Pushes obj onto doneList. The two removed lines were commented-out debugging code.
public void done(Object obj) {
doneList.push(obj);
-// System.printString("Size of todoList : " + todoList.size() + "\n");
-// Thread.sleep(5000000);
}
// Scans the response text of lq for `href="` attributes and returns the links found.
// New version: static, works on a LocalQuery, and returns a QueryQueue instead of
// pushing onto todoList itself. For every href value for which lq.makewebcanonical
// returns non-null, a LocalQuery with depth lq.getDepth() + 1 is pushed onto the
// result. The loop stops after maxSearchDepth such links, or when no further
// `href="` or closing quote is found.
// The old version's `!doneList.checkQuery(...)` filter and its call to done(...) are
// removed, so this method no longer consults or updates doneList.
- public void processPage(Query q, QueryList doneList) {
+ public static QueryQueue processPage(LocalQuery lq,int maxSearchDepth) {
int index = 0;
String href = new String("href=\"");
- String searchstr = q.response.toLocalString();
+ String searchstr = lq.response.toString();
+ int depth;
boolean cont = true;
+ QueryQueue toprocess = new QueryQueue();
+ depth = lq.getDepth() + 1;
+
// searchDepthCnt is now a local that starts at 0 on every call (it used to be a field).
+ int searchDepthCnt = 0;
while(cont && (searchDepthCnt < maxSearchDepth)) {
int mindex = searchstr.indexOf(href,index);
if (mindex != -1) {
int endquote = searchstr.indexOf('"', mindex+href.length());
if (endquote != -1) {
// match is the text between `href="` and the next double quote.
String match = searchstr.subString(mindex+href.length(), endquote);
- GlobalString gmatch;
- GlobalString gmatch2;
+ String match2 = lq.makewebcanonical(match);
+
+ if (match2 != null) {
+ LocalQuery newlq = new LocalQuery(lq.getHostName(match), lq.getPathName(match), depth);
- atomic {
- gmatch = global new GlobalString(match);
- gmatch2 = q.makewebcanonical(gmatch);
- }
- if (gmatch2 != null && !doneList.checkQuery(gmatch2)) {
-// doneList.push(gmatch2);
- done(gmatch2);
- if (depthCnt < maxDepth) {
- Query newq;
- System.printString("Depth : " + depthCnt + "\n");
- atomic {
- newq = global new Query(q.getHostName(gmatch), q.getPathName(gmatch));
- todoList.push(newq);
- System.printString("Size of todoList : " + todoList.size() + "\n");
- searchDepthCnt++;
- }
- }
+ toprocess.push(newlq);
+ searchDepthCnt++;
}
// Continue the search from the closing quote of the link just handled.
- index = endquote;
+ index = endquote;
} else cont = false;
} else cont = false;
}
- depthCnt++;
- searchDepthCnt = 0;
+
+ return toprocess;
}
}
public class Spider {
- public static int[] getMID (int num_threads) {
- int[] mid = new int[num_threads];
-
- FileInputStream ifs = new FileInputStream("dstm.conf");
- String str;
- String sub;
- int fromIndex = 0;
- int endIndex = 0;
- int[] tmp = new int[4];
-
- for (int i = 0; i < num_threads; i++) {
- str = ifs.readLine();
- endIndex = str.indexOf('.', fromIndex);
- sub = str.subString(fromIndex, endIndex);
-
- fromIndex = endIndex + 1;
- endIndex = str.indexOf('.', fromIndex);
- sub = str.subString(fromIndex, endIndex);
-
- fromIndex = endIndex + 1;
- endIndex = str.indexOf('.', fromIndex);
- sub = str.subString(fromIndex, endIndex);
-
- fromIndex = endIndex + 1;
- sub = str.subString(fromIndex);
-
- fromIndex = 0;
- }
- return mid;
- }
-
// Entry point: creates the first Query from the command line, one QueryThread/Work
// pair per thread, and starts each Work on the machine id in mid[i].
// This change raises NUM_THREADS to 4 and searchDepth to 10, hard-codes the machine
// ids instead of calling getMID, tags the first query with depth 0, and makes
// doneList a plain Queue.
// NOTE(review): the host and page are read from args[1] and args[2]; args[0] is not
// used in the visible code — confirm the expected command line.
// NOTE(review): parts of this method (e.g. the header of the start loop and the
// declarations of tmp and currentWorkList) are not visible in this diff.
public static void main(String[] args) {
- int NUM_THREADS = 3;
- int depth = 5;
- int searchDepth = 5;
+ int NUM_THREADS = 4;
+ int maxDepth = 5;
+ int searchDepth = 10;
int i, j;
Work[] works;
QueryThread[] qt;
GlobalString firstmachine;
GlobalString firstpage;
- int[] mid = getMID(NUM_THREADS);
+// int[] mid = getMID(NUM_THREADS);
+ int mid[] = new int[NUM_THREADS];
+/* mid[0] = (128<<24)|(195<<16)|(180<<8)|21; //dc-4
+ mid[1] = (128<<24)|(195<<16)|(180<<8)|24; //dc-5
+ mid[2] = (128<<24)|(195<<16)|(180<<8)|26; //dc-6
+ */
// Each id packs the four octets of an IPv4 address into one int, first octet in the top byte.
+ mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1
+ mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2
+ mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3
+ mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-3 NOTE(review): label repeats the previous line; confirm which host .165 is
-/* int mid[] = new int[NUM_THREADS];
- mid[0] = (128<<24)|(195<<16)|(136<<8)|166; //dc-4
- mid[1] = (128<<24)|(195<<16)|(136<<8)|167; //dc-5
- mid[2] = (128<<24)|(195<<16)|(136<<8)|168; //dc-6
-*/
atomic {
firstmachine = global new GlobalString(args[1]);
firstpage = global new GlobalString(args[2]);
qt = global new QueryThread[NUM_THREADS];
currentWorkList = global new Query[NUM_THREADS];
- Query firstquery = global new Query(firstmachine, firstpage);
+ Query firstquery = global new Query(firstmachine, firstpage, 0);
Queue todoList = global new Queue();
+ Queue doneList = global new Queue();
todoList.push(firstquery);
- QueryList doneList = global new QueryList();
// Every QueryThread receives the same todoList and doneList objects.
for (i = 0; i < NUM_THREADS; i++) {
- qt[i] = global new QueryThread(todoList, doneList, depth, searchDepth);
+ qt[i] = global new QueryThread(todoList, doneList, maxDepth, searchDepth);
works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
}
}
atomic {
tmp = works[i];
}
- tmp.start(mid[i]);
+ Thread.myStart(tmp, mid[i]);
}
for (i = 0; i < NUM_THREADS; i++) {
}
tmp.join();
}
+ }
+
+ /**
+ * Reads machine addresses from "dstm.conf" and returns them as packed ints.
+ *
+ * Each usable line is expected to be a dotted quad "a.b.c.d"; it is packed as
+ * (a<<24)|(b<<16)|(c<<8)|d, the same layout main() uses for its hard-coded ids.
+ * Lines that start with '#' are skipped, because the configuration file now
+ * contains such lines and they would otherwise be handed to Integer.parseInt.
+ *
+ * NOTE(review): as before, nothing guards against the file holding fewer usable
+ * lines than num_threads, and the stream is not closed here.
+ *
+ * @param num_threads number of addresses to read
+ * @return array of num_threads packed addresses, in file order
+ */
+ public static int[] getMID (int num_threads) {
+ int[] mid = new int[num_threads];
+
+ FileInputStream ifs = new FileInputStream("dstm.conf");
+ String str;
+ int fromIndex;
+ int endIndex;
+ int num;
+
+ for (int i = 0; i < num_threads; i++) {
+ str = ifs.readLine();
+ // Skip lines whose first character is '#'.
+ while (str.indexOf('#', 0) == 0) {
+ str = ifs.readLine();
+ }
+
+ // First octet goes into the top byte.
+ fromIndex = 0;
+ endIndex = str.indexOf('.', fromIndex);
+ num = Integer.parseInt(str.subString(fromIndex, endIndex)) << 24;
+
+ fromIndex = endIndex + 1;
+ endIndex = str.indexOf('.', fromIndex);
+ num |= Integer.parseInt(str.subString(fromIndex, endIndex)) << 16;
+
+ fromIndex = endIndex + 1;
+ endIndex = str.indexOf('.', fromIndex);
+ num |= Integer.parseInt(str.subString(fromIndex, endIndex)) << 8;
+
+ // Last octet runs to the end of the line.
+ fromIndex = endIndex + 1;
+ num |= Integer.parseInt(str.subString(fromIndex));
+
+ mid[i] = num;
+ }
+ return mid;
-// while(true)
-// Thread.sleep(1000000);
}
}
-128.195.136.166
-128.195.136.167
-128.195.136.168
+#128.195.180.21
+#128.195.180.24
+#128.195.180.26
+128.195.136.162
+128.195.136.163
+128.195.136.164
+128.195.136.165
+#128.195.136.166
+#128.195.136.167
SUBCLASS=Query
SRC1=${MAINCLASS}.java
SRC2=${SUBCLASS}.java
-SRC3=${SUBCLASS}List.java
+SRC3=${SUBCLASS}Queue.java
SRC4=${SUBCLASS}Thread.java
FLAGS= -dsm -32bit -nooptimize -debug -recovery -mainclass ${MAINCLASS}
default:
# Removes tmpbuilddirectory, *.bin, and (added by this change) *.php, *.css and files
# whose names start with www, eee or web.
# NOTE(review): the added patterns presumably match pages saved by the crawler — confirm.
# NOTE(review): plain `rm` exits non-zero when a pattern matches nothing, and make stops
# the recipe at that line; consider `rm -f` if clean should always run to the end.
clean:
rm -rf tmpbuilddirectory
rm *.bin
+ rm *.php
+ rm *.css
+ rm www*
+ rm eee*
+ rm web*