From 71a52347c002d52afe4d7e06bb29d64971e4b168 Mon Sep 17 00:00:00 2001 From: hkhang Date: Mon, 9 Nov 2009 01:15:13 +0000 Subject: [PATCH] *** empty log message *** --- .../Spider/recovery/GlobalQuery.java | 2 +- .../Benchmarks/Spider/recovery/QueryTask.java | 126 +++++++++++++++--- .../Benchmarks/Spider/recovery/Spider.java | 6 +- Robust/src/ClassLibrary/InetAddress.java | 19 ++- Robust/src/ClassLibrary/Java/Socket.java | 12 ++ Robust/src/Runtime/socket.c | 6 +- 6 files changed, 139 insertions(+), 32 deletions(-) diff --git a/Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java b/Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java index 0a9d1630..7efff695 100644 --- a/Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java +++ b/Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java @@ -4,7 +4,7 @@ public class GlobalQuery { int depth; public GlobalQuery(GlobalString hostname) { - this.hostname = global new GlobalString(hostname); + this.hostname = hostname; this.path = global new GlobalString(""); this.depth = 0; } diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryTask.java b/Robust/src/Benchmarks/Spider/recovery/QueryTask.java index 9c0b5a11..743d63e0 100644 --- a/Robust/src/Benchmarks/Spider/recovery/QueryTask.java +++ b/Robust/src/Benchmarks/Spider/recovery/QueryTask.java @@ -10,6 +10,7 @@ public class QueryTask extends Task { this.doneList = doneList; this.maxDepth = maxDepth; this.results = results; + toprocess = global new Queue(); } public void execute() { @@ -51,8 +52,16 @@ public class QueryTask extends Task { System.printString(path); System.printString("\n"); - Socket s = new Socket(hostname, 80); - + if (isDocument(path)) { + return; + } + + Socket s = new Socket(); + + if(s.connect(hostname, 80) == -1) { + return; + } + requestQuery(hostname, path, s); readResponse(lq, s); @@ -60,15 +69,27 @@ public class QueryTask extends Task { atomic { gTitle = global new GlobalString(title); } + atomic { + toprocess = processPage(lq); + } } - - atomic { - toprocess = processPage(lq); - } - s.close(); } } + + public static boolean isDocument(String str) { + int index = str.lastindexOf('.'); + + if (index != -1) { + if ((str.subString(index+1)).equals("pdf")) return true; + else if ((str.subString(index+1)).equals("ps")) return true; + else if ((str.subString(index+1)).equals("ppt")) return true; + else if ((str.subString(index+1)).equals("pptx")) return true; + else if ((str.subString(index+1)).equals("jpg")) return true; + else return false; + } + return false; + } public void done(Object obj) { if (gTitle != null) @@ -95,22 +116,46 @@ public class QueryTask extends Task { } public static String grabTitle(LocalQuery lq) { - String sTitle = new String(""); - String eTitle = new String(""); + String sBrace = new String("<"); + String strTitle = new String("title>"); String searchstr = lq.response.toString(); String title = null; char ch; - int mindex = searchstr.indexOf(sTitle); - if (mindex != -1) { - int endquote = searchstr.indexOf(eTitle, mindex+sTitle.length()); + int mindex = -1; + int endquote = -1; + int i, j; + String tmp; + + for (i = 0; i < searchstr.length(); i++) { + if (searchstr.charAt(i) == '<') { + i++; + if (searchstr.length() > (i+strTitle.length())) { + tmp = searchstr.subString(i, i+strTitle.length()); + if (tmp.equalsIgnoreCase("title>")) { + mindex = i + tmp.length(); + for (j = mindex; j < searchstr.length(); j++) { + if (searchstr.charAt(j) == '<') { + j++; + tmp = searchstr.subString(j, j+strTitle.length()+1); + if (tmp.equalsIgnoreCase("/title>")) { + endquote = j - 1; + break; + } + } + } + } + } + } + } - title = new String(searchstr.subString(mindex+sTitle.length(), endquote)); - + if (mindex != -1) { + title = searchstr.subString(mindex, endquote); if (Character.isWhitespace(title.charAt(0))){ mindex=0; while (Character.isWhitespace(title.charAt(mindex++))); mindex--; + if (mindex >= title.length()) return null; title = new String(title.subString(mindex)); } @@ -118,23 +163,30 @@ public class QueryTask extends Task { endquote=title.length()-1; while (Character.isWhitespace(title.charAt(endquote--))); endquote += 2; + if (mindex >= endquote) return null; title = new String(title.subString(0, endquote)); } - if (errorPage(title)) - title = null; + if (isErrorPage(title)) { + return null; + } } +// System.out.println("Title = [" + title + "]"); return title; } - public static boolean errorPage(String str) { + public static boolean isErrorPage(String str) { if (str.equals("301 Moved Permanently")) return true; else if (str.equals("302 Found")) return true; else if (str.equals("404 Not Found")) return true; + else if (str.equals("403 Forbidden")) + return true; + else if (str.equals("404 File Not Found")) + return true; else return false; } @@ -143,12 +195,32 @@ public class QueryTask extends Task { StringBuffer req = new StringBuffer("GET "); req.append("/"); req.append(path); - req.append(" HTTP/1.1\r\nHost:"); + req.append(" HTTP/1.0\r\nHost: "); req.append(hostname); req.append("\r\n\r\n"); sock.write(req.toString().getBytes()); } + public static void readResponse(LocalQuery lq, Socket sock) { + // state 0 - nothing + // state 1 - \r + // state 2 - \r\n + // state 3 - \r\n\r + // state 4 - \r\n\r\n + byte[] buffer = new byte[1024]; + int numchars; + + do { + numchars = sock.read(buffer); + + String curr = (new String(buffer)).subString(0, numchars); + + lq.response.append(curr); + buffer = new byte[1024]; + } while(numchars > 0); + } + +/* public static void readResponse(LocalQuery lq, Socket sock) { // state 0 - nothing // state 1 - \r @@ -202,12 +274,13 @@ public class QueryTask extends Task { return; else { String curr=(new String(buffer)).subString(0,numchars); +// System.out.println("numchars = "+numchars); lq.response.append(curr); } } } } - +*/ public void processList() { LinkedList ll; GlobalString token = null; @@ -236,7 +309,7 @@ public class QueryTask extends Task { } q.push(workingURL); results.put(token, q); - System.out.println("Key : ["+token.toLocalString()+"],["+q.size()+"]"); +// System.out.println("Key : ["+token.toLocalString()+"],["+q.size()+"]"); } } @@ -251,9 +324,11 @@ public class QueryTask extends Task { else if (str.equals("or")) return true; else if (str.equals("but")) return true; else if (str.equals("to")) return true; + else if (str.equals("The")) return true; else if (str.equals(".")) return true; - else if (str.equals("=")) return true; else if (str.equals("-")) return true; + else if (str.equals("=")) return true; + else if (str.equals("_")) return true; else if (str.equals(":")) return true; else if (str.equals(";")) return true; else if (str.equals("\'")) return true; @@ -261,6 +336,7 @@ public class QueryTask extends Task { else if (str.equals("|")) return true; else if (str.equals("@")) return true; else if (str.equals("&")) return true; + else if (str.equals(" ")) return true; else return false; } @@ -274,6 +350,9 @@ public class QueryTask extends Task { if (str.charAt(0) == '&') { // & return str.subString(1); } + else if (str.charAt(0) == '/') { // & + return str.subString(1); + } return str; } @@ -294,6 +373,11 @@ public class QueryTask extends Task { if (str.charAt(str.length()-2) == '\'') return str.subString(0, str.length()-2); } + else if (str.charAt(str.length()-1) == '-') { + int index = str.length()-2; + while (Character.isWhitespace(str.charAt(index--))); + return str.subString(0, index+2); + } return str; } diff --git a/Robust/src/Benchmarks/Spider/recovery/Spider.java b/Robust/src/Benchmarks/Spider/recovery/Spider.java index 356d6fa8..54aea288 100644 --- a/Robust/src/Benchmarks/Spider/recovery/Spider.java +++ b/Robust/src/Benchmarks/Spider/recovery/Spider.java @@ -19,9 +19,9 @@ public class Spider { // mid[0] = (128<<24)|(195<<16)|(180<<8)|21; // mid[1] = (128<<24)|(195<<16)|(180<<8)|24; // mid[2] = (128<<24)|(195<<16)|(180<<8)|26; - mid[0] = (128<<24)|(195<<16)|(136<<8)|162; - mid[1] = (128<<24)|(195<<16)|(136<<8)|163; - mid[2] = (128<<24)|(195<<16)|(136<<8)|164; + mid[0] = (128<<24)|(195<<16)|(136<<8)|162; + mid[1] = (128<<24)|(195<<16)|(136<<8)|163; + mid[2] = (128<<24)|(195<<16)|(136<<8)|164; atomic { firstmachine = global new GlobalString(args[1]); diff --git a/Robust/src/ClassLibrary/InetAddress.java b/Robust/src/ClassLibrary/InetAddress.java index fee8efff..8637cd83 100644 --- a/Robust/src/ClassLibrary/InetAddress.java +++ b/Robust/src/ClassLibrary/InetAddress.java @@ -13,7 +13,10 @@ public class InetAddress { public static InetAddress getByName(String hostname) { InetAddress[] addresses=getAllByName(hostname); - return addresses[0]; + if (addresses != null) + return addresses[0]; + else + return null; } public byte[] getAddress() { @@ -40,12 +43,16 @@ public class InetAddress { byte[][] iplist = InetAddress.getHostByName(hostname.getBytes()); - addresses = new InetAddress[iplist.length]; + if (iplist != null) { + addresses = new InetAddress[iplist.length]; - for (int i = 0; i < iplist.length; i++) { - addresses[i] = new InetAddress(iplist[i], hostname); - } - return addresses; + for (int i = 0; i < iplist.length; i++) { + addresses[i] = new InetAddress(iplist[i], hostname); + } + return addresses; + } + else + return null; } public static native byte[][] getHostByName(byte[] hostname); diff --git a/Robust/src/ClassLibrary/Java/Socket.java b/Robust/src/ClassLibrary/Java/Socket.java index 297fe3d0..5863a518 100644 --- a/Robust/src/ClassLibrary/Java/Socket.java +++ b/Robust/src/ClassLibrary/Java/Socket.java @@ -32,6 +32,18 @@ public class Socket { sout=new SocketOutputStream(this); } + public int connect(String host, int port) { + InetAddress address=InetAddress.getByName(host); + if (address != null) { + fd=nativeBind(address.getAddress(), port); + nativeConnect(fd, address.getAddress(), port); + return 0; + } + else { + return -1; + } + } + public static native int nativeBind(byte[] address, int port); public static native int nativeConnect(int fd, byte[] address, int port); diff --git a/Robust/src/Runtime/socket.c b/Robust/src/Runtime/socket.c index 0b855036..7c079ac0 100644 --- a/Robust/src/Runtime/socket.c +++ b/Robust/src/Runtime/socket.c @@ -153,7 +153,8 @@ struct ArrayObject * CALL01(___InetAddress______getHostByName_____AR_B, struct A h=gethostbyname(str); free(str); - for (n=0; h->h_addr_list[n]; n++) /* do nothing */ ; + if (h != NULL) { + for (n=0; h->h_addr_list[n]; n++) /* do nothing */ ; #ifdef PRECISE_GC arraybytearray=allocate_newarray(___params___,BYTEARRAYARRAYTYPE,n); @@ -180,6 +181,9 @@ struct ArrayObject * CALL01(___InetAddress______getHostByName_____AR_B, struct A return arraybytearray; #endif + } + else + return NULL; } -- 2.34.1