+++ /dev/null
-/*\r
-Usage :\r
- ./FileSystem.bin <num thread> <datafile prefix>\r
-*/\r
-\r
-\r
-\r
-public class FileSystem extends Thread {\r
- DistributedHashMap dir; // Directory \r
- DistributedHashMap fs; // File \r
- DistributedLinkedList dir_list;\r
- GlobalString inputfile;\r
- int mid;\r
- \r
- public FileSystem(DistributedHashMap dir, DistributedHashMap fs, DistributedLinkedList dir_list) {\r
- this.dir = dir;\r
- this.fs = fs;\r
- this.dir_list = dir_list;\r
- }\r
- \r
- public FileSystem(DistributedHashMap dir, DistributedHashMap fs, DistributedLinkedList dir_list, String filename, int mid) {\r
- this.dir = dir;\r
- this.fs = fs;\r
- this.dir_list = dir_list;\r
- this.mid = mid;\r
- this.inputfile = global new GlobalString("data/"+filename + mid);\r
- }\r
-\r
-\r
- public void setInputFileName(String filename, int mid) {\r
- this.mid = mid;\r
- this.inputfile = global new GlobalString("data/"+filename + mid);\r
- }\r
-\r
- public void init() {\r
- fillHashTable();\r
- }\r
- \r
- public void fillHashTable() {\r
- GlobalString path;\r
- DistributedLinkedList list; \r
-\r
- atomic {\r
- path = global new GlobalString("/home/"); // root is 'home'\r
- list = global new DistributedLinkedList();\r
-\r
- dir.put(path, list);\r
- dir_list.add(path);\r
- }\r
- }\r
- \r
- public static void fillTodoList(String file, LinkedList todoList) {\r
- FileInputStream fis;\r
- String comm;\r
- char c;\r
- String key;\r
- String val;\r
- Transaction t;\r
-\r
- fis = new FileInputStream(file);\r
-\r
- while ((comm = fis.readLine()) != null) { // 'command' 'path'\r
- c = comm.charAt(0); // ex) w /home/abc.c \r
- key = comm.subString(2);\r
- t = new Transaction(c, key);\r
- todoList.add(t);\r
- }\r
- }\r
-\r
- public void run() {\r
- Transaction t;\r
-\r
- char command;\r
- String key;\r
- String val;\r
- GlobalString gkey;\r
- GlobalString gval;\r
- boolean isDir;\r
-\r
- int index;\r
- String file;\r
- atomic {\r
- file = inputfile.toLocalString();\r
- }\r
-\r
- LinkedList todoList = new LinkedList();\r
- fillTodoList(file, todoList);\r
-\r
- while (!todoList.isEmpty()) {\r
- t = (Transaction)(todoList.removeFirst());\r
-\r
- command = t.getCommand();\r
- key = t.getKey();\r
-\r
- atomic {\r
- gkey = global new GlobalString(key);\r
- }\r
-\r
- index = key.lastindexOf('/');\r
- if (index+1 == key.length()) \r
- isDir = true;\r
- else \r
- isDir = false;\r
- \r
- if (command == 'r') {\r
- System.out.println("["+command+"] ["+key+"]");\r
- if (isDir == true) {\r
- atomic {\r
- readDirectory(gkey);\r
- }\r
- }\r
- else {\r
- atomic {\r
- readFile(gkey);\r
- }\r
- }\r
- }\r
- else if (command == 'c') {\r
- System.out.println("["+command+"] ["+key+"]");\r
- if (isDir == true) {\r
- atomic {\r
- createDirectory(gkey);\r
- }\r
- }\r
- else {\r
- val = t.getValue();\r
- atomic {\r
- gval = global new GlobalString(val);\r
- createFile(gkey, gval);\r
- }\r
- }\r
- }\r
- }\r
-\r
- sleep(3000000);\r
- atomic {\r
- output();\r
- }\r
-\r
- RecoveryStat.printRecoveryStat();\r
- }\r
-\r
- public void output() { \r
- Iterator iter;\r
- GlobalString gstr;\r
-\r
- iter = dir_list.iterator();\r
-\r
- while (iter.hasNext()) {\r
- gstr = (GlobalString)(iter.next());\r
- System.printString(gstr.toLocalString() + "\n");\r
- }\r
- }\r
-\r
- public void readFile(GlobalString gkey) {\r
- GlobalString gval;\r
- String val;\r
-\r
- atomic {\r
- gval = (GlobalString)(fs.get(gkey));\r
- val = gval.toLocalString();\r
- }\r
- if (val != null) {\r
-// System.out.println("<"+val+">");\r
- }\r
- else {\r
- System.out.println("No such file or directory");\r
- }\r
- }\r
-\r
- public void readDirectory(GlobalString gkey) {\r
- DistributedLinkedList list;\r
- Iterator iter;\r
- GlobalString gval;\r
-\r
- list = (DistributedLinkedList)(dir.get(gkey));\r
-\r
- if (list != null) {\r
- iter = list.iterator();\r
- while (iter.hasNext() == true) {\r
- gval = (GlobalString)(iter.next());\r
-// System.out.print("["+gval.toLocalString()+"] ");\r
- }\r
-// System.out.println("");\r
- }\r
- else {\r
- System.out.println("No such file or directory");\r
- }\r
- }\r
-\r
- public void createFile(GlobalString gkey, GlobalString gval) {\r
- String path;\r
- String target;\r
- GlobalString gpath;\r
- GlobalString gtarget;\r
- int index;\r
- DistributedLinkedList list;\r
-\r
- index = gkey.lastindexOf('/');\r
- gpath = gkey.subString(0, index+1);\r
- gtarget = gkey.subString(index+1);\r
-\r
- if (dir.containsKey(gpath)) {\r
- list = (DistributedLinkedList)(dir.get(gpath));\r
- list.push(gtarget);\r
- dir.put(gpath, list);\r
- fs.put(gkey, gval);\r
- }\r
- else {\r
- System.out.println("Cannot create file");\r
- }\r
- }\r
-\r
- public void createDirectory(GlobalString gkey) {\r
- int index;\r
- GlobalString gpath;\r
- GlobalString gtarget;\r
- DistributedLinkedList list;\r
-\r
- index = gkey.lastindexOf('/', gkey.length()-2);\r
-\r
- if (index != -1) {\r
- gpath = gkey.subString(0, index+1);\r
- gtarget = gkey.subString(index+1);\r
-\r
- if (dir.containsKey(gpath)) {\r
- list = (DistributedLinkedList)(dir.get(gpath));\r
- list.push(gtarget);\r
- dir.put(gpath, list);\r
-\r
- list = global new DistributedLinkedList();\r
- dir.put(gkey, list);\r
- dir_list.add(gkey);\r
- }\r
- else {\r
- System.out.println("Cannot create directory");\r
- }\r
- }\r
- else {\r
- System.out.println("Cannot create directory");\r
- }\r
- }\r
- \r
- public Object read(DistributedHashMap mydhmap, GlobalString key) {\r
- Object obj = mydhmap.get(key); \r
- \r
- return obj;\r
- }\r
- \r
- public static void main(String[] args) {\r
- int NUM_THREADS = 3;\r
- String filename = new String();\r
-\r
- if (args.length == 2) {\r
- NUM_THREADS = Integer.parseInt(args[0]);\r
- filename = args[1];\r
- }\r
- else {\r
- System.out.println("./FileSystem.bin master <num_thread> <data>");\r
- System.exit(0);\r
- }\r
- \r
- int[] mid = new int[8];\r
- mid[0] = (128<<24)|(195<<16)|(180<<8)|21;//dw-2\r
- mid[1] = (128<<24)|(195<<16)|(180<<8)|26;//dw-7\r
-/*\r
- mid[0] = (128<<24)|(195<<16)|(136<<8)|162;//dc-1\r
- mid[1] = (128<<24)|(195<<16)|(136<<8)|163;//dc-2\r
- mid[2] = (128<<24)|(195<<16)|(136<<8)|164;//dc-3\r
- mid[3] = (128<<24)|(195<<16)|(136<<8)|165;//dc-4\r
- mid[4] = (128<<24)|(195<<16)|(136<<8)|166;//dc-5\r
- mid[5] = (128<<24)|(195<<16)|(136<<8)|167;//dc-6\r
- mid[6] = (128<<24)|(195<<16)|(136<<8)|168;//dc-7\r
- mid[7] = (128<<24)|(195<<16)|(136<<8)|169;//dc-8\r
- */\r
- FileSystem[] lus;\r
- FileSystem initLus;\r
-\r
- Work[] works;\r
- Transaction[] currentWorkList; // type might be something else\r
- \r
- atomic {\r
- currentWorkList = global new Transaction[NUM_THREADS]; // something else\r
- works = global new Work[NUM_THREADS];\r
- \r
- DistributedHashMap fs = global new DistributedHashMap(500, 500, 0.75f);\r
- DistributedHashMap dir = global new DistributedHashMap(500, 500, 0.75f);\r
- DistributedLinkedList dir_list = global new DistributedLinkedList();\r
- \r
- initLus = global new FileSystem(dir, fs, dir_list);\r
- initLus.init();\r
-\r
- lus = global new FileSystem[NUM_THREADS];\r
- for(int i = 0; i < NUM_THREADS; i++) {\r
-// lus[i] = initLus;\r
-// lus[i].setInputFileName(filename, i);\r
- lus[i] = global new FileSystem(initLus.dir, initLus.fs, initLus.dir_list, filename, i);\r
- }\r
- }\r
-\r
- FileSystem tmp;\r
- /* Start threads */\r
- for(int i = 0; i < NUM_THREADS; i++) {\r
- atomic {\r
- tmp = lus[i];\r
- }\r
- Thread.myStart(tmp, mid[i]);\r
- }\r
- \r
- /* Join threads */\r
- for(int i = 0; i < NUM_THREADS; i++) {\r
- atomic {\r
- tmp = lus[i];\r
- }\r
- tmp.join();\r
- }\r
- \r
- System.printString("Finished\n");\r
- }\r
-}\r
-\r
-public class Transaction { // object for todoList\r
- char command; // r: read, w: write\r
- String key;\r
- String val;\r
- \r
- Transaction (char c, String key) {\r
- command = c;\r
- \r
- this.key = new String(key);\r
- this.val = new String();\r
- }\r
- \r
- Transaction (char c, String key, String val) {\r
- command = c;\r
- \r
- this.key = new String(key);\r
- this.val = new String(val);\r
- }\r
- \r
- public char getCommand() {\r
- return command;\r
- }\r
- \r
- public String getKey() {\r
- return key;\r
- }\r
- \r
- public String getValue() {\r
- return val;\r
- }\r
-}\r
+++ /dev/null
-128.195.180.21
-#128.195.180.24
-128.195.180.26
--- /dev/null
+# Builds the FileSystem benchmark through the shared buildscript.
+MAINCLASS=FileSystem
+SRC1=${MAINCLASS}.java
+FLAGS= -optimize -thread -mainclass ${MAINCLASS}
+
+# Neither target produces a file named after itself.
+.PHONY: default clean
+
+default:
+	../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC1}
+
+# -f keeps clean from failing when no .bin file has been built yet.
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
+++ /dev/null
-MAINCLASS=FileSystem
-SRC1=${MAINCLASS}.java
-FLAGS= -recoverystats -dsm -dsmtask -32bit -recovery -nooptimize -mainclass ${MAINCLASS}
-default:
- ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC1}
-
-clean:
- rm -rf tmpbuilddirectory
- rm *.bin
--- /dev/null
+/*\r
+Usage :\r
+ ./FileSystem.bin <num thread> <datafile prefix>\r
+*/\r
+\r
+\r
+\r
+public class FileSystem extends Thread {\r
+ DistributedHashMap dir; // Directory \r
+ DistributedHashMap fs; // File \r
+ DistributedLinkedList dir_list;\r
+ GlobalString inputfile;\r
+ int mid;\r
+ \r
+ // Builds an instance that shares the given directory map, file map and directory list.\r
+ // inputfile and mid are not assigned by this constructor.\r
+ public FileSystem(DistributedHashMap dir, DistributedHashMap fs, DistributedLinkedList dir_list) {\r
+ this.dir = dir;\r
+ this.fs = fs;\r
+ this.dir_list = dir_list;\r
+ }\r
+ \r
+ // Builds a worker bound to one input file.\r
+ // The input path is stored as the global string "../data/" + filename + mid,\r
+ // so mid also serves as the numeric suffix of the data file name.\r
+ public FileSystem(DistributedHashMap dir, DistributedHashMap fs, DistributedLinkedList dir_list, String filename, int mid) {\r
+ this.dir = dir;\r
+ this.fs = fs;\r
+ this.dir_list = dir_list;\r
+ this.mid = mid;\r
+ this.inputfile = global new GlobalString("../data/"+filename + mid);\r
+ }\r
+\r
+\r
+ // Re-targets this worker: records mid and rebuilds the input path\r
+ // as "../data/" + filename + mid, using the same rule as the five-argument constructor.\r
+ public void setInputFileName(String filename, int mid) {\r
+ this.mid = mid;\r
+ this.inputfile = global new GlobalString("../data/"+filename + mid);\r
+ }\r
+\r
+ // Seeds the shared structures by delegating to fillHashTable().\r
+ public void init() {\r
+ fillHashTable();\r
+ }\r
+ \r
+ // Registers the root directory "/home/" with an empty child list in dir\r
+ // and appends the same path to dir_list, all inside one atomic section.\r
+ public void fillHashTable() {\r
+   atomic {\r
+     GlobalString rootPath = global new GlobalString("/home/"); // root is 'home'\r
+     DistributedLinkedList rootChildren = global new DistributedLinkedList();\r
+\r
+     dir.put(rootPath, rootChildren);\r
+     dir_list.add(rootPath);\r
+   }\r
+ }\r
+ \r
+ // Reads the command file and appends one Transaction per usable line to todoList.\r
+ // A line is expected to look like "<command char> <path>", e.g. "c /home/abc.c":\r
+ // the command is the first character and the path starts at index 2.\r
+ // Lines shorter than three characters (blank or truncated) carry no path and are skipped,\r
+ // so charAt(0)/subString(2) are never applied to too little input.\r
+ // The stream is closed once the whole file has been consumed.\r
+ // NOTE(review): no value field is parsed here, so every Transaction keeps its empty default value.\r
+ public static void fillTodoList(String file, LinkedList todoList) {\r
+   FileInputStream fis = new FileInputStream(file);\r
+   String comm;\r
+\r
+   while ((comm = fis.readLine()) != null) { // 'command' 'path'\r
+     if (comm.length() >= 3) {\r
+       char c = comm.charAt(0); // ex) c /home/abc.c\r
+       String key = comm.subString(2);\r
+       Transaction t = new Transaction(c, key);\r
+       todoList.add(t);\r
+     }\r
+   }\r
+\r
+   fis.close();\r
+ }\r
+\r
+ // Thread body: loads this worker's command file into a local todo list and replays it.\r
+ // Each command is classified as a directory operation when its path ends with '/',\r
+ // otherwise as a file operation. 'r' reads and 'c' creates; any other command character\r
+ // is dropped without output. After the list is drained the worker sleeps, prints the\r
+ // directory list and the recovery statistics.\r
+ public void run() {\r
+ Transaction t;\r
+\r
+ char command;\r
+ String key;\r
+ String val;\r
+ GlobalString gkey;\r
+ GlobalString gval;\r
+ boolean isDir;\r
+\r
+ int index;\r
+ String file;\r
+ // inputfile is a global object, so it is copied to a local string inside a transaction.\r
+ atomic {\r
+ file = inputfile.toLocalString();\r
+ }\r
+\r
+ LinkedList todoList = new LinkedList();\r
+ fillTodoList(file, todoList);\r
+\r
+ while (!todoList.isEmpty()) {\r
+ t = (Transaction)(todoList.removeFirst());\r
+\r
+ command = t.getCommand();\r
+ key = t.getKey();\r
+\r
+ // The map operations below take a global key, so the local path is wrapped first.\r
+ atomic {\r
+ gkey = global new GlobalString(key);\r
+ }\r
+\r
+ // A path whose last character is '/' names a directory.\r
+ index = key.lastindexOf('/');\r
+ if (index+1 == key.length()) \r
+ isDir = true;\r
+ else \r
+ isDir = false;\r
+ \r
+ if (command == 'r') {\r
+ System.out.println("["+command+"] ["+key+"]");\r
+ if (isDir == true) {\r
+ atomic {\r
+ readDirectory(gkey);\r
+ }\r
+ }\r
+ else {\r
+ atomic {\r
+ readFile(gkey);\r
+ }\r
+ }\r
+ }\r
+ else if (command == 'c') {\r
+ System.out.println("["+command+"] ["+key+"]");\r
+ if (isDir == true) {\r
+ atomic {\r
+ createDirectory(gkey);\r
+ }\r
+ }\r
+ else {\r
+ // NOTE(review): fillTodoList builds transactions with the two-argument constructor,\r
+ // so val is presumably always the empty string here - confirm.\r
+ val = t.getValue();\r
+ atomic {\r
+ gval = global new GlobalString(val);\r
+ createFile(gkey, gval);\r
+ }\r
+ }\r
+ }\r
+ }\r
+\r
+ // NOTE(review): the unit of this sleep argument is not visible in this file - confirm.\r
+ sleep(3000000);\r
+ atomic {\r
+ output();\r
+ }\r
+\r
+ RecoveryStat.printRecoveryStat();\r
+\r
+\r
+ System.out.println("\n\n\n I'm done\n\n\n");\r
+\r
+ }\r
+\r
+ // Prints every path stored in dir_list, one per line, in iteration order.\r
+ public void output() {\r
+   Iterator paths = dir_list.iterator();\r
+\r
+   while (paths.hasNext()) {\r
+     GlobalString entry = (GlobalString)(paths.next());\r
+     System.printString(entry.toLocalString() + "\n");\r
+   }\r
+ }\r
+\r
+ // Looks up the file stored under gkey in fs and copies its contents to a local string.\r
+ // The lookup result is null-checked before toLocalString() is called, so a key without\r
+ // an entry is reported with "No such file or directory" rather than dereferenced.\r
+ // The contents themselves are read but not printed.\r
+ public void readFile(GlobalString gkey) {\r
+   GlobalString gval;\r
+   String val = null;\r
+\r
+   atomic {\r
+     gval = (GlobalString)(fs.get(gkey));\r
+     if (gval != null) {\r
+       val = gval.toLocalString();\r
+     }\r
+   }\r
+\r
+   if (val == null) {\r
+     System.out.println("No such file or directory");\r
+   }\r
+ }\r
+\r
+ // Walks the child list registered for directory gkey, fetching every entry.\r
+ // Prints "No such file or directory" when dir holds no list for the key.\r
+ public void readDirectory(GlobalString gkey) {\r
+   DistributedLinkedList children = (DistributedLinkedList)(dir.get(gkey));\r
+\r
+   if (children == null) {\r
+     System.out.println("No such file or directory");\r
+   }\r
+   else {\r
+     Iterator it = children.iterator();\r
+     while (it.hasNext()) {\r
+       // Each entry is fetched and cast; its value is not used afterwards.\r
+       GlobalString child = (GlobalString)(it.next());\r
+     }\r
+   }\r
+ }\r
+\r
+ // Creates file gkey with contents gval inside its parent directory.\r
+ // The parent is everything up to and including the last '/', the file name is the rest.\r
+ // When the parent is a known directory the name is pushed onto its child list, the list is\r
+ // stored back into dir and the contents go into fs; otherwise "Cannot create file" is printed.\r
+ public void createFile(GlobalString gkey, GlobalString gval) {\r
+   int slash = gkey.lastindexOf('/');\r
+   GlobalString parent = gkey.subString(0, slash+1);\r
+   GlobalString name = gkey.subString(slash+1);\r
+\r
+   if (!dir.containsKey(parent)) {\r
+     System.out.println("Cannot create file");\r
+   }\r
+   else {\r
+     DistributedLinkedList siblings = (DistributedLinkedList)(dir.get(parent));\r
+     siblings.push(name);\r
+     dir.put(parent, siblings);\r
+     fs.put(gkey, gval);\r
+   }\r
+ }\r
+\r
+ // Creates directory gkey below its parent directory.\r
+ // run() only calls this for keys ending in '/', so the search for the parent's separator\r
+ // starts two characters from the end to step over that trailing '/'.\r
+ // On success the name is pushed onto the parent's child list, an empty child list is\r
+ // registered for gkey and gkey is appended to dir_list. If no separator is found or the\r
+ // parent is unknown, "Cannot create directory" is printed.\r
+ public void createDirectory(GlobalString gkey) {\r
+   boolean created = false;\r
+   int slash = gkey.lastindexOf('/', gkey.length()-2);\r
+\r
+   if (slash != -1) {\r
+     GlobalString parent = gkey.subString(0, slash+1);\r
+     GlobalString name = gkey.subString(slash+1);\r
+\r
+     if (dir.containsKey(parent)) {\r
+       DistributedLinkedList siblings = (DistributedLinkedList)(dir.get(parent));\r
+       siblings.push(name);\r
+       dir.put(parent, siblings);\r
+\r
+       DistributedLinkedList ownChildren = global new DistributedLinkedList();\r
+       dir.put(gkey, ownChildren);\r
+       dir_list.add(gkey);\r
+       created = true;\r
+     }\r
+   }\r
+\r
+   if (!created) {\r
+     System.out.println("Cannot create directory");\r
+   }\r
+ }\r
+ \r
+ // Thin lookup helper: returns whatever mydhmap holds under key.\r
+ public Object read(DistributedHashMap mydhmap, GlobalString key) {\r
+   return mydhmap.get(key);\r
+ }\r
+ \r
+ // Entry point; expects <num_thread> <data file prefix> and exits after printing the usage line otherwise.\r
+ // Inside one atomic section it allocates the shared maps and the directory list, seeds the root\r
+ // directory through a bootstrap instance and creates one worker per thread (worker i gets input suffix i).\r
+ // Worker i is then started on machine mid[i]; all workers are joined before "Finished" is printed.\r
+ public static void main(String[] args) {\r
+   int NUM_THREADS = 3;\r
+   String filename = new String();\r
+\r
+   if (args.length == 2) {\r
+     NUM_THREADS = Integer.parseInt(args[0]);\r
+     filename = args[1];\r
+   }\r
+   else {\r
+     System.out.println("./FileSystem.bin master <num_thread> <data>");\r
+     System.exit(0);\r
+   }\r
+\r
+   // Machine ids are IPv4 addresses packed into an int, one slot per worker thread.\r
+   int[] mid = new int[8];\r
+   mid[0] = (128<<24)|(195<<16)|(180<<8)|21;//dw-2\r
+   mid[1] = (128<<24)|(195<<16)|(180<<8)|26;//dw-7\r
+/*\r
+ mid[0] = (128<<24)|(195<<16)|(136<<8)|162;//dc-1\r
+ mid[1] = (128<<24)|(195<<16)|(136<<8)|163;//dc-2\r
+ mid[2] = (128<<24)|(195<<16)|(136<<8)|164;//dc-3\r
+ mid[3] = (128<<24)|(195<<16)|(136<<8)|165;//dc-4\r
+ mid[4] = (128<<24)|(195<<16)|(136<<8)|166;//dc-5\r
+ mid[5] = (128<<24)|(195<<16)|(136<<8)|167;//dc-6\r
+ mid[6] = (128<<24)|(195<<16)|(136<<8)|168;//dc-7\r
+ mid[7] = (128<<24)|(195<<16)|(136<<8)|169;//dc-8\r
+ */\r
+   // Every worker reads its own mid[] slot, so the thread count must fit the array.\r
+   // NOTE(review): only mid[0] and mid[1] are assigned above - confirm the entries for further threads.\r
+   if (NUM_THREADS < 1 || NUM_THREADS > mid.length) {\r
+     System.out.println("num_thread must be between 1 and " + mid.length);\r
+     System.exit(1);\r
+   }\r
+\r
+   FileSystem[] lus;\r
+   FileSystem initLus;\r
+\r
+   atomic {\r
+     DistributedHashMap fs = global new DistributedHashMap(500, 500, 0.75f);\r
+     DistributedHashMap dir = global new DistributedHashMap(500, 500, 0.75f);\r
+     DistributedLinkedList dir_list = global new DistributedLinkedList();\r
+\r
+     // The bootstrap instance only exists to register the root directory.\r
+     initLus = global new FileSystem(dir, fs, dir_list);\r
+     initLus.init();\r
+\r
+     lus = global new FileSystem[NUM_THREADS];\r
+     for(int i = 0; i < NUM_THREADS; i++) {\r
+       lus[i] = global new FileSystem(initLus.dir, initLus.fs, initLus.dir_list, filename, i);\r
+     }\r
+   }\r
+\r
+   FileSystem tmp;\r
+   /* Start threads */\r
+   for(int i = 0; i < NUM_THREADS; i++) {\r
+     atomic {\r
+       tmp = lus[i];\r
+     }\r
+     Thread.myStart(tmp, mid[i]);\r
+   }\r
+\r
+   /* Join threads */\r
+   for(int i = 0; i < NUM_THREADS; i++) {\r
+     atomic {\r
+       tmp = lus[i];\r
+     }\r
+     tmp.join();\r
+   }\r
+\r
+   System.printString("Finished\n");\r
+ }\r
+}\r
+\r
+// One queued command for the todo list: a command character, the path it applies to\r
+// and an optional value.\r
+public class Transaction {\r
+  char command; // FileSystem.run acts on 'r' (read) and 'c' (create)\r
+  String key;\r
+  String val;\r
+\r
+  // Command without a value; val becomes a fresh empty string.\r
+  Transaction (char c, String key) {\r
+    this.command = c;\r
+    this.key = new String(key);\r
+    this.val = new String();\r
+  }\r
+\r
+  // Command with a value; both strings are copied.\r
+  Transaction (char c, String key, String val) {\r
+    this.command = c;\r
+    this.key = new String(key);\r
+    this.val = new String(val);\r
+  }\r
+\r
+  // Returns the command character.\r
+  public char getCommand() {\r
+    return this.command;\r
+  }\r
+\r
+  // Returns the path this command applies to.\r
+  public String getKey() {\r
+    return this.key;\r
+  }\r
+\r
+  // Returns the value attached to the command (empty when none was given).\r
+  public String getValue() {\r
+    return this.val;\r
+  }\r
+}\r
--- /dev/null
+128.195.180.21
+#128.195.180.24
+128.195.180.26
--- /dev/null
+# Builds the FileSystem benchmark through the shared buildscript.
+MAINCLASS=FileSystem
+SRC1=${MAINCLASS}.java
+FLAGS= -recoverystats -dsm -dsmtask -recovery -nooptimize -mainclass ${MAINCLASS}
+
+# Neither target produces a file named after itself.
+.PHONY: default clean
+
+default:
+	../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC1}
+
+# -f keeps clean from failing when no .bin file has been built yet.
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
+++ /dev/null
-/* MatrixMultiplyN.java
-
- Matrix Multiplication Benchmark using Task Library.
- a, b, and c are two dimensional array.
- It computes a * b and assigns to c.
-
-*/
-public class MatrixMultiply extends Task {
- MMul mmul;
- int SIZE;
- int increment;
-
- public MatrixMultiply(MMul mmul, int num_threads, int size,int increment) {
- this.mmul = mmul;
-
- SIZE = size;
- this.increment = increment;
-
- init();
- }
-
- public void init() {
- todoList = global new GlobalQueue();
-
- fillTodoList();
- }
-
- // fill up the Work Pool
- public void fillTodoList() {
- Segment seg;
- int i;
-
- for(i = 0; i < SIZE; i +=increment) {
-
- if(i+increment > SIZE) {
- seg = global new Segment(i,SIZE);
- }
- else {
- seg = global new Segment(i, i + increment);
- }
- todoList.push(seg);
- }
- }
-
- public void execute() {
- double la[][];
- double lc[][];
- double lb[][];
- double rowA[];
- double colB[];
- Segment seg;
-
- double innerproduct;
- int i,j;
- int x0;
- int x1;
- int size;
-
- // get matrix
- atomic {
- seg = (Segment)myWork;
- x0 = seg.x0; // x start row
- x1 = seg.x1; // x end row
- la = mmul.a; // first mat
- lb = mmul.btranspose; // second mat
- size = SIZE;
- }
-
- lc = new double[size][size];
-
- for(i = x0; i < x1 ; i++) {
- atomic {
- rowA = la[i]; // grab first mat's row
-
- for(j = 0; j < size ; j++) {
- colB = lb[j]; // grab second mat's col
-
- innerproduct = computeProduct(rowA,colB, size); // computes the value
-
- lc[i][j] = innerproduct; // store in dest mat
- } // end of for j
- }
- } // end for i
-// }
-
- atomic {
- for (i = x0; i < x1; i++) {
- for (j = 0; j < size; j++) {
- mmul.c[i][j] = lc[i][j];
- }
- }
- }
- }
-
- public double computeProduct(double[] rowA,double[] colB, int size)
- {
- int i;
- double sum = 0;
-
- for(i = 0 ;i < size; i++) {
- sum += rowA[i] * colB[i];
- }
-
- return sum;
- }
-
- public void done(Object work) {
- }
-
- public static void main(String[] args) {
- int NUM_THREADS=4;
- int SIZE = 1600;
- int increment = 80;
- int i,j;
- Work[] works;
- MMul matrix;
- MatrixMultiply mm;
- Segment[] currentWorkList;
-
- if (args.length == 3) {
- NUM_THREADS = Integer.parseInt(args[0]);
- SIZE = Integer.parseInt(args[1]);
- increment = Integer.parseInt(args[2]); // size of subtask
- }
- else {
- System.out.println("usage: ./MatrixMultiply.bin master <num_threads> <size of matrix> <size of subtask>");
- }
-
- int[] mid = new int[8];
- mid[0] = (128<<24)|(195<<16)|(180<<8)|21; //dw-2
-/* mid[1] = (128<<24)|(195<<16)|(180<<8)|24; //dw-5
- mid[2] = (128<<24)|(195<<16)|(180<<8)|26; //dw-7
- mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc1
- mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc2
- mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc3
- mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc4
- mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc5
- mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc6
- mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dc7
- mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dc8
-*/
- atomic {
- matrix = global new MMul(SIZE, SIZE, SIZE);
- matrix.setValues();
- matrix.transpose();
- mm = global new MatrixMultiply(matrix, NUM_THREADS, SIZE,increment);
-
- works = global new Work[NUM_THREADS];
- currentWorkList = global new Segment[NUM_THREADS];
-
- for(i = 0; i < NUM_THREADS; i++) {
- works[i] = global new Work(mm, NUM_THREADS, i,currentWorkList);
- }
- }
-
- long st = System.currentTimeMillis();
- long fi;
-
- Work tmp;
- for (i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = works[i];
- }
- Thread.myStart(tmp,mid[i]);
- }
-
- for (i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = works[i];
- }
- tmp.join();
- }
- fi = System.currentTimeMillis();
-
- double sum= 0;
- atomic {
- sum = matrix.getSum();
- }
-
- System.out.println("Sum of matrix = " + sum);
- System.out.println("Time Elapse = " + (double)((fi-st)/1000));
- System.printString("Finished\n");
- }
-
- public void output() {
- System.out.println("Sum = " + mmul.getSum());
- }
-
-}
-
-public class MMul{
- public int L, M, N;
- public double[][] a;
- public double[][] b;
- public double[][] c;
- public double[][] btranspose;
-
- public MMul(int L, int M, int N) {
- this.L = L;
- this.M = M;
- this.N = N;
- a = global new double[L][M];
- b = global new double[M][N];
- c = global new double[L][N];
- btranspose = global new double[N][M];
- }
-
- public void setValues() {
- for(int i = 0; i < L; i++) {
- double ai[] = a[i];
- for(int j = 0; j < M; j++) {
- ai[j] = j+1;
- }
- }
-
- for(int i = 0; i < M; i++) {
- double bi[] = b[i];
- for(int j = 0; j < N; j++) {
- bi[j] = j+1;
- }
- }
-
- for(int i = 0; i < L; i++) {
- double ci[] = c[i];
- for(int j = 0; j < N; j++) {
- ci[j] = 0;
- }
- }
- for(int i = 0; i < N; i++) {
- double btransposei[] = btranspose[i];
- for(int j = 0; j < M; j++) {
- btransposei[j] = 0;
- }
- }
- }
-
- public void transpose() {
- for(int row = 0; row < M; row++) {
- double brow[] = b[row];
- for(int col = 0; col < N; col++) {
- btranspose[col][row] = brow[col];
- }
- }
- }
-
- public double getSum() {
- double sum =0;
-
- for(int row =0; row < L; row++) {
- double cr[] = c[row];
- for(int col = 0; col < N; col++) {
- sum += cr[col];
- }
- }
- return sum;
- }
-}
-
-public class Segment {
- int x0;
- int x1;
-
- Segment (int x0, int x1) {
- this.x0 = x0;
- this.x1 = x1;
- }
-}
-
+++ /dev/null
-128.195.180.21
-#128.195.136.162
-#128.195.136.163
-#128.195.136.164
-#128.195.136.165
-#128.195.136.166
-#128.195.136.167
-#128.195.136.168
-#128.195.136.169
-
--- /dev/null
+# Builds the MatrixMultiply benchmark through the shared buildscript.
+MAINCLASS=MatrixMultiply
+SRC1=${MAINCLASS}.java
+FLAGS= -optimize -thread -mainclass ${MAINCLASS}
+
+# Neither target produces a file named after itself.
+.PHONY: default clean
+
+default:
+	../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC1}
+
+# -f keeps clean from failing when no .bin file has been built yet.
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
+++ /dev/null
-MAINCLASS=MatrixMultiply
-SRC1=${MAINCLASS}.java
-
-FLAGS= -recovery -recoverystats -dsm -32bit -nooptimize -debug -dsmtask -mainclass ${MAINCLASS}
-
-default:
- ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC1}
-
-clean:
- rm -rf tmpbuilddirectory
- rm *.bin
--- /dev/null
+/* MatrixMultiplyN.java
+
+ Matrix Multiplication Benchmark using Task Library.
+ a, b, and c are two dimensional array.
+ It computes a * b and assigns to c.
+
+*/
+public class MatrixMultiply extends Task {
+ MMul mmul;
+ int SIZE;
+ int increment;
+
+ // Stores the shared matrices, the matrix dimension and the rows-per-task increment,
+ // then builds the work pool via init(). num_threads is accepted but not used here.
+ public MatrixMultiply(MMul mmul, int num_threads, int size,int increment) {
+ this.mmul = mmul;
+
+ SIZE = size;
+ this.increment = increment;
+
+ init();
+ }
+
+ // Allocates a fresh global queue for todoList and fills it with row segments.
+ public void init() {
+ todoList = global new GlobalQueue();
+
+ fillTodoList();
+ }
+
+ // fill up the Work Pool
+ public void fillTodoList() {
+   // Cut rows [0, SIZE) into consecutive segments of `increment` rows;
+   // the last segment is clipped so it never runs past SIZE.
+   for (int start = 0; start < SIZE; start += increment) {
+     int end = start + increment;
+     if (end > SIZE) {
+       end = SIZE;
+     }
+
+     Segment seg = global new Segment(start, end);
+     todoList.push(seg);
+   }
+ }
+
+ // Computes rows [x0, x1) of c = a * b for the Segment held in myWork.
+ // b is read through its transpose so that both operands of each inner product are rows.
+ // Results are gathered in a local buffer first and copied into mmul.c in a final atomic section.
+ public void execute() {
+   double la[][];
+   double lc[][];
+   double lb[][];
+   double rowA[];
+   double colB[];
+   double lcRow[];
+   Segment seg;
+
+   int i,j;
+   int x0;
+   int x1;
+   int size;
+
+   // get matrix
+   atomic {
+     seg = (Segment)myWork;
+     x0 = seg.x0;          // x start row
+     x1 = seg.x1;          // x end row
+     la = mmul.a;          // first mat
+     lb = mmul.btranspose; // second mat
+     size = SIZE;
+   }
+
+   // Only this segment's rows are buffered: the buffer is (x1 - x0) x size
+   // and matrix row i lives at buffer index i - x0.
+   lc = new double[x1 - x0][size];
+
+   for(i = x0; i < x1 ; i++) {
+     atomic {
+       rowA = la[i];       // grab first mat's row
+       lcRow = lc[i - x0];
+
+       for(j = 0; j < size ; j++) {
+         colB = lb[j];     // grab second mat's col
+         lcRow[j] = computeProduct(rowA,colB, size); // store in local dest row
+       } // end of for j
+     }
+   } // end for i
+
+   atomic {
+     for (i = x0; i < x1; i++) {
+       for (j = 0; j < size; j++) {
+         mmul.c[i][j] = lc[i - x0][j];
+       }
+     }
+   }
+ }
+
+ // Returns the dot product of the first `size` elements of rowA and colB.
+ public double computeProduct(double[] rowA,double[] colB, int size)
+ {
+   double acc = 0;
+
+   for (int k = 0; k < size; k++) {
+     acc += rowA[k] * colB[k];
+   }
+
+   return acc;
+ }
+
+ // Completion hook; this benchmark does nothing with the finished work item.
+ public void done(Object work) {
+ }
+
+ // Entry point: <num_threads> <size of matrix> <size of subtask>. With any other argument
+ // count the usage line is printed and the built-in defaults are used.
+ // Sets up the matrices and one Work per thread in an atomic section, starts worker i on
+ // machine mid[i], joins all workers, then prints the sum of c and the elapsed time in seconds.
+ public static void main(String[] args) {
+   int NUM_THREADS=4;
+   int SIZE = 1600;
+   int increment = 80;
+   int i;
+   Work[] works;
+   MMul matrix;
+   MatrixMultiply mm;
+   Segment[] currentWorkList;
+
+   if (args.length == 3) {
+     NUM_THREADS = Integer.parseInt(args[0]);
+     SIZE = Integer.parseInt(args[1]);
+     increment = Integer.parseInt(args[2]); // size of subtask
+   }
+   else {
+     System.out.println("usage: ./MatrixMultiply.bin master <num_threads> <size of matrix> <size of subtask>");
+   }
+
+   // Machine ids are IPv4 addresses packed into an int, one slot per worker thread.
+   int[] mid = new int[8];
+   mid[0] = (128<<24)|(195<<16)|(180<<8)|21; //dw-2
+   mid[1] = (128<<24)|(195<<16)|(180<<8)|26; //dw-7
+/* mid[2] = (128<<24)|(195<<16)|(180<<8)|26; //dw-7
+ mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc1
+ mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc2
+ mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc3
+ mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc4
+ mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc5
+ mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc6
+ mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dc7
+ mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dc8
+*/
+   // Every worker reads its own mid[] slot, so the thread count must fit the array.
+   // NOTE(review): only mid[0] and mid[1] are assigned above - confirm the entries for further threads.
+   if (NUM_THREADS < 1 || NUM_THREADS > mid.length) {
+     System.out.println("num_threads must be between 1 and " + mid.length);
+     System.exit(1);
+   }
+   // The work pool is filled in steps of `increment`; a step below 1 would never reach SIZE.
+   if (increment < 1) {
+     System.out.println("size of subtask must be at least 1");
+     System.exit(1);
+   }
+
+   atomic {
+     matrix = global new MMul(SIZE, SIZE, SIZE);
+     matrix.setValues();
+     matrix.transpose();
+     mm = global new MatrixMultiply(matrix, NUM_THREADS, SIZE,increment);
+
+     works = global new Work[NUM_THREADS];
+     currentWorkList = global new Segment[NUM_THREADS];
+
+     for(i = 0; i < NUM_THREADS; i++) {
+       works[i] = global new Work(mm, NUM_THREADS, i,currentWorkList);
+     }
+   }
+
+   long st = System.currentTimeMillis();
+   long fi;
+
+   Work tmp;
+   for (i = 0; i < NUM_THREADS; i++) {
+     atomic {
+       tmp = works[i];
+     }
+     Thread.myStart(tmp,mid[i]);
+   }
+
+   for (i = 0; i < NUM_THREADS; i++) {
+     atomic {
+       tmp = works[i];
+     }
+     tmp.join();
+   }
+   fi = System.currentTimeMillis();
+
+   double sum= 0;
+   atomic {
+     sum = matrix.getSum();
+   }
+
+   System.out.println("Sum of matrix = " + sum);
+   // Convert to double before dividing so fractions of a second are kept.
+   System.out.println("Time Elapse = " + (((double)(fi - st)) / 1000));
+   System.printString("Finished\n");
+ }
+
+ // Prints the sum of all entries of the result matrix.
+ public void output() {
+   double total = mmul.getSum();
+   System.out.println("Sum = " + total);
+ }
+
+}
+
+// Holds the operand and result matrices of the benchmark as globally allocated arrays:
+// a is L x M, b is M x N, c is L x N and btranspose is N x M.
+public class MMul{
+ public int L, M, N;
+ public double[][] a;
+ public double[][] b;
+ public double[][] c;
+ public double[][] btranspose;
+
+ // Records the three dimensions and allocates all four matrices.
+ public MMul(int L, int M, int N) {
+ this.L = L;
+ this.M = M;
+ this.N = N;
+ a = global new double[L][M];
+ b = global new double[M][N];
+ c = global new double[L][N];
+ btranspose = global new double[N][M];
+ }
+
+ // Fills a and b so that column j holds j+1 in every row, and zeroes c and btranspose.
+ // Each row reference is fetched once and reused for the whole inner loop.
+ public void setValues() {
+ for(int i = 0; i < L; i++) {
+ double ai[] = a[i];
+ for(int j = 0; j < M; j++) {
+ ai[j] = j+1;
+ }
+ }
+
+ for(int i = 0; i < M; i++) {
+ double bi[] = b[i];
+ for(int j = 0; j < N; j++) {
+ bi[j] = j+1;
+ }
+ }
+
+ for(int i = 0; i < L; i++) {
+ double ci[] = c[i];
+ for(int j = 0; j < N; j++) {
+ ci[j] = 0;
+ }
+ }
+ for(int i = 0; i < N; i++) {
+ double btransposei[] = btranspose[i];
+ for(int j = 0; j < M; j++) {
+ btransposei[j] = 0;
+ }
+ }
+ }
+
+ // Writes the transpose of b into btranspose: btranspose[col][row] = b[row][col].
+ public void transpose() {
+ for(int row = 0; row < M; row++) {
+ double brow[] = b[row];
+ for(int col = 0; col < N; col++) {
+ btranspose[col][row] = brow[col];
+ }
+ }
+ }
+
+ // Returns the sum of every entry of c.
+ public double getSum() {
+ double sum =0;
+ 
+ for(int row =0; row < L; row++) {
+ double cr[] = c[row];
+ for(int col = 0; col < N; col++) {
+ sum += cr[col];
+ }
+ }
+ return sum;
+ }
+}
+
+// One unit of work: a contiguous range of matrix rows.
+// MatrixMultiply.execute iterates i from x0 while i < x1, so x0 is the first row
+// and x1 is one past the last row.
+public class Segment {
+ int x0;
+ int x1;
+
+ // Stores the start row and the exclusive end row.
+ Segment (int x0, int x1) {
+ this.x0 = x0;
+ this.x1 = x1;
+ }
+}
+
--- /dev/null
+128.195.180.21
+128.195.180.26
+#128.195.136.162
+#128.195.136.163
+#128.195.136.164
+#128.195.136.165
+#128.195.136.166
+#128.195.136.167
+#128.195.136.168
+#128.195.136.169
+
--- /dev/null
+# Builds the MatrixMultiply benchmark through the shared buildscript.
+MAINCLASS=MatrixMultiply
+SRC1=${MAINCLASS}.java
+
+FLAGS= -recovery -recoverystats -dsm -32bit -nooptimize -debug -dsmtask -mainclass ${MAINCLASS}
+
+# Neither target produces a file named after itself.
+.PHONY: default clean
+
+default:
+	../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC1}
+
+# -f keeps clean from failing when no .bin file has been built yet.
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
+++ /dev/null
-public class BufferedReader {
- FileInputStream fr;
- byte[] buffer;
- int offset;
- int end;
-
- public BufferedReader(FileInputStream fr) {
- this.fr=fr;
- this.buffer=new byte[2048];
- }
-
- public int read() {
- if (offset<end) {
- return buffer[offset++];
- } else {
- readBuffer();
- if (end<=0)
- return -1;
- return buffer[offset++];
- }
- }
-
- public int read(byte[] array) {
- int off=0;
- int arraylen=array.length;
- do {
- for(;offset<end;offset++) {
- if (off>=arraylen)
- return off;
- array[off++]=buffer[offset];
- }
- readBuffer();
- if (end==0)
- return off;
- if (end<0)
- return end;
- } while(true);
- }
-
- public void readBuffer() {
- offset=0;
- end=fr.read(buffer);
- }
-
- public String readLine() {
- String str=null;
- do {
- boolean foundcr=false;
- int index=offset;
- for(;index<end;index++) {
- if (buffer[index]=='\n'||buffer[index]==13) {
- foundcr=true;
- break;
- }
- }
- String buf=new String(buffer, offset, index-offset);
- if (str==null)
- str=buf;
- else
- str=str.concat(buf);
- if (foundcr) {
- offset=index++;
- do {
- for(;offset<end;offset++) {
- if (buffer[offset]!='\n'&&buffer[offset]!=13) {
- return str;
- }
- }
- readBuffer();
- if (end<=0)
- return str;
- } while(true);
- } else {
- readBuffer();
- if (end<=0)
- return null;
- }
- } while(true);
-
- }
-
- public void close() {
- fr.close();
- }
-
-}
\ No newline at end of file
+++ /dev/null
-/**
- * Hash map with separate chaining whose table and nodes are allocated with
- * {@code global new}. Each slot of {@code table} holds a
- * {@link DistributedHashEntry} bucket header; its {@code array} field is the
- * head of a singly linked chain of {@link DHashEntry} nodes.
- *
- * The table is never resized: {@code loadFactor} is stored but not consulted
- * anywhere in this class.
- */
-public class DistributedHashMap {
-  DistributedHashEntry[] table;
-  float loadFactor;
-
-  /**
-   * @param initialCapacity number of buckets (fixed for the map's lifetime)
-   * @param loadFactor      recorded only; see the class comment
-   */
-  public DistributedHashMap(int initialCapacity, float loadFactor) {
-    init(initialCapacity, loadFactor);
-  }
-
-  private void init(int initialCapacity, float loadFactor) {
-    table=global new DistributedHashEntry[initialCapacity];
-    this.loadFactor=loadFactor;
-  }
-
-  /** Maps a hash code to a bucket index in [0, length). */
-  private static int hash1(int hashcode, int length) {
-    int value=hashcode%length;
-    if (value<0)
-      return -value;
-    else
-      return value;
-  }
-
-  /** Returns the chain node that holds {@code key}, or null when absent. */
-  private DHashEntry findEntry(Object key) {
-    int hashcode=key.hashCode();
-    DistributedHashEntry dhe=table[hash1(hashcode, table.length)];
-    if (dhe==null)
-      return null;
-    for(DHashEntry ptr=dhe.array; ptr!=null; ptr=ptr.next) {
-      if (ptr.hashval==hashcode&&ptr.key.equals(key))
-        return ptr;
-    }
-    return null;
-  }
-
-  /**
-   * Unlinks the mapping for {@code key}.
-   *
-   * @return the value that was mapped to {@code key}, or null if none
-   */
-  Object remove(Object key) {
-    int hashcode=key.hashCode();
-    int index1=hash1(hashcode, table.length);
-    DistributedHashEntry dhe=table[index1];
-    if (dhe==null)
-      return null;
-    DHashEntry ptr=dhe.array;
-    if (ptr==null)
-      return null;
-
-    // Head of the chain matches: the bucket header must be re-pointed.
-    if (ptr.hashval==hashcode&&ptr.key.equals(key)) {
-      dhe.array=ptr.next;
-      dhe.count--;
-      return ptr.value;
-    }
-
-    // Otherwise look one node ahead so the predecessor can be relinked.
-    // Both the hash comparison and the returned value must come from the
-    // node being removed, not from its predecessor.
-    while(ptr.next!=null) {
-      DHashEntry victim=ptr.next;
-      if (victim.hashval==hashcode&&victim.key.equals(key)) {
-        ptr.next=victim.next;
-        dhe.count--;
-        return victim.value;
-      }
-      ptr=victim;
-    }
-    return null;
-  }
-
-  /** @return the value mapped to {@code key}, or null if there is none */
-  Object get(Object key) {
-    DHashEntry entry=findEntry(key);
-    if (entry==null)
-      return null;
-    return entry.value;
-  }
-
-  /**
-   * @return the key object stored in the map that equals {@code key},
-   *         or null if there is none
-   */
-  Object getKey(Object key) {
-    DHashEntry entry=findEntry(key);
-    if (entry==null)
-      return null;
-    return entry.key;
-  }
-
-  /** @return true when a mapping for {@code key} exists */
-  boolean containsKey(Object key) {
-    return findEntry(key)!=null;
-  }
-
-  /**
-   * Maps {@code key} to {@code value}, creating the bucket header on first use.
-   *
-   * @return the previous value for {@code key}, or null if it was not mapped
-   */
-  Object put(Object key, Object value) {
-    int hashcode=key.hashCode();
-    int index1=hash1(hashcode, table.length);
-    DistributedHashEntry dhe=table[index1];
-    if (dhe==null) {
-      dhe=global new DistributedHashEntry();
-      table[index1]=dhe;
-    }
-    DHashEntry ptr=dhe.array;
-
-    while(ptr!=null) {
-      if (ptr.hashval==hashcode&&ptr.key.equals(key)) {
-        Object oldvalue=ptr.value;
-        ptr.value=value;
-        return oldvalue;
-      }
-      ptr=ptr.next;
-    }
-
-    // Not present: push a new node on the front of the chain.
-    DHashEntry he=global new DHashEntry();
-    he.value=value;
-    he.key=key;
-    he.hashval=hashcode;
-    he.next=dhe.array;
-    dhe.array=he;
-
-    dhe.count++;
-
-    return null;
-  }
-}
-
-/** Bucket header of a DistributedHashMap: a chain head plus an entry counter. */
-class DistributedHashEntry {
-  /** Number of nodes currently linked from {@code array}. */
-  int count;
-  /** First node of this bucket's chain; null while the bucket is empty. */
-  DHashEntry array;
-
-  public DistributedHashEntry() {
-    this.count=0;
-  }
-}
-
-
-/** One node of a hash bucket chain: a key/value pair with its cached hash. */
-class DHashEntry {
-  /** Hash code of {@code key}, cached when the node was inserted. */
-  int hashval;
-  Object key;
-  Object value;
-  /** Following node in the same bucket, or null at the end of the chain. */
-  DHashEntry next;
-
-  public DHashEntry() {
-  }
-}
+++ /dev/null
-/**
- * Computes an MD5-based signature for a piece of mail text.
- */
-public class EphemeralSignature {
-
-  int serverSeed;
-  String serverSeparator;
-  Random rand;
-
-  /** Creates a signature computer whose random source is seeded with 0. */
-  public EphemeralSignature() {
-    // Assign the field: a local declaration here would shadow it and leave
-    // this.rand unset.
-    rand = new Random(0);
-  }
-
-  /**
-   * @param randomNumberSeed seed for the random source
-   * @param separator        stored in {@code serverSeparator}
-   */
-  public EphemeralSignature(int randomNumberSeed, String separator) {
-    rand = new Random(randomNumberSeed);
-    serverSeparator = separator;
-  }
-
-  /**
-   * Stores the argument as the separator. This constructor does not create a
-   * random source, so {@code rand} stays null.
-   */
-  public EphemeralSignature(String seedAndSeparator) {
-    serverSeparator = seedAndSeparator;
-  }
-
-  /**
-   * Hashes {@code body} with MD5.
-   *
-   * @param body text to sign
-   * @return the 16 raw digest bytes converted to a String via
-   *         {@code new String(byte[])}
-   */
-  public String computeSignature(String body) {
-    MD5 md = new MD5();
-    byte buf[] = body.getBytes();
-    byte sig[] = new byte[16];
-
-    // Use the byte count, not body.length(): the two differ whenever a
-    // character encodes to more than one byte.
-    md.update(buf, buf.length);
-    md.md5final(sig);
-    String signature = new String(sig);
-
-    return signature;
-  }
-
-  /*
-     public long DEKHash(String str)
-     {
-     long hash = str.length();
-
-     for(int i = 0; i < str.length(); i++)
-     {
-     hash = ((hash << 5) ^ (hash >> 27)) ^ str.charAt(i);
-     }
-
-     return hash;
-     }
-   */
-
-}
+++ /dev/null
-/**
- * A FilterResult encapsulates the result of a filter made by checking a mail.
- * It turns a set of per-signature confidence values into a single spam / not
- * spam verdict by majority vote.
- **/
-public class FilterResult {
-  /**
-   * This value is used if type is ERROR or UNKNOWN.
-   */
-  public double NO_RESULT;
-
-  /**
-   * A result value greater or equal this value indicates that the filter has
-   * decided on spam.
-   */
-  public int SPAM_THRESHOLD;
-  public int ABSOLUTE_SPAM;
-  public int ABSOLUTE_HAM;
-
-  //public double result; // the result, a value between -1 (ham) and 1000 (spam),
-  // negative values for "error", "unknown" etc.
-
-  // -----------------------------------------------------------------------------
-
-  /**
-   * @param result currently ignored; only the threshold fields are initialised
-   */
-  public FilterResult(double result) {
-    SPAM_THRESHOLD=50;
-    ABSOLUTE_SPAM=100;
-    ABSOLUTE_HAM=0;
-    NO_RESULT=-1;
-    //this.result = result;
-  }
-
-  public FilterResult() {
-    SPAM_THRESHOLD=50;
-    ABSOLUTE_SPAM=100;
-    ABSOLUTE_HAM=0;
-    NO_RESULT=-1;
-  }
-
-  /**
-   * Majority vote over the given confidence values. Each value votes
-   * "unknown" (negative), "ham" (0 up to SPAM_THRESHOLD exclusive) or
-   * "spam" (SPAM_THRESHOLD and above). Ties go to the first category in the
-   * order unknown, ham, spam.
-   *
-   * @param confidenceVals one confidence value per checked signature
-   * @return true only when "spam" wins the vote; false otherwise, including
-   *         for an empty array
-   */
-  public boolean getResult(int[] confidenceVals) {
-    int[] res = new int[3]; //3 equals spam, ham and unknown
-    for(int i=0; i<confidenceVals.length; i++) {
-      if(confidenceVals[i] < 0)
-        res[0]+=1; //unknown
-      else if(confidenceVals[i] < SPAM_THRESHOLD)
-        res[1]+=1; //ham
-      else
-        res[2]+=1; //spam
-    }
-    int maxVotes=0;
-    // Start at "unknown" so that an empty vote yields a defined verdict
-    // instead of reading an unassigned local.
-    int max=0;
-    for(int i=0; i<3; i++) {
-      if(res[i] > maxVotes) {
-        maxVotes = res[i];
-        max = i;
-      }
-    }
-    if(max==0)
-      return false;
-    if(max==1)
-      return false;
-    if(max==2)
-      return true;
-
-    System.out.println("Err: getResult() Control shouldn't come here, max= " + max);
-    return false;
-  }
-
-  /*
-     public void addProperty(String key, String value) {
-     properties.put(key,value);
-     }
-
-     public String getProperty(String key) {
-     return properties.get(key);
-     }
-
-     public HashMap<String,String> getProperties() {
-     return properties;
-     }
-   */
-}
+++ /dev/null
-public class FilterStatistic {
- int unknown;
- int spam;
- int ham;
-
- // -------------------------------------------------------
-
- public FilterStatistic() {
- this.spam = 0;
- this.ham = 0;
- this.unknown = 0;
- }
-
- public FilterStatistic(int spam, int ham, int unknown) {
- this.spam = spam;
- this.ham = ham;
- this.unknown = unknown;
- }
-
- public int getChecked() {
- return getSpam() + getHam() + getUnknown();
- }
-
- public int getHam() {
- return ham;
- }
-
- public int getSpam() {
- return spam;
- }
-
- public void setHam(int i) {
- ham = i;
- }
-
- public void setSpam(int i) {
- spam = i;
- }
-
- public int getUnknown() {
- return unknown;
- }
-
- public void setUnknown(int u) {
- unknown = u;
- }
-
- public void increaseSpam() {
- setSpam(getSpam() + 1);
- }
-
- public void increaseHam() {
- setHam(getHam() + 1);
- }
-
- public void increaseUnknown() {
- setUnknown(getUnknown() + 1);
- }
-
- public String toString() {
- String str = "Filterstats_spam_"+spam;
- str += "_ham_" +ham;
- str += "_unknown_"+unknown;
- return str;
- }
-}
+++ /dev/null
-/**
- * String-like character sequence described by a backing array {@code value},
- * a start {@code offset} into it and a length {@code count}. Most constructors
- * allocate the backing array with {@code global new}.
- */
-public class GString {
-  public char value[];
-  public int count;
-  public int offset;
-
-  public GString() {
-  }
-
-  /** Creates a one-character string. */
-  public GString(char c) {
-    char[] str = global new char[1];
-    str[0] = c;
-    // Set the fields directly: writing GString(str) here is a method call,
-    // not constructor chaining, and would leave this object uninitialised.
-    this.value = str;
-    this.count = 1;
-    this.offset = 0;
-  }
-
-  /** Copies the characters of {@code str} into a freshly allocated array. */
-  public GString(String str) {
-    value = global new char[str.count];
-    for(int i =0; i< str.count; i++) {
-      value[i] = str.value[i+str.offset];
-    }
-    count = str.count;
-    offset = 0;
-  }
-
-  /** Creates a view that shares the backing array of {@code gstr}. */
-  public GString(GString gstr) {
-    this.value = gstr.value;
-    this.count = gstr.count;
-    this.offset = gstr.offset;
-  }
-
-  /*
-     public GString(StringBuffer gsb) {
-     value = global new char[gsb.length()];
-     count = gsb.length();
-     offset = 0;
-     for (int i = 0; i < count; i++)
-     value[i] = gsb.value[i];
-     }
-   */
-
-  /** Copies {@code str} into a new backing array. */
-  public GString(char str[]) {
-    // NOTE(review): this copy is allocated with plain "new" while the other
-    // constructors use "global new" -- confirm which is intended.
-    char charstr[]=new char[str.length];
-    for(int i=0; i<str.length; i++)
-      charstr[i]=str[i];
-    this.value=charstr;
-    this.count=str.length;
-    this.offset=0;
-  }
-
-  /** @return a newly allocated array holding the characters of {@code str} */
-  public static char[] toLocalCharArray(GString str) {
-    char[] c;
-    int length;
-
-    length = str.length();
-
-    c = new char[length];
-
-    for (int i = 0; i < length; i++) {
-      c[i] = str.value[i+str.offset];
-    }
-    return c;
-  }
-
-  /** @return a String built from a copy of this sequence's characters */
-  public String toLocalString() {
-    return new String(toLocalCharArray(this));
-  }
-
-  public int length() {
-    return count;
-  }
-
-  /**
-   * @return the first index at or after {@code fromIndex} holding {@code ch},
-   *         or -1; a negative {@code fromIndex} is treated as 0
-   */
-  public int indexOf(int ch, int fromIndex) {
-    if (fromIndex < 0)
-      fromIndex = 0;
-    for (int i = fromIndex; i < count; i++)
-      if (this.charAt(i) == ch)
-        return i;
-    return -1;
-  }
-
-  public int lastindexOf(int ch) {
-    return this.lastindexOf(ch, count - 1);
-  }
-
-  /**
-   * @return the last index at or before {@code fromIndex} holding {@code ch},
-   *         or -1; a {@code fromIndex} past the end is clamped to the last
-   *         character
-   */
-  public int lastindexOf(int ch, int fromIndex) {
-    if (fromIndex >= count)
-      fromIndex = count - 1;
-    // ">= 0" so that a match in the very first position is found too.
-    for (int i = fromIndex; i >= 0; i--)
-      if (this.charAt(i) == ch)
-        return i;
-    return -1;
-  }
-
-  public char charAt(int i) {
-    return value[i+offset];
-  }
-
-  public int indexOf(String str) {
-    return this.indexOf(str, 0);
-  }
-
-  /**
-   * @return the first index at or after {@code fromIndex} where {@code str}
-   *         occurs, or -1
-   */
-  public int indexOf(String str, int fromIndex) {
-    if (fromIndex < 0)
-      fromIndex = 0;
-    for (int i = fromIndex; i <= (count-str.count); i++)
-      if (regionMatches(i, str, 0, str.count))
-        return i;
-    return -1;
-  }
-
-  /**
-   * Compares {@code len} characters of this sequence starting at
-   * {@code toffset} with those of {@code other} starting at {@code ooffset}.
-   *
-   * @return false when either range is out of bounds or any character differs
-   */
-  public boolean regionMatches(int toffset, String other, int ooffset, int len) {
-    if (toffset < 0 || ooffset < 0 || (toffset+len) > count || (ooffset+len) > other.count)
-      return false;
-
-    for (int i = 0; i < len; i++) {
-      if (other.value[i+other.offset+ooffset] != this.value[i+this.offset+toffset])
-        return false;
-    }
-    return true;
-  }
-
-  public String subString(int beginIndex, int endIndex) {
-    return substring(beginIndex, endIndex);
-  }
-
-  /**
-   * @return a String covering [beginIndex, endIndex) that shares this
-   *         sequence's backing array rather than copying it
-   */
-  public String substring(int beginIndex, int endIndex) {
-    String str;
-    str = global new String();
-    str.value = this.value;
-    str.count = endIndex-beginIndex;
-    str.offset = this.offset + beginIndex;
-    return str;
-  }
-
-  public static String valueOf(Object o) {
-    if (o==null)
-      return "null";
-    else
-      return o.toString();
-  }
-
-  /** Hash of the character content, so equal sequences hash alike. */
-  public int hashCode() {
-    String s = this.toLocalString();
-    return s.hashCode();
-  }
-
-  /** @return true when {@code o} is a GString with the same characters */
-  public boolean equals(Object o) {
-    if(o == null)
-      return false;
-    if(!(o instanceof GString))
-      return false;
-    GString gs = (GString)o;
-    String s1 = gs.toLocalString();
-    String s2 = this.toLocalString();
-    return s2.equals(s1);
-  }
-}
+++ /dev/null
-/**
- * Key object made of an engine name and a signature, carrying the per-user
- * statistics recorded for that pair. Equality and hashing use only
- * {@code engine} and {@code signature}.
- */
-public class HashEntry {
-  public GString engine;
-  public GString signature;
-  public HashStat stats;
-
-  public HashEntry() {
-
-  }
-
-  /**
-   * hashCode that combines two strings using xor.
-   * @return a hash code value on the entire object.
-   */
-  public int hashCode() {
-    int result=0;
-    // this will not work well if some of the strings are equal.
-    result = engine.hashCode();
-    result ^= signature.hashCode();
-    //result ^= stats.hashCode();
-    //System.out.println("HashEntry: hashCode= " + result);
-    return result;
-  }
-
-  public void setengine(GString engine) {
-    this.engine=engine;
-  }
-
-  public void setstats(HashStat stats) {
-    this.stats=stats;
-  }
-
-  public void setsig(GString signature) {
-    this.signature=signature;
-  }
-
-  public GString getEngine() {
-    return engine;
-  }
-
-  public GString getSignature() {
-    return signature;
-  }
-
-  public HashStat getStats() {
-    return stats;
-  }
-
-  /**
-   * @return true when {@code o} is a HashEntry whose engine and signature
-   *         both equal this entry's; false for null or any other type
-   */
-  public boolean equals(Object o) {
-    // Reject null and foreign types up front instead of failing in the cast.
-    if(o == null)
-      return false;
-    if(!(o instanceof HashEntry))
-      return false;
-    HashEntry he = (HashEntry)o;
-    if(!(he.getEngine().equals(engine)))
-      return false;
-    if(!(he.getSignature().equals(signature)))
-      return false;
-    //if(!(he.getStats().equals(stats)))
-    //  return false;
-    return true;
-  }
-
-  /**
-   * Sums {@code getChecked()} over every user that {@code stats.getUsers()}
-   * reports.
-   * NOTE(review): the total includes ham and unknown counts, not only spam --
-   * confirm this is the intended confidence measure.
-   */
-  public int askForSpam() {
-    int[] users = stats.getUsers();
-    int spamConfidence=0;
-    for(int i=0; i<users.length; i++) {
-      int userid = users[i];
-      spamConfidence += stats.userstat[userid].getChecked();
-    }
-    return spamConfidence;
-  }
-}
+++ /dev/null
-/**
- * Per-user statistics for one signature. {@code userid[i]} is 1 once user
- * {@code i} has been registered, and {@code userstat[i]} holds that user's
- * counters. Both arrays have eight slots.
- */
-public class HashStat {
-  int[] userid;
-  FilterStatistic[] userstat;
-  int[] listofusers;
-
-  /** Allocates both arrays and one zeroed FilterStatistic per slot. */
-  public HashStat() {
-    userid = global new int[8]; //max users for our system=8
-    userstat = global new FilterStatistic[8];
-    int slot = 0;
-    while(slot < 8) {
-      userstat[slot] = global new FilterStatistic();
-      slot++;
-    }
-  }
-
-  /** Registers user {@code id} and overwrites that user's three counters. */
-  public void setuser(int id, int spam, int ham, int unknown) {
-    userid[id] = 1;
-    FilterStatistic fs = userstat[id];
-    fs.setSpam(spam);
-    fs.setHam(ham);
-    fs.setUnknown(unknown);
-  }
-
-  /** Registers user {@code id} without touching the counters. */
-  public void setuserid(int id) {
-    userid[id] = 1;
-  }
-
-  /** @return the registration flag stored for {@code id} */
-  public int getuser(int id) {
-    return userid[id];
-  }
-
-  public int getspamcount(int userid) {
-    FilterStatistic fs = userstat[userid];
-    return fs.getSpam();
-  }
-
-  public int gethamcount(int userid) {
-    FilterStatistic fs = userstat[userid];
-    return fs.getHam();
-  }
-
-  public int getunknowncount(int userid) {
-    FilterStatistic fs = userstat[userid];
-    return fs.getUnknown();
-  }
-
-  public void incSpamCount(int userid) {
-    FilterStatistic fs = userstat[userid];
-    fs.increaseSpam();
-  }
-
-  public void incHamCount(int userid) {
-    FilterStatistic fs = userstat[userid];
-    fs.increaseHam();
-  }
-
-  /**
-   * Builds a new array with the ids of all registered users in ascending
-   * order, stores it in {@code listofusers} and returns it.
-   */
-  public int[] getUsers() {
-    int nusers = numUsers();
-    listofusers = global new int[nusers];
-    int filled = 0;
-    for(int id=0; id<8; id++) {
-      if(userid[id] == 1) {
-        listofusers[filled] = id;
-        filled = filled + 1;
-      }
-    }
-    return listofusers;
-  }
-
-  /** @return how many of the eight slots are flagged as registered */
-  public int numUsers() {
-    int registered = 0;
-    int id = 0;
-    while(id < 8) {
-      if(userid[id] == 1) {
-        registered += 1;
-      }
-      id++;
-    }
-    return registered;
-  }
-}
+++ /dev/null
-
-// This class computes MD5 hashes.
-// Manually translated by Jon Howell <jonh@cs.dartmouth.edu>
-// from some public domain C code (md5.c) included with the ssh-1.2.22 source.
-// Tue Jan 19 15:55:50 EST 1999
-// $Id: MD5.java,v 1.1 2010/01/31 10:04:47 jihoonl Exp $
-//
-// To compute the message digest of a chunk of bytes, create an
-// MD5 object 'md5', call md5.update() as needed on buffers full
-// of bytes, and then call md5.md5final(), which
-// will fill a supplied 16-byte array with the digest.
-//
-// A main() method is included that hashes the data on System.in.
-//
-// It seems to run around 25-30 times slower (JDK1.1.6) than optimized C
-// (gcc -O4, version 2.7.2.3). Measured on a Sun Ultra 5 (SPARC 270MHz).
-//
-// Comments from md5.c from ssh-1.2.22, the basis for this code:
-//
-/* This code has been heavily hacked by Tatu Ylonen <ylo@cs.hut.fi> to
- make it compile on machines like Cray that don't have a 32 bit integer
- type. */
-/*
- * This code implements the MD5 message-digest algorithm.
- * The algorithm is due to Ron Rivest. This code was
- * written by Colin Plumb in 1993, no copyright is claimed.
- * This code is in the public domain; do with it what you wish.
- *
- * Equivalent code is available from RSA Data Security, Inc.
- * This code has been tested against that, and is equivalent,
- * except that you don't need to include two pages of legalese
- * with every copy.
- *
- * To compute the message digest of a chunk of bytes, declare an
- * MD5Context structure, pass it to MD5Init, call MD5Update as
- * needed on buffers full of bytes, and then call MD5Final, which
- * will fill a supplied 16-byte array with the digest.
- */
-
-/**
- * Incremental MD5 digest. Feed bytes with one of the {@code update} methods,
- * then call {@link #md5final(byte[])} once to obtain the 16-byte digest.
- * {@code md5final} zeroes the internal state, so an instance cannot be reused
- * afterwards.
- */
-public class MD5 {
-  int buf[];    // These were originally unsigned ints.
-  // This Java code makes an effort to avoid sign traps.
-  // buf[] is where the hash accumulates.
-  long bits;    // This is the count of bits hashed so far.
-  byte in[];    // This is a buffer where we stash bytes until we have
-  // enough (64) to perform a transform operation.
-  int inint[];
-  // inint[] used and discarded inside transform(),
-  // but why allocate it over and over?
-  // (In the C version this is allocated on the stack.)
-
-  public MD5() {
-    buf = new int[4];
-    // fill the hash accumulator with a seed value
-    buf[0] = 0x67452301;
-    buf[1] = 0xefcdab89;
-    buf[2] = 0x98badcfe;
-    buf[3] = 0x10325476;
-
-    // initially, we've hashed zero bits
-    bits = 0L;
-
-    in = new byte[64];
-    inint = new int[16];
-  }
-
-  /** Hashes all of {@code newbuf}. */
-  public void update(byte[] newbuf) {
-    update(newbuf, 0, newbuf.length);
-  }
-
-  /** Hashes the first {@code length} bytes of {@code newbuf}. */
-  public void update(byte[] newbuf, int length) {
-    update(newbuf, 0, length);
-  }
-
-  /**
-   * Hashes {@code buflen} bytes of {@code newbuf} starting at
-   * {@code bufstart}. Complete 64-byte blocks are transformed immediately;
-   * a trailing partial block is kept in {@code in} for the next call.
-   */
-  public void update(byte[] newbuf, int bufstart, int buflen) {
-    int t;
-    int len = buflen;
-
-    // stash old bits value for the "Bytes already in" computation
-    // just below.
-    t = (int) bits; // (int) cast should just drop high bits, I hope
-
-    /* update bitcount */
-    /* the C code used two 32-bit ints separately, and carefully
-     * ensured that the carry carried.
-     * Java has a 64-bit long, which is just what the code really wants.
-     */
-    // Widen before shifting: len<<3 evaluated as an int overflows for
-    // buffers of 2^28 bytes or more.
-    bits += ((long) len) << 3;
-
-    t = (t >>> 3) & 0x3f; /* Bytes already in this->in */
-
-    /* Handle any leading odd-sized chunks */
-    /* (that is, any left-over chunk left by last update() */
-
-    if (t!=0) {
-      int p = t;
-      t = 64 - t;
-      if (len < t) {
-        arraycopy(newbuf, bufstart, in, p, len);
-        return;
-      }
-      arraycopy(newbuf, bufstart, in, p, t);
-      transform();
-      bufstart += t;
-      len -= t;
-    }
-
-    /* Process data in 64-byte chunks */
-    while (len >= 64) {
-      arraycopy(newbuf, bufstart, in, 0, 64);
-      transform();
-      bufstart += 64;
-      len -= 64;
-    }
-
-    /* Handle any remaining bytes of data. */
-    /* that is, stash them for the next update(). */
-    arraycopy(newbuf, bufstart, in, 0, len);
-  }
-
-  /** Copies {@code len} bytes from {@code src[srcPos..]} to {@code dest[destPos..]}. */
-  public void arraycopy(byte[] src, int srcPos, byte[] dest, int destPos, int len) {
-    for (int i = 0; i < len; i++) {
-      dest[destPos+i] = src[srcPos+i];
-    }
-    return;
-  }
-
-  /*
-   * Final wrapup - pad to 64-byte boundary with the bit pattern
-   * 1 0* (64-bit count of bits processed, MSB-first)
-   */
-  /**
-   * Finishes the hash and writes the digest into {@code digest[0..15]}.
-   *
-   * @param digest destination array of at least 16 bytes
-   */
-  public void md5final(byte[] digest) {
-    /* "final" is a poor method name in Java. :v) */
-    int count;
-    int p;     // in original code, this is a pointer; in this java code
-    // it's an index into the array this->in.
-
-    /* Compute number of bytes mod 64 */
-    count = (int) ((bits >>> 3) & 0x3F);
-
-    /* Set the first char of padding to 0x80.  This is safe since there is
-       always at least one byte free */
-    p = count;
-    in[p++] = (byte) 0x80;
-
-    /* Bytes of padding needed to make 64 bytes */
-    count = 64 - 1 - count;
-
-    /* Pad out to 56 mod 64 */
-    if (count < 8) {
-      /* Two lots of padding:  Pad the first block to 64 bytes */
-      zeroByteArray(in, p, count);
-      transform();
-
-      /* Now fill the next block with 56 bytes */
-      zeroByteArray(in, 0, 56);
-    } else {
-      /* Pad block to 56 bytes */
-      zeroByteArray(in, p, count - 8);
-    }
-
-    /* Append length in bits and transform */
-    // Could use a PUT_64BIT... func here. This is a fairly
-    // direct translation from the C code, where bits was an array
-    // of two 32-bit ints.
-    int lowbits = (int) bits;
-    int highbits = (int) (bits >>> 32);
-    PUT_32BIT_LSB_FIRST(in, 56, lowbits);
-    PUT_32BIT_LSB_FIRST(in, 60, highbits);
-
-    transform();
-    PUT_32BIT_LSB_FIRST(digest, 0, buf[0]);
-    PUT_32BIT_LSB_FIRST(digest, 4, buf[1]);
-    PUT_32BIT_LSB_FIRST(digest, 8, buf[2]);
-    PUT_32BIT_LSB_FIRST(digest, 12, buf[3]);
-
-    /* zero sensitive data */
-    /* notice this misses any sneaking out on the stack. The C
-     * version uses registers in some spots, perhaps because
-     * they care about this.
-     */
-    zeroByteArray(in);
-    zeroIntArray(buf);
-    bits = 0;
-    zeroIntArray(inint);
-  }
-
-  /*
-     public static void main(String args[]) {
-  // This main() method was created to easily test
-  // this class. It hashes whatever's on System.in.
-
-  byte buf[] = new byte[397];
-  // arbitrary buffer length designed to irritate update()
-  int rc;
-  MD5 md = new MD5();
-  byte out[] = new byte[16];
-  int i;
-  int len = 0;
-
-  try {
-  while ((rc = System.in.read(buf, 0, 397)) > 0) {
-  md.update(buf, rc);
-  len += rc;
-  }
-  } catch (IOException ex) {
-  ex.printStackTrace();
-  return;
-  }
-  md.md5final(out);
-
-  System.out.println("file length: "+len);
-  System.out.println("hash: "+dumpBytes(out));
-  }
-   */
-
-
-  /////////////////////////////////////////////////////////////////////
-  // Below here ye will only finde private functions                 //
-  /////////////////////////////////////////////////////////////////////
-
-  // There must be a way to do these functions that's
-  // built into Java, and I just haven't noticed it yet.
-
-  private void zeroByteArray(byte[] a) {
-    zeroByteArray(a, 0, a.length);
-  }
-
-  private void zeroByteArray(byte[] a, int start, int length) {
-    setByteArray(a, (byte) 0, start, length);
-  }
-
-  private void setByteArray(byte[] a, byte val, int start, int length) {
-    int i;
-    int end = start+length;
-    for (i=start; i<end; i++) {
-      a[i] = val;
-    }
-  }
-
-  private void zeroIntArray(int[] a) {
-    zeroIntArray(a, 0, a.length);
-  }
-
-  private void zeroIntArray(int[] a, int start, int length) {
-    setIntArray(a, (int) 0, start, length);
-  }
-
-  private void setIntArray(int[] a, int val, int start, int length) {
-    int i;
-    int end = start+length;
-    for (i=start; i<end; i++) {
-      a[i] = val;
-    }
-  }
-
-  // The four round functions: mix data into w, rotate left by s, add x.
-  private int MD5STEP1(int w, int x, int y, int z, int data, int s) {
-    w += (z ^ (x & (y ^ z))) + data;
-    w = w<<s | w>>>(32-s);
-    w += x;
-    return w;
-  }
-
-  private int MD5STEP2(int w, int x, int y, int z, int data, int s) {
-    w += (y ^ (z & (x ^ y))) + data;
-    w = w<<s | w>>>(32-s);
-    w += x;
-    return w;
-  }
-
-  private int MD5STEP3(int w, int x, int y, int z, int data, int s) {
-    w += (x ^ y ^ z) + data;
-    w = w<<s | w>>>(32-s);
-    w += x;
-    return w;
-  }
-
-  private int MD5STEP4(int w, int x, int y, int z, int data, int s) {
-    w += (y ^ (x | ~z)) + data;
-    w = w<<s | w>>>(32-s);
-    w += x;
-    return w;
-  }
-
-  /** Folds the 64 bytes currently held in {@code in} into {@code buf}. */
-  private void transform() {
-    /* load in[] byte array into an internal int array */
-    int i;
-
-    // Reuse the inint field allocated once in the constructor; every slot
-    // is overwritten here before it is read.
-    for (i=0; i<16; i++) {
-      inint[i] = GET_32BIT_LSB_FIRST(in, 4*i);
-    }
-
-    int a, b, c, d;
-    a = buf[0];
-    b = buf[1];
-    c = buf[2];
-    d = buf[3];
-
-    a = MD5STEP1(a, b, c, d, inint[0] + 0xd76aa478, 7);
-    d = MD5STEP1(d, a, b, c, inint[1] + 0xe8c7b756, 12);
-    c = MD5STEP1(c, d, a, b, inint[2] + 0x242070db, 17);
-    b = MD5STEP1(b, c, d, a, inint[3] + 0xc1bdceee, 22);
-    a = MD5STEP1(a, b, c, d, inint[4] + 0xf57c0faf, 7);
-    d = MD5STEP1(d, a, b, c, inint[5] + 0x4787c62a, 12);
-    c = MD5STEP1(c, d, a, b, inint[6] + 0xa8304613, 17);
-    b = MD5STEP1(b, c, d, a, inint[7] + 0xfd469501, 22);
-    a = MD5STEP1(a, b, c, d, inint[8] + 0x698098d8, 7);
-    d = MD5STEP1(d, a, b, c, inint[9] + 0x8b44f7af, 12);
-    c = MD5STEP1(c, d, a, b, inint[10] + 0xffff5bb1, 17);
-    b = MD5STEP1(b, c, d, a, inint[11] + 0x895cd7be, 22);
-    a = MD5STEP1(a, b, c, d, inint[12] + 0x6b901122, 7);
-    d = MD5STEP1(d, a, b, c, inint[13] + 0xfd987193, 12);
-    c = MD5STEP1(c, d, a, b, inint[14] + 0xa679438e, 17);
-    b = MD5STEP1(b, c, d, a, inint[15] + 0x49b40821, 22);
-
-    a = MD5STEP2(a, b, c, d, inint[1] + 0xf61e2562, 5);
-    d = MD5STEP2(d, a, b, c, inint[6] + 0xc040b340, 9);
-    c = MD5STEP2(c, d, a, b, inint[11] + 0x265e5a51, 14);
-    b = MD5STEP2(b, c, d, a, inint[0] + 0xe9b6c7aa, 20);
-    a = MD5STEP2(a, b, c, d, inint[5] + 0xd62f105d, 5);
-    d = MD5STEP2(d, a, b, c, inint[10] + 0x02441453, 9);
-    c = MD5STEP2(c, d, a, b, inint[15] + 0xd8a1e681, 14);
-    b = MD5STEP2(b, c, d, a, inint[4] + 0xe7d3fbc8, 20);
-    a = MD5STEP2(a, b, c, d, inint[9] + 0x21e1cde6, 5);
-    d = MD5STEP2(d, a, b, c, inint[14] + 0xc33707d6, 9);
-    c = MD5STEP2(c, d, a, b, inint[3] + 0xf4d50d87, 14);
-    b = MD5STEP2(b, c, d, a, inint[8] + 0x455a14ed, 20);
-    a = MD5STEP2(a, b, c, d, inint[13] + 0xa9e3e905, 5);
-    d = MD5STEP2(d, a, b, c, inint[2] + 0xfcefa3f8, 9);
-    c = MD5STEP2(c, d, a, b, inint[7] + 0x676f02d9, 14);
-    b = MD5STEP2(b, c, d, a, inint[12] + 0x8d2a4c8a, 20);
-
-    a = MD5STEP3(a, b, c, d, inint[5] + 0xfffa3942, 4);
-    d = MD5STEP3(d, a, b, c, inint[8] + 0x8771f681, 11);
-    c = MD5STEP3(c, d, a, b, inint[11] + 0x6d9d6122, 16);
-    b = MD5STEP3(b, c, d, a, inint[14] + 0xfde5380c, 23);
-    a = MD5STEP3(a, b, c, d, inint[1] + 0xa4beea44, 4);
-    d = MD5STEP3(d, a, b, c, inint[4] + 0x4bdecfa9, 11);
-    c = MD5STEP3(c, d, a, b, inint[7] + 0xf6bb4b60, 16);
-    b = MD5STEP3(b, c, d, a, inint[10] + 0xbebfbc70, 23);
-    a = MD5STEP3(a, b, c, d, inint[13] + 0x289b7ec6, 4);
-    d = MD5STEP3(d, a, b, c, inint[0] + 0xeaa127fa, 11);
-    c = MD5STEP3(c, d, a, b, inint[3] + 0xd4ef3085, 16);
-    b = MD5STEP3(b, c, d, a, inint[6] + 0x04881d05, 23);
-    a = MD5STEP3(a, b, c, d, inint[9] + 0xd9d4d039, 4);
-    d = MD5STEP3(d, a, b, c, inint[12] + 0xe6db99e5, 11);
-    c = MD5STEP3(c, d, a, b, inint[15] + 0x1fa27cf8, 16);
-    b = MD5STEP3(b, c, d, a, inint[2] + 0xc4ac5665, 23);
-
-    a = MD5STEP4(a, b, c, d, inint[0] + 0xf4292244, 6);
-    d = MD5STEP4(d, a, b, c, inint[7] + 0x432aff97, 10);
-    c = MD5STEP4(c, d, a, b, inint[14] + 0xab9423a7, 15);
-    b = MD5STEP4(b, c, d, a, inint[5] + 0xfc93a039, 21);
-    a = MD5STEP4(a, b, c, d, inint[12] + 0x655b59c3, 6);
-    d = MD5STEP4(d, a, b, c, inint[3] + 0x8f0ccc92, 10);
-    c = MD5STEP4(c, d, a, b, inint[10] + 0xffeff47d, 15);
-    b = MD5STEP4(b, c, d, a, inint[1] + 0x85845dd1, 21);
-    a = MD5STEP4(a, b, c, d, inint[8] + 0x6fa87e4f, 6);
-    d = MD5STEP4(d, a, b, c, inint[15] + 0xfe2ce6e0, 10);
-    c = MD5STEP4(c, d, a, b, inint[6] + 0xa3014314, 15);
-    b = MD5STEP4(b, c, d, a, inint[13] + 0x4e0811a1, 21);
-    a = MD5STEP4(a, b, c, d, inint[4] + 0xf7537e82, 6);
-    d = MD5STEP4(d, a, b, c, inint[11] + 0xbd3af235, 10);
-    c = MD5STEP4(c, d, a, b, inint[2] + 0x2ad7d2bb, 15);
-    b = MD5STEP4(b, c, d, a, inint[9] + 0xeb86d391, 21);
-
-    buf[0] += a;
-    buf[1] += b;
-    buf[2] += c;
-    buf[3] += d;
-  }
-
-  /** Reads four bytes at {@code off} as a little-endian int. */
-  private int GET_32BIT_LSB_FIRST(byte[] b, int off) {
-    return
-      ((int)(b[off+0]&0xff)) |
-      ((int)(b[off+1]&0xff) << 8) |
-      ((int)(b[off+2]&0xff) << 16) |
-      ((int)(b[off+3]&0xff) << 24);
-  }
-
-  /** Writes {@code value} at {@code off} as four little-endian bytes. */
-  private void PUT_32BIT_LSB_FIRST(byte[] b, int off, int value) {
-    b[off+0] = (byte) (value & 0xff);
-    b[off+1] = (byte) ((value >> 8) & 0xff);
-    b[off+2] = (byte) ((value >> 16)& 0xff);
-    b[off+3] = (byte) ((value >> 24)& 0xff);
-  }
-
-  // These are debug routines I was using while trying to
-  // get this code to generate the same hashes as the C version.
-  // (IIRC, all the errors were due to the absence of unsigned
-  // ints in Java.)
-  /*
-     private void debugStatus(String m) {
-     System.out.println(m+":");
-     System.out.println("in: "+dumpBytes(in));
-     System.out.println("bits: "+bits);
-     System.out.println("buf: "
-     +Integer.toHexString(buf[0])+" "
-     +Integer.toHexString(buf[1])+" "
-     +Integer.toHexString(buf[2])+" "
-     +Integer.toHexString(buf[3]));
-     }
-
-     private static String dumpBytes(byte[] bytes) {
-     int i;
-     StringBuffer sb = new StringBuffer();
-     for (i=0; i<bytes.length; i++) {
-     if (i%32 == 0 && i!=0) {
-     sb.append("\n");
-     }
-     String s = Integer.toHexString(bytes[i]);
-     if (s.length() < 2) {
-     s = "0"+s;
-     }
-     if (s.length() > 2) {
-     s = s.substring(s.length()-2);
-     }
-     sb.append(s);
-     }
-     return sb.toString();
-     }
-   */
-}
+++ /dev/null
-/**
- * This class is a container for all data contained in an Email Message.
- **/
-public class Mail {
-
- String header; // the full header
- //String sentOn; // time the message was sent
- //String receivedOn; // time when the message arrived
- String from; // the "from" field
- String to; // the "to" field
- String cc;
- String subject;
- String body;
- String noURLBody;
- String sourceCode;
- String spam;
- boolean hasAttachement;
- String encoding; //rich text, plain, html
-
- String messageID; // cached message ID for reuse (takes a lot of memory and is used all over the place)
- //same as hashcode of a class
- boolean isSpam;
-
- /**
- * this is a really simple implementation of a tokenizer
- * used to build tokens from an email and divide email into parts
- **/
- int MAX_TOKEN_SIZE;
-
- /**
-  * Creates an empty mail; the text fields are filled in later through the
-  * setters.
-  */
- public Mail() {
-   messageID=null;
-   // Same chunk size the file-reading constructor uses. Left at 0,
-   // returnEmail() would call getSplittedBody(0), whose loop never advances.
-   MAX_TOKEN_SIZE = 1024;
- }
-
- /**
-  * Reads a mail from {@code fileName}. The file starts with tagged header
-  * lines ("Spam:", "Header:", "To:", "From:", "Cc:", "Subject:"); for each
-  * one only the first token after the tag is stored. Everything after the
-  * "Subject:" line is read as the body.
-  */
- public Mail(String fileName)   // read a mail from file
- {
-   //System.out.println("DEBUG: fileName= " + fileName);
-
-   MAX_TOKEN_SIZE = 1024;
-
-   BufferedReader fileinput = new BufferedReader(new FileInputStream(fileName));
-   String line;
-   boolean chk = false;
-
-   while((line = fileinput.readLine()) != null)
-   {
-     chk = true;
-
-     Vector splittedLine = line.split();
-     // A blank line has no tag to inspect; skip it instead of indexing
-     // into an empty vector.
-     if(splittedLine.size() > 0)
-     {
-       String tag = (String)(splittedLine.elementAt(0));
-       // A tag with nothing after it yields null rather than an
-       // out-of-range access.
-       String val = null;
-       if(splittedLine.size() > 1)
-         val = (String)(splittedLine.elementAt(1));
-
-       if(tag.equals("Spam:"))
-       {
-         spam = val;
-       }
-       else if(tag.equals("Header:"))   // message id
-       {
-         header = val;
-       }
-       else if(tag.equals("To:"))   // receiver
-       {
-         to = val;
-       }
-       else if(tag.equals("From:"))   // sender
-       {
-         from = val;
-       }
-       else if(tag.equals("Cc:"))   // cc
-       {
-         cc = val;
-       }
-       else if(tag.equals("Subject:"))   // Subject
-       {
-         subject = val;
-         break;
-       }
-     }
-   }   // parsed messageID, To, from, cc, Title
-
-   /**
-    * error checking
-    **/
-   if(!chk)
-     System.out.println("no line read");
-
-
-   body = new String();
-   byte[] readBody = new byte[256];
-   int nread;
-
-   // Append only the bytes that were actually read: converting the whole
-   // buffer would add stale or zero bytes after a short final read.
-   while((nread = fileinput.read(readBody))>0)
-   {
-     body += new String(readBody, 0, nread);
-   }
-
-   fileinput.close();
- }
-
- // -------------------------------------------------------
-
- /** Replaces the stored header value. */
- public void setHeader(String header) {
- this.header = header;
- }
-
- /** @return the stored header value; may be null if never set */
- public String getHeader() {
- return header;
- }
-
-
- /*
- public void setSentOn(String sentOn) {
- this.sentOn = sentOn;
- }
-
- public String getSentOn() {
- return sentOn;
- }
-
- public Date getSentOnAsDate() {
- String sentOn = getSentOn();
- return parseDate(sentOn);
- }
-
- public void setReceivedOn(String receivedOn) {
- this.receivedOn = receivedOn;
- }
-
- public String getReceivedOn() {
- return receivedOn;
- }
-
- public Date getReceivedOnAsDate() {
- String receivedOn = getReceivedOn();
- return parseDate(receivedOn);
- }
- */
-
-
- /**
- * Parses a given Date-String in into a real Date-Object
- *
- * @param stringDate the string in format dd.mm.yyyy hh:mm
- * @return a Date containing the info of the string or the actual date and time if something fails.
- */
- /*
- public Date parseDate(String stringDate) {
- // date is in this format: dd.mm.yyyy hh:mm
- if (stringDate == null || "N/A".equals(stringDate)) {
- return new Date();
- }
- try {
- synchronized (MAIL_TIME_FORMAT) {
- return MAIL_TIME_FORMAT.parse(stringDate);
- }
- } catch (Throwable e) {
- return new Date();
- }
- }
- */
-
- /** Sets the "from" field. */
- public void setFrom(String from) {
- this.from = from;
- }
-
- /** @return the "from" field; may be null if never set */
- public String getFrom() {
- return from;
- }
-
- /** Sets the "to" field. */
- public void setTo(String to) {
- this.to = to;
- }
-
- /** @return the "to" field; may be null if never set */
- public String getTo() {
- return to;
- }
-
- /** Sets the "cc" field. */
- public void setCc(String cc) {
- this.cc = cc;
- }
-
- /** @return the "cc" field; may be null if never set */
- public String getCc() {
- return cc;
- }
-
- /** Sets the subject. */
- public void setSubject(String subject) {
- this.subject = subject;
- }
-
- /** @return the subject; may be null if never set */
- public String getSubject() {
- return subject;
- }
-
- /** Sets the body text. */
- public void setBody(String body) {
- this.body = body;
- }
-
- /** @return the body text; may be null if never set */
- public String getBody() {
- return body;
- }
-
- /** Sets the source-code field. */
- public void setSourceCode(String sourceCode) {
- this.sourceCode = sourceCode;
- }
-
- /** @return the source-code field; may be null if never set */
- public String getSourceCode() {
- return sourceCode;
- }
-
- /** Sets the attachment flag. */
- public void setHasAttachement(boolean hasAttachement) {
- this.hasAttachement = hasAttachement;
- }
-
- /** @return the attachment flag */
- public boolean getHasAttachement() {
- return hasAttachement;
- }
-
- /** Sets the encoding name. */
- public void setEncoding(String encoding) {
- this.encoding = encoding;
- }
-
- /** @return the encoding name; may be null if never set */
- public String getEncoding() {
- return encoding;
- }
-
- public boolean isTextEncoding() {
- return getEncoding().toLowerCase().indexOf("plain") >= 0;
- }
-
- public boolean isHTMLEncoding() {
- return getEncoding().toLowerCase().indexOf("html") >= 0;
- }
-
- /*
- public String toString() {
- return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getReceivedOn() + "," + getSentOn() + "," + getSourceCode() + "," + getSubject() + "," + getTo();
- }
- */
-
- public String toString() {
- return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getSourceCode() + "," + getSubject() + "," + getTo();
- }
-
- /*
- public String getID() {
- if (messageID == null) { // no cached version
- // Take the message-ID header as ID (if present)
- String[] messageIDs = getHeaderField("Message-ID");
- if ((messageIDs != null) && (messageIDs.length > 0)) {
- messageID = messageIDs[0];
- } else { // otherwise, hash header and body as ID
- return String.valueOf(getHeader().hashCode() + getBody().hashCode());
- }
- }
-
- return messageID;
- }
- */
-
- /**
-  * Not implemented: no header field lookup exists yet.
-  *
-  * @param fieldName name of the header field to look up (currently unused)
-  * @return always null
-  */
- public String[] getHeaderField(String fieldName) {
-   // Explicit result so the method has a defined return value.
-   return null;
- }
-
- /**
-  * Not implemented: no address extraction exists yet.
-  *
-  * @return always null
-  */
- public String extractEMailAddress() {
-   // Explicit result so the method has a defined return value.
-   return null;
- }
-
- /*
- public boolean equals(Object o) {
- if (o instanceof Mail) {
- Mail mail = (Mail)o;
- return this.getID().equals(mail.getID());
- }
-
- return false;
- }
- */
-
- /**
-  * @return a new Vector holding, in order, the header, the sender and the
-  *         subject of this mail
-  */
- public Vector getCommonPart()
- {
-   Vector common = new Vector();
-   common.addElement(this.header);
-   common.addElement(this.from);
-   common.addElement(this.subject);
-   return common;
- }
-
- /** @return the body text; returns the same field as getBody() */
- public String getBodyString()
- {
- return body;
- }
-
- /**
-  * Splits this mail into the three parts that get signed.
-  *
-  * @return a Vector of three Vectors: the common part, the URLs and e-mail
-  *         addresses found in the body, and the remaining body text cut
-  *         into chunks of MAX_TOKEN_SIZE characters
-  */
- public Vector returnEmail() {
-   Vector commonPart = getCommonPart();
-   Vector urls = getURLs();
-   Vector bodyChunks = getSplittedBody(MAX_TOKEN_SIZE);
-
-   Vector parts = new Vector();
-   parts.addElement(commonPart);
-   parts.addElement(urls);
-   parts.addElement(bodyChunks);
-   return parts;
- }
-
- public Vector getURLs()
- {
- Vector returnStrings = new Vector();
- Vector splittedBody = body.split();
-
- // add URL and email in the body
- for(int i=0; i<splittedBody.size(); i++)
- {
- String segment = (String)(splittedBody.elementAt(i));
- if(segment.startsWith("http://")) // URL
- {
- returnStrings.addElement(segment);
- }
- else if(isEmailAccount(segment)) // email
- {
- returnStrings.addElement(segment);
- }
- }
-
- return returnStrings;
- }
-
- // check if it is email account string
- private boolean isEmailAccount(String str)
- {
-   // Heuristic only: any token containing both '@' and '.' counts.
-   return str.contains("@") && str.contains(".");
- }
-
- /**
-  * Rebuilds <code>noURLBody</code>: the body tokens that are neither URLs
-  * ("http://" prefix) nor e-mail addresses, concatenated without separators.
-  */
- public void setNoURLBody()
- {
-   Vector tokens = body.split();
-
-   // Pass 1: add up the kept tokens so the buffer can be sized exactly.
-   int keptLength = 0;
-   for (int k = 0; k < tokens.size(); k++) {
-     String token = (String) tokens.elementAt(k);
-     boolean isLink = token.startsWith("http://") || isEmailAccount(token);
-     if (!isLink) {
-       keptLength += token.length();
-     }
-   }
-
-   // Pass 2: concatenate the kept tokens.
-   StringBuffer kept = new StringBuffer(keptLength);
-   for (int k = 0; k < tokens.size(); k++) {
-     String token = (String) tokens.elementAt(k);
-     boolean isLink = token.startsWith("http://") || isEmailAccount(token);
-     if (!isLink) {
-       kept.append(token);
-     }
-   }
-   noURLBody = kept.toString();
- }
-
- /**
-  * Cuts the URL-free body into consecutive pieces.
-  * setNoURLBody() is called here, so callers need not call it first.
-  *
-  * @param size maximum number of characters per piece
-  * @return the pieces in body order; empty when the URL-free body is empty
-  *         or when <code>size</code> is not positive
-  */
- public Vector getSplittedBody(int size)
- {
-   setNoURLBody();
-   Vector pieces = new Vector();
-   if (size <= 0) {
-     // A non-positive step would never advance the loop below.
-     return pieces;
-   }
-
-   int end = noURLBody.length();
-   // Start at 0: starting at 1 silently dropped the first character.
-   int start = 0;
-   while (start < end) {
-     // Compare against the remaining length so start+size cannot overflow.
-     int stop = end;
-     if (size < end - start) {
-       stop = start + size;
-     }
-     pieces.addElement(noURLBody.substring(start, stop));
-     start = stop;
-   }
-   return pieces;
- }
-
-
- /** Stores the given flag in the <code>isSpam</code> field. */
- public void setIsSpam(boolean spam) {
-   this.isSpam = spam;
- }
-
- /**
-  * Tells whether this mail is labelled as spam.
-  *
-  * @return true only when the <code>spam</code> field equals "yes"; false
-  *         when it holds anything else or is null
-  */
- public boolean getIsSpam() {
-   // NOTE(review): setIsSpam() writes isSpam, but this getter reads spam;
-   // confirm against the field declarations that this is intended.
-   return spam != null && spam.equals("yes");
- }
-
- /**
- * Returns result to the Spam filter
- **/
- public Vector checkMail(int userid) {
-   // Preprocess: split the mail into common part, URLs and body pieces.
-   Vector mailParts = returnEmail();
-
-   // Hash every part; the result is a Vector of "engine:signature" strings.
-   // userid is not used by this method.
-   SignatureComputer computer = new SignatureComputer();
-   return computer.computeSigs(mailParts);
- }
-
- /* For tests only */
- /*
- public static void main(String[] args)
- {
- Mail mail = new Mail("./emails/email1");
-
- String[] a = mail.createMailStrings();
-
- for(String b : a)
- {
- System.out.println(b);
- }
- }
- */
-}
+++ /dev/null
-/**
- * Computes printable signatures for the parts of a mail using two engines:
- * engine 4 (EphemeralSignature) for text parts and engine 8
- * (WhiplashSignature) for parts that contain URLs or e-mail addresses.
- */
-public class SignatureComputer {
-  public EphemeralSignature sig4; //signature engines
-  public WhiplashSignature sig8; //signature engines
-
-  /** Engine numbers tried for every mail part, in this order. */
-  int[] enginesToUseForCheck;
-
-  /** Creates both engines with their default settings. */
-  public SignatureComputer() {
-    sig4 = new EphemeralSignature(); //default values
-    sig8 = new WhiplashSignature();
-    createEnginesToCheck();
-  }
-
-  /**
-   * constructor to be used when some parsing has already taken place with the
-   * server-provided value <code>randomNumberSeed</code>.
-   *
-   * @param randomNumberSeed
-   *          a non-negative number used for seeding the random number generator
-   *          before starting to hash values.
-   * @param separator
-   *          how the mail-text should be split into lines. (== what chars
-   *          separate 2 lines)
-   */
-  public SignatureComputer(int randomNumberSeed, String separator) {
-    sig4 = new EphemeralSignature(randomNumberSeed,separator);
-    sig8 = new WhiplashSignature();
-    createEnginesToCheck();
-  }
-
-  /**
-   * the constructor to be used most of the time. you can hand over the
-   * seed-string exactly as it is provided by the razor-server.
-   *
-   * @param seedAndSeparator
-   *          a string containing the seed value for the RNG and a separator list
-   *          (separated by ' <b>- </b>'). default value is
-   *          <code>"7542-10"</code> which means server-seed 7542 and only one
-   *          separator 10 (which is ascii '\n').
-   */
-  public SignatureComputer(String seedAndSeparator) {
-    sig4 = new EphemeralSignature(seedAndSeparator);
-    sig8 = new WhiplashSignature();
-    createEnginesToCheck();
-  }
-
-  /** Registers the two supported engines, 4 and 8. */
-  public void createEnginesToCheck() {
-    enginesToUseForCheck = new int[2];
-    enginesToUseForCheck[0] = 4; //Ephemeral engine
-    enginesToUseForCheck[1] = 8; //Whiplash engine
-  }
-
-  /** Returns true when <code>sig</code> is one of the registered engine numbers. */
-  public boolean isSigSupported(int sig) {
-    for (int i = 0; i < enginesToUseForCheck.length; i++) {
-      if (enginesToUseForCheck[i] == sig) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /** Same as isSigSupported(int) for a decimal string; null is unsupported. */
-  public boolean isSigSupported(String sig) {
-    return (sig != null && isSigSupported(Integer.parseInt(sig)));
-  }
-
-  public String getDefaultEngine() {
-    return "4";
-  }
-
-  /**
-   * Computes the signatures of a mail that was split by Mail.returnEmail().
-   *
-   * @param EmailParts
-   *          Vector of Vectors of Strings: slot 0 holds the common parts,
-   *          slot 1 the URLs/e-mail addresses, slot 2 the body pieces
-   *          without URLs. Missing or null slots are skipped.
-   * @return null when <code>EmailParts</code> is null; otherwise a Vector of
-   *         "engine:signature" Strings, ordered common parts, then body
-   *         pieces, then URLs.
-   */
-  public Vector computeSigs(Vector EmailParts) {
-    if (EmailParts == null) return null;
-
-    Vector printableSigs = new Vector(); // vector of strings
-    int slots = EmailParts.size();
-
-    // Step I: common parts (header, sender, subject)
-    if (slots > 0) {
-      addEphemeralSigs((Vector) (EmailParts.elementAt(0)),
-                       "Err: Common part Couldn't find the signature engine: ",
-                       printableSigs);
-    }
-
-    // Step II: body pieces without URLs
-    if (slots > 2) {
-      addEphemeralSigs((Vector) (EmailParts.elementAt(2)),
-                       "Err: body parts without URL Couldn't find the signature engine: ",
-                       printableSigs);
-    }
-
-    // Step III: URLs and e-mail addresses found in the body
-    if (slots > 1) {
-      addWhiplashSigs((Vector) (EmailParts.elementAt(1)), printableSigs);
-    }
-
-    return printableSigs;
-  }//computeSigs
-
-  /**
-   * Appends one "4:sig" entry to <code>out</code> for every non-null text in
-   * <code>texts</code>. Engine 8 is skipped for text parts; any other engine
-   * number is reported with <code>errPrefix</code>.
-   */
-  private void addEphemeralSigs(Vector texts, String errPrefix, Vector out) {
-    if (texts == null) return;
-
-    for (int t = 0; t < texts.size(); t++) {
-      String text = (String) (texts.elementAt(t));
-      if (text == null) continue;
-
-      for (int e = 0; e < enginesToUseForCheck.length; e++) {
-        int engineNo = enginesToUseForCheck[e];
-        if (engineNo == 8) {
-          continue;
-        }
-        if (engineNo != 4) {
-          System.out.println(errPrefix + engineNo);
-          continue;
-        }
-        String sig = computeSignature(engineNo, text);
-        if (sig != null) {
-          out.addElement(engineNo + ":" + sig);
-        }
-        // a null sig means no signature was produced for this text
-      }
-    }
-  }
-
-  /**
-   * Appends one "8:sig" entry to <code>out</code> for every host signature
-   * the Whiplash engine produces from the entries of <code>urls</code>.
-   * Engine 4 is skipped here; any other engine number is reported.
-   */
-  private void addWhiplashSigs(Vector urls, Vector out) {
-    if (urls == null) return;
-
-    for (int u = 0; u < urls.size(); u++) {
-      String text = (String) (urls.elementAt(u));
-
-      for (int e = 0; e < enginesToUseForCheck.length; e++) {
-        int engineNo = enginesToUseForCheck[e];
-        if (engineNo == 4) {
-          continue;
-        }
-        if (engineNo != 8) {
-          System.out.println("Err: body parts with URL Couldn't find the signature engine: " + engineNo);
-          continue;
-        }
-        // sig8 returns null for a null text or when no host is found
-        String[] hosts = sig8.computeSignature(text);
-        if (hosts == null) {
-          continue;
-        }
-        for (int h = 0; h < hosts.length; h++) {
-          out.addElement(engineNo + ":" + hosts[h]);
-        }
-      }
-    }
-  }
-
-  /**
-   * Computes a single-string signature.
-   *
-   * @param engineNo engine to use; only engine 4 is handled here
-   * @param mail text to sign
-   * @return the engine 4 signature, or null for any other engine
-   */
-  private String computeSignature(int engineNo, String mail) {
-    if (engineNo == 4) {
-      return this.sig4.computeSignature(mail);
-    }
-    return null;
-  }
-}
+++ /dev/null
-/**
- * Spam filter benchmark thread. Each thread repeatedly picks a random mail,
- * computes its signatures, looks them up in a shared DistributedHashMap and
- * sends feedback to the map whenever the filter's answer differs from the
- * mail's label.
- */
-public class SpamFilter extends Thread {
-  DistributedHashMap mydhmap;
-
-  int id; //thread id
-
-  /**
-   * Total number of iterations
-   **/
-  int numiter;
-
-  /**
-   * Total number of emails
-   **/
-  int numemail;
-
-  /**
-   * Total number of threads
-   **/
-  int nthreads;
-
-  public SpamFilter() {
-
-  }
-
-  public SpamFilter(int numiter, int numemail,int id, DistributedHashMap mydhmap, int nthreads) {
-    this.numiter=numiter;
-    this.numemail=numemail;
-    this.id = id;
-    this.mydhmap = mydhmap;
-    this.nthreads = nthreads;
-  }
-
-  /**
-   * Runs <code>numiter</code> rounds of <code>numemail</code> randomly picked
-   * mails and prints the number of correct and wrong answers per round.
-   */
-  public void run() {
-    int niter;
-    int nemails;
-    int thid;
-    int correct=0;
-    int wrong=0;
-
-    atomic {
-      niter=numiter;
-      nemails=numemail;
-      thid = id;
-    }
-
-    Random rand = new Random(thid);
-    int i;
-
-    for(i=0; i<niter; i++) {
-      correct =0;
-      wrong = 0;
-      for(int j=0; j<nemails; j++) {
-        // randomly pick one of the mails emails/email1 .. emails/email100
-        int pickemail = rand.nextInt(100);
-        pickemail+=1;
-        Mail email = new Mail("emails/email"+pickemail);
-        Vector signatures = email.checkMail(thid);
-
-        //check with global data structure
-        int[] confidenceVals=null;
-        atomic {
-          confidenceVals = check(signatures,thid);
-        }
-
-        //---- create and return results --------
-        FilterResult filterResult = new FilterResult();
-        boolean filterAnswer = filterResult.getResult(confidenceVals);
-
-        //---- get user's take on email and send feedback ------
-        boolean userAnswer = email.getIsSpam();
-
-        if(filterAnswer != userAnswer) {
-          /* wrong answer from the spam filter */
-          wrong++;
-          atomic {
-            sendFeedBack(signatures, userAnswer, thid, rand);
-          }
-        }
-        else {
-          /* Correct answer from the spam filter */
-          correct++;
-        }
-      } //end num emails
-      System.out.println((i+1)+"th iteration correct = " + correct + " Wrong = " + wrong + " percentage = " + ((float)correct/(float)nemails));
-    }//end num iter
-    System.out.println("\n\n\n I'm Done\n\n\n");
-
-    RecoveryStat.printRecoveryStat();
-  }
-
-  public static void main(String[] args) {
-    // Machine ids are IPv4 addresses packed into an int. Only the first two
-    // slots are configured; the remaining slots stay 0.
-    int[] mid = new int[8];
-    mid[0] = (128<<24)|(195<<16)|(180<<8)|21; //dw-2
-    mid[1] = (128<<24)|(195<<16)|(180<<8)|26; //dw-7
-
-    //Read options from command prompt
-    SpamFilter sf = new SpamFilter();
-    SpamFilter.parseCmdLine(args, sf);
-    int nthreads = sf.nthreads;
-
-    // Every thread needs its own machine slot; refuse to index past the table.
-    if(nthreads > mid.length) {
-      System.out.println("Error: at most " + mid.length + " threads are supported");
-      return;
-    }
-
-    //Create Global data structure
-    DistributedHashMap dhmap;
-    SpamFilter[] spf;
-    atomic {
-      dhmap = global new DistributedHashMap(500, 0.75f);
-    }
-    atomic {
-      spf = global new SpamFilter[nthreads];
-      for(int i=0; i<nthreads; i++) {
-        spf[i] = global new SpamFilter(sf.numiter, sf.numemail, i, dhmap, nthreads);
-      }
-    }
-
-    /* ---- Start Threads ---- */
-    SpamFilter tmp;
-    for(int i = 0; i<nthreads; i++) {
-      atomic {
-        tmp = spf[i];
-      }
-      tmp.start(mid[i]);
-    }
-
-    /* ---- Join threads----- */
-    for(int i = 0; i<nthreads; i++) {
-      atomic {
-        tmp = spf[i];
-      }
-      tmp.join();
-    }
-
-    System.out.println("Finished");
-  }
-
-  /**
-   * Parses <code>&lt;num thread&gt; [-n iterations] [-e emails] [-h]</code>
-   * into <code>sf</code>. Prints the usage text when the thread count is
-   * missing or zero.
-   */
-  public static void parseCmdLine(String args[], SpamFilter sf) {
-    if(args.length < 1) {
-      // No thread count given: report it instead of indexing args[0].
-      sf.usage();
-      return;
-    }
-
-    sf.nthreads = new Integer(args[0]).intValue();
-
-    int i = 1;
-    String arg;
-    while (i < args.length && args[i].startsWith("-")) {
-      arg = args[i++];
-      //check options
-      if(arg.equals("-n")) { //num of iterations
-        if(i < args.length) {
-          sf.numiter = new Integer(args[i++]).intValue();
-        }
-      } else if(arg.equals("-e")) { //num of emails
-        if(i < args.length) {
-          sf.numemail = new Integer(args[i++]).intValue();
-        }
-      } else if(arg.equals("-h")) {
-        sf.usage();
-      }
-    }
-    if(sf.nthreads == 0) {
-      sf.usage();
-    }
-  }
-
-  /**
-   * The usage routine describing the program
-   **/
-  public void usage() {
-    System.out.println("usage: ./spamfilter <num thread> -n <num iterations> -e <num emails>\n");
-    System.out.println(    "                  -n : num iterations");
-    System.out.println(    "                  -e : number of emails");
-  }
-
-  /**
-   * Builds the hash-map key for a signature of the form "a:b", where a is
-   * the engine ("4" or "8") and b the signature text.
-   *
-   * @return the key, or null when <code>part</code> is too short or names an
-   *         unknown engine
-   */
-  private HashEntry makeKey(String part) {
-    if(part == null || part.length() < 2) {
-      return null;
-    }
-
-    char tmpengine = part.charAt(0);
-    GString engine=null;
-    if(tmpengine == '4') { //Ephemeral Signature calculator
-      String tmpstr = new String("4");
-      engine = global new GString(tmpstr);
-    }
-    if(tmpengine == '8') { //Whiplash Signature calculator
-      String tmpstr = new String("8");
-      engine = global new GString(tmpstr);
-    }
-    if(engine == null) {
-      return null;
-    }
-
-    //a:b index of a =0, index of : =1, index of b =2
-    String str = new String(part.substring(2));
-    GString signature = global new GString(str);
-    HashEntry myhe = global new HashEntry();
-    myhe.setengine(engine);
-    myhe.setsig(signature);
-    return myhe;
-  }
-
-  /**
-   * Looks every signature up in the shared map and registers the ones that
-   * are not there yet. Called from inside an atomic block by run().
-   *
-   * @return one confidence value per signature: 0 for a newly added or
-   *         malformed signature, otherwise FilterStatistic.getChecked()
-   */
-  public int[] check(Vector signatures, int userid) {
-    int numparts = signatures.size();
-
-    int[] confidenceVals = new int[numparts];
-    for(int i=0; i<numparts; i++) {
-      HashEntry myhe = makeKey((String)(signatures.elementAt(i)));
-      if(myhe == null) {
-        continue; // malformed signature: entry stays 0
-      }
-
-      //find object in distributedhashMap: if no object then add object
-      if(!mydhmap.containsKey(myhe)) {
-        //add new object
-        HashStat mystat = global new HashStat();
-        mystat.setuser(userid, 0, 0, -1);
-        myhe.setstats(mystat);
-        FilterStatistic fs = global new FilterStatistic(0,0,-1);
-        mydhmap.put(myhe, fs);
-        confidenceVals[i] = 0;
-      } else { //read existing object
-        HashEntry tmphe = (HashEntry)(mydhmap.getKey(myhe));
-        FilterStatistic fs = (FilterStatistic) (mydhmap.get(tmphe)); //get the value from hash
-        confidenceVals[i] = fs.getChecked();
-      }
-    }
-
-    // --> the mail client is able to determine if it is spam or not
-    // --- According to the "any"-logic (in Core#check_logic) in original Razor ---
-    // If any answer is spam, the entire email is spam.
-    return confidenceVals;
-  }
-
-  /**
-   * This method sends feedback from the user to a distributed
-   * spam database and trains the spam database to check future
-   * emails and detect spam. Signatures that are not in the map are ignored.
-   * Called from inside an atomic block by run().
-   **/
-  public void sendFeedBack(Vector signatures, boolean isSpam, int id, Random myrand) {
-
-    for(int i=0;i<signatures.size();i++) {
-      HashEntry myhe = makeKey((String)(signatures.elementAt(i)));
-      if(myhe == null) {
-        continue; // malformed signature: nothing to update
-      }
-
-      // ----- now connect to global data structure and update stats -----
-      if(mydhmap.containsKey(myhe)) {
-        HashEntry tmphe = (HashEntry)(mydhmap.getKey(myhe));
-
-        if(tmphe.stats.userid[id] != 1) {
-          tmphe.stats.setuserid(id);
-        }
-
-        //---- get value from distributed hash and update spam count
-        FilterStatistic fs = (FilterStatistic) (mydhmap.get(myhe));
-
-        // Feedback is correct 95% of the time and inverted 5% of the time.
-        int pickemail = myrand.nextInt(100);
-        boolean reportSpam = isSpam;
-        if(pickemail >= 95) {
-          reportSpam = !isSpam;
-        }
-
-        if(reportSpam) {
-          tmphe.stats.incSpamCount(id);
-          fs.increaseSpam();
-        } else {
-          tmphe.stats.incHamCount(id);
-          fs.increaseHam();
-        }
-      }//end of if
-    }//end of for
-  }//end of sendFeeback()
-}
-
-
+++ /dev/null
-
-/*
- Part of the Spamato project (www.spamato.net)
- Copyright (C) 2005 ETHZ, DCG
- contact by email: info@spamato.net
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-
- $Id: WhiplashSignature.java,v 1.1 2010/01/31 10:04:47 jihoonl Exp $
- */
-/**
- * Whiplash signature engine: extracts host names from a text and signs each
- * host with MD5.
- */
-public class WhiplashSignature {
-  /** Razor's base64 alphabet: A-Z, a-z, 0-9, '-', '_'. */
-  char[] b64table;
-
-  public WhiplashSignature() {
-    b64table = new char[64];
-
-    for (int i= 0; i <= 25; i++) {
-      b64table[i] = (char) ((i + 65) & 0xff);   // 'A'..'Z'
-    }
-    for (int i= 26; i <= 51; i++) {
-      b64table[i] = (char) ((i + 71) & 0xff);   // 'a'..'z'
-    }
-    for (int i= 52; i <= 61; i++) {
-      b64table[i] = (char) ((i - 4) & 0xff);    // '0'..'9'
-    }
-    b64table[62]= '-';
-    b64table[63]= '_';
-  }
-
-  /**
-   * Computes one signature per host found in <code>text</code>.
-   *
-   * @return the signatures, or null when <code>text</code> is null or
-   *         contains no host
-   */
-  public String[] computeSignature(String text) {
-    //Current: Simplify the host extraction and signature computation
-    // TODO: Extract canonical domain name and convert to Base64
-    return whiplash(text);
-  }
-
-  /**
-   * converts a hex-string in a base64-string exactly as it is done in razor.
-   * @param hex a hex-value
-   * @return a base64-equivalent of <code>hex</code>.
-   */
-  public String hexToBase64(String hex){
-    if(hex == null)
-      return null;
-    int[] b64s = new int[hex.length()*2/3 + ((hex.length()*2)%3)];
-    int i=0;
-    int b64count = 0;
-
-    while(i < hex.length()){
-      //process 3 hex char chunks at a time
-      int upperBorder = Math.imin(i+3,hex.length());
-      String hex3 = hex.substring(i,upperBorder);
-      i+=3;
-
-      int bv = convertHexToRazorEncoding(hex3);
-      //now the right endian encoding
-      b64s[b64count++] = ((0xfc0 & bv)>>>6); //higher 6 bits
-      b64s[b64count++] =  (0x3f & bv) ;      //lower 6 bits
-
-    }
-    String bs = "";
-    for (int j= 0; j < b64s.length; j++) {
-      bs += b64table[ b64s[j] ];
-    }
-    return bs;
-  }
-
-  /**
-   * razor does some special conversion using perl's <code>pack()</code> which
-   * we must do manually in java: every hex digit is bit-mirrored and the up
-   * to three digits are packed into 12 bits; missing digits count as 0.
-   */
-  private int convertHexToRazorEncoding(String hex3) {
-    if((hex3 == null))
-      return 0; //error
-    int res = 0;
-    int cur = Integer.parseInt(hex3.substring(0,1),16);
-    cur = mirror4LSBits(cur);
-    res |= ( (cur&0xf) << 8);
-    if(hex3.length() >=2) {
-      cur = Integer.parseInt(hex3.substring(1,2),16);
-    } else {
-      cur = 0;
-    }
-    cur = mirror4LSBits(cur);
-    res |= ((cur & 0xf) << 4);
-    if(hex3.length() >= 3) {
-      cur = Integer.parseInt(hex3.substring(2,3),16);
-    } else {
-      cur = 0;
-    }
-    cur = mirror4LSBits(cur);
-    res |= (cur & 0xf);
-
-    return res;
-  }
-
-  /**
-   * mirrors the 4 least significant bits of an integer
-   * @param cur an int containing 4 least significant bits like <code>00000...00abcd</code>
-   * @return the mirrored 4 least significant bits <code>00000...00dcba</code>. all bits except <code>a-d</code> are lost.
-   */
-  public int mirror4LSBits(int cur) {
-    int res = 0;
-    res |= (cur & 0x8)>>>3;
-    res |= (cur & 0x4)>>>1;
-    res |= (cur & 0x2)<<1;
-    res |= (cur & 0x1)<<3;
-    return res;
-  }
-
-  /**
-   * Signs every host found in <code>text</code> with MD5. The 16 digest
-   * bytes are turned into a String as they are, without hex encoding.
-   *
-   * @return one signature per host, or null when there is nothing to sign
-   */
-  public String[] whiplash(String text) {
-
-    if (text == null) {
-      return null;
-    }
-    String[] hosts = extractHosts(text);
-    if (hosts == null || hosts.length < 1) {
-      return null;
-    }
-    String[] sigs = new String[hosts.length];
-
-    for (int i = 0; i < hosts.length; i++) {
-      MD5 md = new MD5();
-      String host = hosts[i];
-      int len = host.length();
-      byte buf[] = host.getBytes();
-      byte sig[] = new byte[16];
-      md.update(buf, len);
-      md.md5final(sig);
-      sigs[i] = new String(sig);
-    }
-    return sigs;
-  }
-
-  /**
-   * Extracts host labels from <code>text</code>: first the text between
-   * every "www." and the next '.', then the text between every "@" and the
-   * next '.'.
-   *
-   * @return the labels in that order, or null when <code>text</code> is null
-   *         or no label is found
-   */
-  public String[] extractHosts(String text) {
-    if (text == null) {
-      return null;
-    }
-    Vector hosts = new Vector();
-
-    /* Extract hosts from http:// links */
-    collectHosts(text, "www.", hosts);
-
-    /* Extract hosts from email addresses */
-    collectHosts(text, "@", hosts);
-
-    if (hosts.size() == 0) {
-      return null;
-    }
-
-    String[] retbuf = new String[hosts.size()];
-    for (int i = 0; i < hosts.size(); i++) {
-      retbuf[i] = (String) (hosts.elementAt(i));
-    }
-
-    return retbuf;
-  }
-
-  /**
-   * Appends to <code>hosts</code> the text between each occurrence of
-   * <code>marker</code> and the '.' that follows it; scanning resumes after
-   * that '.'.
-   */
-  private void collectHosts(String text, String marker, Vector hosts) {
-    String dot = ".";
-    String rest = text;
-    int idx;
-    while ((idx = rest.indexOf(marker)) != -1) {
-      rest = rest.subString(idx + marker.length());
-      int endidx = rest.indexOf(dot);
-      if (endidx == -1) {
-        // No '.' left: this marker and any later one cannot delimit a host.
-        break;
-      }
-      hosts.addElement(rest.subString(0, endidx));
-      rest = rest.subString(endidx + dot.length());
-    }
-  }
-
-}
+++ /dev/null
-128.195.180.21
-128.195.180.26
-#128.195.136.162
-#128.195.136.163
-#128.195.136.164
-#128.195.136.165
-#128.195.136.166
-#128.195.136.167
-#128.195.136.168
-#128.195.136.169
-
-
+++ /dev/null
-MAINCLASS=SpamFilter
-SRC=${MAINCLASS}.java \
- DistributedHashMap.java \
- Mail.java \
- FilterResult.java \
- HashEntry.java \
- HashStat.java \
- SignatureComputer.java \
- FilterStatistic.java \
- EphemeralSignature.java \
- GString.java \
- WhiplashSignature.java
-
-FLAGS= -dsm -recoverystats -recovery -nooptimize -mainclass ${MAINCLASS}
-
-# default and clean are commands, not files to be built.
-.PHONY: default clean
-
-default:
-	../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC}
-
-clean:
-	rm -rf tmpbuilddirectory*
-	rm -f *.bin
--- /dev/null
+/**
+ * Byte-oriented reader that pulls data from a FileInputStream in blocks of
+ * 2048 bytes and hands it out as single bytes, byte arrays or lines.
+ */
+public class BufferedReader {
+  FileInputStream fr;
+  byte[] buffer;
+  /** Index of the next unread byte in <code>buffer</code>. */
+  int offset;
+  /** Number of valid bytes in <code>buffer</code>; <= 0 once a refill found no data. */
+  int end;
+
+  public BufferedReader(FileInputStream fr) {
+    this.fr=fr;
+    this.buffer=new byte[2048];
+  }
+
+  /**
+   * Reads one byte.
+   *
+   * @return the byte as a value in 0..255, or -1 when no more data is available
+   */
+  public int read() {
+    if (offset>=end) {
+      readBuffer();
+      if (end<=0)
+        return -1;
+    }
+    // Mask the sign away so that byte 0xFF is not mistaken for the -1 marker.
+    return buffer[offset++] & 0xff;
+  }
+
+  /**
+   * Fills <code>array</code> with as many bytes as are available.
+   *
+   * @return the number of bytes copied; when nothing was copied, the value
+   *         reported by the underlying read (0 or negative)
+   */
+  public int read(byte[] array) {
+    int off=0;
+    int arraylen=array.length;
+    do {
+      for(;offset<end;offset++) {
+        if (off>=arraylen)
+          return off;
+        array[off++]=buffer[offset];
+      }
+      // Request satisfied exactly at the end of the buffer: do not refill.
+      if (off>=arraylen)
+        return off;
+      readBuffer();
+      // Bytes already copied must be reported even if the refill found none.
+      if (end==0||(end<0&&off>0))
+        return off;
+      if (end<0)
+        return end;
+    } while(true);
+  }
+
+  /** Refills the buffer from the stream and rewinds <code>offset</code>. */
+  public void readBuffer() {
+    offset=0;
+    end=fr.read(buffer);
+  }
+
+  /**
+   * Reads the next line. A line ends at the first '\n' or '\r'; the whole run
+   * of consecutive line-break characters after it is consumed, so empty
+   * lines are not returned separately.
+   *
+   * @return the line without its terminator, or null when no data is left.
+   *         A last line without a terminator is returned as well.
+   */
+  public String readLine() {
+    String str=null;
+    do {
+      boolean foundcr=false;
+      int index=offset;
+      for(;index<end;index++) {
+        if (buffer[index]=='\n'||buffer[index]=='\r') {
+          foundcr=true;
+          break;
+        }
+      }
+      String buf=new String(buffer, offset, index-offset);
+      if (str==null)
+        str=buf;
+      else
+        str=str.concat(buf);
+      if (foundcr) {
+        // Skip the terminator and any line-break characters that follow it,
+        // refilling the buffer when the run continues past its end.
+        offset=index;
+        do {
+          for(;offset<end;offset++) {
+            if (buffer[offset]!='\n'&&buffer[offset]!='\r') {
+              return str;
+            }
+          }
+          readBuffer();
+          if (end<=0)
+            return str;
+        } while(true);
+      } else {
+        readBuffer();
+        if (end<=0) {
+          // End of data: hand out an unterminated last line instead of
+          // dropping it.
+          if (str.length()>0)
+            return str;
+          return null;
+        }
+      }
+    } while(true);
+
+  }
+
+  public void close() {
+    fr.close();
+  }
+
+}
\ No newline at end of file
--- /dev/null
+/**
+ * Hash map with separate chaining whose table, buckets and entries are
+ * allocated with <code>global new</code>. The table is never resized;
+ * <code>loadFactor</code> is stored but not used by any method here.
+ */
+public class DistributedHashMap {
+  DistributedHashEntry[] table;
+  float loadFactor;
+
+  public DistributedHashMap(int initialCapacity, float loadFactor) {
+    init(initialCapacity, loadFactor);
+  }
+
+  private void init(int initialCapacity, float loadFactor) {
+    table=global new DistributedHashEntry[initialCapacity];
+    this.loadFactor=loadFactor;
+  }
+
+  /** Maps a hash code to a non-negative table index below <code>length</code>. */
+  private static int hash1(int hashcode, int length) {
+    int value=hashcode%length;
+    if (value<0)
+      return -value;
+    else
+      return value;
+  }
+
+  /**
+   * Finds the chain entry for <code>key</code>.
+   *
+   * @return the entry, or null when the key is not in the map
+   */
+  private DHashEntry findEntry(Object key) {
+    int hashcode=key.hashCode();
+    DistributedHashEntry dhe=table[hash1(hashcode, table.length)];
+    if (dhe==null)
+      return null;
+
+    DHashEntry ptr=dhe.array;
+    while(ptr!=null) {
+      // Compare the cached hash first; equals() runs only on a hash match.
+      if (ptr.hashval==hashcode&&ptr.key.equals(key)) {
+        return ptr;
+      }
+      ptr=ptr.next;
+    }
+    return null;
+  }
+
+  /**
+   * Removes the mapping for <code>key</code>.
+   *
+   * @return the value that was mapped to the key, or null when there was none
+   */
+  Object remove(Object key) {
+    int hashcode=key.hashCode();
+    DistributedHashEntry dhe=table[hash1(hashcode, table.length)];
+    if (dhe==null)
+      return null;
+
+    DHashEntry prev=null;
+    DHashEntry ptr=dhe.array;
+    while(ptr!=null) {
+      // Match on the entry itself, not on its predecessor, and return the
+      // removed entry's own value.
+      if (ptr.hashval==hashcode&&ptr.key.equals(key)) {
+        if (prev==null)
+          dhe.array=ptr.next;
+        else
+          prev.next=ptr.next;
+        dhe.count--;
+        return ptr.value;
+      }
+      prev=ptr;
+      ptr=ptr.next;
+    }
+    return null;
+  }
+
+  /** Returns the value mapped to <code>key</code>, or null when there is none. */
+  Object get(Object key) {
+    DHashEntry entry=findEntry(key);
+    if (entry==null)
+      return null;
+    return entry.value;
+  }
+
+  /**
+   * Returns the key object stored in the map that equals <code>key</code>,
+   * or null when there is none.
+   */
+  Object getKey(Object key) {
+    DHashEntry entry=findEntry(key);
+    if (entry==null)
+      return null;
+    return entry.key;
+  }
+
+  /** Returns true when the map holds a mapping for <code>key</code>. */
+  boolean containsKey(Object key) {
+    return findEntry(key)!=null;
+  }
+
+  /**
+   * Maps <code>key</code> to <code>value</code>.
+   *
+   * @return the value previously mapped to the key, or null when the key is new
+   */
+  Object put(Object key, Object value) {
+    int hashcode=key.hashCode();
+    int index1=hash1(hashcode, table.length);
+    DistributedHashEntry dhe=table[index1];
+    if (dhe==null) {
+      // First entry for this bucket: create the bucket head.
+      dhe=global new DistributedHashEntry();
+      table[index1]=dhe;
+    }
+    DHashEntry ptr=dhe.array;
+
+    while(ptr!=null) {
+      if (ptr.hashval==hashcode&&ptr.key.equals(key)) {
+        Object oldvalue=ptr.value;
+        ptr.value=value;
+        return oldvalue;
+      }
+      ptr=ptr.next;
+    }
+
+    // Key not present: link a new entry at the front of the chain.
+    DHashEntry he=global new DHashEntry();
+    he.value=value;
+    he.key=key;
+    he.hashval=hashcode;
+    he.next=dhe.array;
+    dhe.array=he;
+
+    dhe.count++;
+
+    return null;
+  }
+}
+
+/**
+ * One bucket of a DistributedHashMap: the head of its entry chain and a
+ * counter the map adjusts as entries are added to or removed from it.
+ */
+class DistributedHashEntry {
+  DHashEntry array;
+  int count;
+
+  public DistributedHashEntry() {
+    this.count = 0;
+  }
+}
+
+
+/**
+ * Node of a singly linked bucket chain: a key, its cached hash code, the
+ * mapped value and the link to the next node.
+ */
+class DHashEntry {
+  Object key;
+  Object value;
+  int hashval;
+  DHashEntry next;
+
+  public DHashEntry() {
+  }
+}
--- /dev/null
+/**
+ * Produces a signature for a message body by taking its MD5 digest.
+ */
+public class EphemeralSignature {
+
+  int serverSeed;
+  String serverSeparator;
+  Random rand;
+
+  /** Creates a signer whose random generator is seeded with 0. */
+  public EphemeralSignature() {
+    // Assign the field: declaring a local of the same name here would
+    // leave the field null.
+    rand = new Random(0);
+  }
+
+  /**
+   * @param randomNumberSeed seed for the random generator
+   * @param separator        stored as the server separator
+   */
+  public EphemeralSignature(int randomNumberSeed, String separator) {
+    serverSeed = randomNumberSeed;
+    rand = new Random(randomNumberSeed);
+    serverSeparator = separator;
+  }
+
+  /**
+   * Stores {@code seedAndSeparator} as the separator.
+   * NOTE(review): the argument is not parsed and {@code rand} stays null
+   * on this path - confirm whether a seed was meant to be extracted.
+   */
+  public EphemeralSignature(String seedAndSeparator) {
+    serverSeparator = seedAndSeparator;
+  }
+
+  /**
+   * Computes the MD5 digest of {@code body}.
+   *
+   * @param body text to sign
+   * @return the 16 digest bytes wrapped in a String
+   */
+  public String computeSignature(String body) {
+    MD5 md = new MD5();
+    byte buf[] = body.getBytes();
+    byte sig[] = new byte[16];
+
+    // Hash exactly the bytes produced; the character count can differ
+    // from the byte count.
+    md.update(buf, buf.length);
+    md.md5final(sig);
+    String signature = new String(sig);
+
+    return signature;
+  }
+
+  /*
+     public long DEKHash(String str)
+     {
+     long hash = str.length();
+
+     for(int i = 0; i < str.length(); i++)
+     {
+     hash = ((hash << 5) ^ (hash >> 27)) ^ str.charAt(i);
+     }
+
+     return hash;
+     }
+     */
+
+}
--- /dev/null
+/**
+ * A FilterResult encapsulates the result of a filter made by checking a mail.
+ **/
+public class FilterResult {
+  /**
+   * This value is used if type is ERROR or UNKNOWN.
+   */
+  public double NO_RESULT;
+
+  /**
+   * A result value greater than or equal to this value indicates that the
+   * filter has decided on spam.
+   */
+  public int SPAM_THRESHOLD;
+  public int ABSOLUTE_SPAM;
+  public int ABSOLUTE_HAM;
+
+  //public double result; // the result, a value between -1 (ham) and 1000 (spam),
+  // negative values for "error", "unknown" etc.
+
+  // -----------------------------------------------------------------------------
+
+  /**
+   * Initialises the thresholds to their defaults.
+   *
+   * @param result currently unused
+   */
+  public FilterResult(double result) {
+    SPAM_THRESHOLD = 50;
+    ABSOLUTE_SPAM = 100;
+    ABSOLUTE_HAM = 0;
+    NO_RESULT = -1;
+    //this.result = result;
+  }
+
+  /** Initialises the thresholds to their defaults. */
+  public FilterResult() {
+    SPAM_THRESHOLD = 50;
+    ABSOLUTE_SPAM = 100;
+    ABSOLUTE_HAM = 0;
+    NO_RESULT = -1;
+  }
+
+  /**
+   * Majority vote over a set of confidence values. Each value votes
+   * "unknown" when negative, "ham" when in [0, SPAM_THRESHOLD) and "spam"
+   * when at or above SPAM_THRESHOLD. On a tie the earlier category in the
+   * order unknown, ham, spam wins.
+   *
+   * @param confidenceVals one confidence value per voter
+   * @return true only when spam received strictly the most votes; false
+   *         otherwise, including for an empty array
+   */
+  public boolean getResult(int[] confidenceVals) {
+    int[] res = new int[3]; // votes: [0] unknown, [1] ham, [2] spam
+    for (int i = 0; i < confidenceVals.length; i++) {
+      if (confidenceVals[i] < 0)
+        res[0] += 1; //unknown
+      if (confidenceVals[i] >= 0 && confidenceVals[i] < SPAM_THRESHOLD)
+        res[1] += 1; //ham
+      if (confidenceVals[i] >= SPAM_THRESHOLD)
+        res[2] += 1; //spam
+    }
+    int maxVotes = 0;
+    // Start at "unknown" so the winner is defined even when no category
+    // received a vote.
+    int max = 0;
+    for (int i = 0; i < 3; i++) {
+      if (res[i] > maxVotes) {
+        maxVotes = res[i];
+        max = i;
+      }
+    }
+    // max is always 0, 1 or 2; only index 2 (spam) yields true.
+    return max == 2;
+  }
+
+  /*
+     public void addProperty(String key, String value) {
+     properties.put(key,value);
+     }
+
+     public String getProperty(String key) {
+     return properties.get(key);
+     }
+
+     public HashMap<String,String> getProperties() {
+     return properties;
+     }
+     */
+}
--- /dev/null
+public class FilterStatistic {
+ int unknown;
+ int spam;
+ int ham;
+
+ // -------------------------------------------------------
+
+ public FilterStatistic() {
+ this.spam = 0;
+ this.ham = 0;
+ this.unknown = 0;
+ }
+
+ public FilterStatistic(int spam, int ham, int unknown) {
+ this.spam = spam;
+ this.ham = ham;
+ this.unknown = unknown;
+ }
+
+ public int getChecked() {
+ return getSpam() + getHam() + getUnknown();
+ }
+
+ public int getHam() {
+ return ham;
+ }
+
+ public int getSpam() {
+ return spam;
+ }
+
+ public void setHam(int i) {
+ ham = i;
+ }
+
+ public void setSpam(int i) {
+ spam = i;
+ }
+
+ public int getUnknown() {
+ return unknown;
+ }
+
+ public void setUnknown(int u) {
+ unknown = u;
+ }
+
+ public void increaseSpam() {
+ setSpam(getSpam() + 1);
+ }
+
+ public void increaseHam() {
+ setHam(getHam() + 1);
+ }
+
+ public void increaseUnknown() {
+ setUnknown(getUnknown() + 1);
+ }
+
+ public String toString() {
+ String str = "Filterstats_spam_"+spam;
+ str += "_ham_" +ham;
+ str += "_unknown_"+unknown;
+ return str;
+ }
+}
--- /dev/null
+/**
+ * String value backed by a char array plus a start offset and a length,
+ * so that several strings can share one array.
+ */
+public class GString {
+  public char value[];
+  public int count;
+  public int offset;
+
+  public GString() {
+  }
+
+  /** Creates a one-character string. */
+  public GString(char c) {
+    char[] str = global new char[1];
+    str[0] = c;
+    // Fill the fields here: a bare GString(str) statement does not chain
+    // to another constructor.
+    this.value = str;
+    this.count = 1;
+    this.offset = 0;
+  }
+
+  /** Copies the characters of {@code str} into a freshly allocated array. */
+  public GString(String str) {
+    value = global new char[str.count];
+    for (int i = 0; i < str.count; i++) {
+      value[i] = str.value[i + str.offset];
+    }
+    count = str.count;
+    offset = 0;
+  }
+
+  /** Creates a string that shares the character array of {@code gstr}. */
+  public GString(GString gstr) {
+    this.value = gstr.value;
+    this.count = gstr.count;
+    this.offset = gstr.offset;
+  }
+
+  /*
+     public GString(StringBuffer gsb) {
+     value = global new char[gsb.length()];
+     count = gsb.length();
+     offset = 0;
+     for (int i = 0; i < count; i++)
+     value[i] = gsb.value[i];
+     }
+     */
+
+  /**
+   * Copies {@code str} into a new array.
+   * NOTE(review): the copy is made with plain {@code new} while the other
+   * constructors use {@code global new} - confirm that is intended.
+   */
+  public GString(char str[]) {
+    char charstr[] = new char[str.length];
+    for (int i = 0; i < str.length; i++)
+      charstr[i] = str[i];
+    this.value = charstr;
+    this.count = str.length;
+    this.offset = 0;
+  }
+
+  /** Copies the characters of {@code str} into a plain char array. */
+  public static char[] toLocalCharArray(GString str) {
+    char[] c;
+    int length;
+
+    length = str.length();
+
+    c = new char[length];
+
+    for (int i = 0; i < length; i++) {
+      c[i] = str.value[i + str.offset];
+    }
+    return c;
+  }
+
+  /** Returns a String holding a copy of this string's characters. */
+  public String toLocalString() {
+    return new String(toLocalCharArray(this));
+  }
+
+  public int length() {
+    return count;
+  }
+
+  /**
+   * Returns the first index at or after {@code fromIndex} holding
+   * {@code ch}, or -1. A negative {@code fromIndex} is treated as 0.
+   */
+  public int indexOf(int ch, int fromIndex) {
+    if (fromIndex < 0)
+      fromIndex = 0;
+    for (int i = fromIndex; i < count; i++)
+      if (this.charAt(i) == ch)
+        return i;
+    return -1;
+  }
+
+  /** Returns the last index holding {@code ch}, or -1. */
+  public int lastindexOf(int ch) {
+    return this.lastindexOf(ch, count - 1);
+  }
+
+  /**
+   * Returns the last index at or before {@code fromIndex} holding
+   * {@code ch}, or -1. A {@code fromIndex} past the end starts at the last
+   * character.
+   */
+  public int lastindexOf(int ch, int fromIndex) {
+    if (fromIndex >= count)
+      fromIndex = count - 1;
+    // Run down to and including index 0 so the first character is checked.
+    for (int i = fromIndex; i >= 0; i--)
+      if (this.charAt(i) == ch)
+        return i;
+    return -1;
+  }
+
+  public char charAt(int i) {
+    return value[i + offset];
+  }
+
+  public int indexOf(String str) {
+    return this.indexOf(str, 0);
+  }
+
+  /**
+   * Returns the first index at or after {@code fromIndex} where
+   * {@code str} occurs, or -1.
+   */
+  public int indexOf(String str, int fromIndex) {
+    if (fromIndex < 0)
+      fromIndex = 0;
+    for (int i = fromIndex; i <= (count - str.count); i++)
+      if (regionMatches(i, str, 0, str.count))
+        return i;
+    return -1;
+  }
+
+  /**
+   * Compares {@code len} characters of this string starting at
+   * {@code toffset} with {@code other} starting at {@code ooffset}.
+   * Returns false when either region falls outside its string.
+   */
+  public boolean regionMatches(int toffset, String other, int ooffset, int len) {
+    if (toffset < 0 || ooffset < 0 || (toffset + len) > count || (ooffset + len) > other.count)
+      return false;
+
+    for (int i = 0; i < len; i++) {
+      if (other.value[i + other.offset + ooffset] != this.value[i + this.offset + toffset])
+        return false;
+    }
+    return true;
+  }
+
+  public String subString(int beginIndex, int endIndex) {
+    return substring(beginIndex, endIndex);
+  }
+
+  /**
+   * Returns a String covering [beginIndex, endIndex) that shares this
+   * string's character array.
+   */
+  public String substring(int beginIndex, int endIndex) {
+    String str;
+    str = global new String();
+    str.value = this.value;
+    str.count = endIndex - beginIndex;
+    str.offset = this.offset + beginIndex;
+    return str;
+  }
+
+  public static String valueOf(Object o) {
+    if (o == null)
+      return "null";
+    else
+      return o.toString();
+  }
+
+  /** Hash of the character content, so equal strings hash alike. */
+  public int hashCode() {
+    String s = this.toLocalString();
+    return s.hashCode();
+  }
+
+  /** True when {@code o} is a GString with the same character content. */
+  public boolean equals(Object o) {
+    if (o == null)
+      return false;
+    if (!(o instanceof GString))
+      return false;
+    GString gs = (GString) o;
+    String s1 = gs.toLocalString();
+    String s2 = this.toLocalString();
+    if (s2.equals(s1))
+      return true;
+    return false;
+  }
+}
--- /dev/null
+/**
+ * Pairs an engine name with a signature and carries the statistics
+ * recorded for that pair. Identity (equals/hashCode) is defined by engine
+ * and signature only.
+ */
+public class HashEntry {
+  public GString engine;
+  public GString signature;
+  public HashStat stats;
+
+  public HashEntry() {
+
+  }
+
+  /**
+   * hashCode that combines two strings using xor.
+   * @return a hash code value on the entire object.
+   */
+  public int hashCode() {
+    int result = 0;
+    // this will not work well if some of the strings are equal.
+    result = engine.hashCode();
+    result ^= signature.hashCode();
+    //result ^= stats.hashCode();
+    //System.out.println("HashEntry: hashCode= " + result);
+    return result;
+  }
+
+  public void setengine(GString engine) {
+    this.engine = engine;
+  }
+
+  public void setstats(HashStat stats) {
+    this.stats = stats;
+  }
+
+  public void setsig(GString signature) {
+    this.signature = signature;
+  }
+
+  public GString getEngine() {
+    return engine;
+  }
+
+  public GString getSignature() {
+    return signature;
+  }
+
+  public HashStat getStats() {
+    return stats;
+  }
+
+  /**
+   * Two entries are equal when both engine and signature are equal;
+   * stats are ignored.
+   *
+   * @return false for null or for an object that is not a HashEntry
+   */
+  public boolean equals(Object o) {
+    // Guard the cast: equals must answer false, not throw, for null or
+    // foreign types.
+    if (!(o instanceof HashEntry))
+      return false;
+    HashEntry he = (HashEntry) o;
+    if (!(he.getEngine().equals(engine)))
+      return false;
+    if (!(he.getSignature().equals(signature)))
+      return false;
+    //if(!(he.getStats().equals(stats)))
+    //  return false;
+    return true;
+  }
+
+  /**
+   * Sums, over every registered user, that user's total number of checked
+   * items.
+   * NOTE(review): this adds getChecked() (spam + ham + unknown), not only
+   * the spam count - confirm that is the intended confidence measure.
+   */
+  public int askForSpam() {
+    int[] users = stats.getUsers();
+    int spamConfidence = 0;
+    for (int i = 0; i < users.length; i++) {
+      int userid = users[i];
+      spamConfidence += stats.userstat[userid].getChecked();
+    }
+    return spamConfidence;
+  }
+}
--- /dev/null
+/**
+ * Statistics for up to eight users: a flag array marking which user ids
+ * are registered and one FilterStatistic per id.
+ */
+public class HashStat {
+  int[] userid;
+  FilterStatistic[] userstat;
+  int[] listofusers;
+
+  /** Allocates the eight slots, each with a zeroed statistic. */
+  public HashStat() {
+    userid = global new int[8]; //max users for our system=8
+    userstat = global new FilterStatistic[8];
+    int slot = 0;
+    while (slot < 8) {
+      userstat[slot] = global new FilterStatistic();
+      slot++;
+    }
+  }
+
+  /** Registers user {@code id} and overwrites its three counters. */
+  public void setuser(int id, int spam, int ham, int unknown) {
+    userid[id] = 1;
+    FilterStatistic target = userstat[id];
+    target.setSpam(spam);
+    target.setHam(ham);
+    target.setUnknown(unknown);
+  }
+
+  /** Marks user {@code id} as registered. */
+  public void setuserid(int id) {
+    userid[id] = 1;
+  }
+
+  /** Returns the registration flag stored for {@code id}. */
+  public int getuser(int id) {
+    return userid[id];
+  }
+
+  public int getspamcount(int userid) {
+    FilterStatistic stat = userstat[userid];
+    return stat.getSpam();
+  }
+
+  public int gethamcount(int userid) {
+    FilterStatistic stat = userstat[userid];
+    return stat.getHam();
+  }
+
+  public int getunknowncount(int userid) {
+    FilterStatistic stat = userstat[userid];
+    return stat.getUnknown();
+  }
+
+  public void incSpamCount(int userid) {
+    FilterStatistic stat = userstat[userid];
+    stat.increaseSpam();
+  }
+
+  public void incHamCount(int userid) {
+    FilterStatistic stat = userstat[userid];
+    stat.increaseHam();
+  }
+
+  /**
+   * Builds a fresh array holding the ids of all registered users in
+   * ascending order, stores it in {@code listofusers} and returns it.
+   */
+  public int[] getUsers() {
+    int nusers = numUsers();
+    listofusers = global new int[nusers];
+    int filled = 0;
+    int id = 0;
+    while (id < 8) {
+      if (userid[id] == 1) {
+        listofusers[filled] = id;
+        filled++;
+      }
+      id++;
+    }
+    return listofusers;
+  }
+
+  /** Counts the registered users. */
+  public int numUsers() {
+    int registered = 0;
+    int id = 0;
+    while (id < 8) {
+      if (userid[id] == 1) {
+        registered++;
+      }
+      id++;
+    }
+    return registered;
+  }
+}
--- /dev/null
+
+// This class computes MD5 hashes.
+// Manually translated by Jon Howell <jonh@cs.dartmouth.edu>
+// from some public domain C code (md5.c) included with the ssh-1.2.22 source.
+// Tue Jan 19 15:55:50 EST 1999
+// $Id: MD5.java,v 1.1 2010/02/13 00:09:44 jihoonl Exp $
+//
+// To compute the message digest of a chunk of bytes, create an
+// MD5 object 'md5', call md5.update() as needed on buffers full
+// of bytes, and then call md5.md5final(), which
+// will fill a supplied 16-byte array with the digest.
+//
+// A main() method is included that hashes the data on System.in.
+//
+// It seems to run around 25-30 times slower (JDK1.1.6) than optimized C
+// (gcc -O4, version 2.7.2.3). Measured on a Sun Ultra 5 (SPARC 270MHz).
+//
+// Comments from md5.c from ssh-1.2.22, the basis for this code:
+//
+/* This code has been heavily hacked by Tatu Ylonen <ylo@cs.hut.fi> to
+ make it compile on machines like Cray that don't have a 32 bit integer
+ type. */
+/*
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest. This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ */
+
+/**
+ * Incremental MD5 digest. Feed data with {@code update(...)}, then call
+ * {@code md5final} once to obtain the 16 digest bytes. md5final wipes the
+ * internal state, so an instance cannot be reused afterwards.
+ */
+public class MD5 {
+  int buf[];      // These were originally unsigned ints.
+  // This Java code makes an effort to avoid sign traps.
+  // buf[] is where the hash accumulates.
+  long bits;      // This is the count of bits hashed so far.
+  byte in[];      // This is a buffer where we stash bytes until we have
+  // enough (64) to perform a transform operation.
+  int inint[];
+  // inint[] is scratch space for transform(); it is allocated once here
+  // and reused for every block.
+  // (In the C version this is allocated on the stack.)
+
+  public MD5() {
+    buf = new int[4];
+    // fill the hash accumulator with a seed value
+    buf[0] = 0x67452301;
+    buf[1] = 0xefcdab89;
+    buf[2] = 0x98badcfe;
+    buf[3] = 0x10325476;
+
+    // initially, we've hashed zero bits
+    bits = 0L;
+
+    in = new byte[64];
+    inint = new int[16];
+  }
+
+  /** Hashes all of {@code newbuf}. */
+  public void update(byte[] newbuf) {
+    update(newbuf, 0, newbuf.length);
+  }
+
+  /** Hashes the first {@code length} bytes of {@code newbuf}. */
+  public void update(byte[] newbuf, int length) {
+    update(newbuf, 0, length);
+  }
+
+  /**
+   * Hashes {@code buflen} bytes of {@code newbuf} starting at
+   * {@code bufstart}. Complete 64-byte blocks are transformed at once;
+   * a trailing partial block is kept in {@code in} for the next call.
+   */
+  public void update(byte[] newbuf, int bufstart, int buflen) {
+    int t;
+    int len = buflen;
+
+    // stash old bits value for the "Bytes already in" computation
+    // just below.
+    t = (int) bits; // (int) cast should just drop high bits, I hope
+
+    /* update bitcount */
+    /* the C code used two 32-bit ints separately, and carefully
+     * ensured that the carry carried.
+     * Java has a 64-bit long, which is just what the code really wants.
+     */
+    // Widen before shifting: shifting the int first loses the top bits
+    // once len reaches 2^28.
+    bits += ((long) len) << 3;
+
+    t = (t >>> 3) & 0x3f; /* Bytes already in this->in */
+
+    /* Handle any leading odd-sized chunks */
+    /* (that is, any left-over chunk left by last update() */
+
+    if (t != 0) {
+      int p = t;
+      t = 64 - t;
+      if (len < t) {
+        arraycopy(newbuf, bufstart, in, p, len);
+        return;
+      }
+      arraycopy(newbuf, bufstart, in, p, t);
+      transform();
+      bufstart += t;
+      len -= t;
+    }
+
+    /* Process data in 64-byte chunks */
+    while (len >= 64) {
+      arraycopy(newbuf, bufstart, in, 0, 64);
+      transform();
+      bufstart += 64;
+      len -= 64;
+    }
+
+    /* Handle any remaining bytes of data. */
+    /* that is, stash them for the next update(). */
+    arraycopy(newbuf, bufstart, in, 0, len);
+  }
+
+  /** Copies {@code len} bytes from {@code src} to {@code dest}. */
+  public void arraycopy(byte[] src, int srcPos, byte[] dest, int destPos, int len) {
+    for (int i = 0; i < len; i++) {
+      dest[destPos + i] = src[srcPos + i];
+    }
+    return;
+  }
+
+  /*
+   * Final wrapup - pad to 64-byte boundary with the bit pattern
+   * 1 0* (64-bit count of bits processed, LSB-first)
+   */
+  /**
+   * Finishes the digest and writes it into {@code digest}.
+   *
+   * @param digest receives the 16 digest bytes at indices 0..15
+   */
+  public void md5final(byte[] digest) {
+    /* "final" is a poor method name in Java. :v) */
+    int count;
+    int p; // in original code, this is a pointer; in this java code
+    // it's an index into the array this->in.
+
+    /* Compute number of bytes mod 64 */
+    count = (int) ((bits >>> 3) & 0x3F);
+
+    /* Set the first char of padding to 0x80.  This is safe since there is
+       always at least one byte free */
+    p = count;
+    in[p++] = (byte) 0x80;
+
+    /* Bytes of padding needed to make 64 bytes */
+    count = 64 - 1 - count;
+
+    /* Pad out to 56 mod 64 */
+    if (count < 8) {
+      /* Two lots of padding:  Pad the first block to 64 bytes */
+      zeroByteArray(in, p, count);
+      transform();
+
+      /* Now fill the next block with 56 bytes */
+      zeroByteArray(in, 0, 56);
+    } else {
+      /* Pad block to 56 bytes */
+      zeroByteArray(in, p, count - 8);
+    }
+
+    /* Append length in bits and transform */
+    // Could use a PUT_64BIT... func here. This is a fairly
+    // direct translation from the C code, where bits was an array
+    // of two 32-bit ints.
+    int lowbits = (int) bits;
+    int highbits = (int) (bits >>> 32);
+    PUT_32BIT_LSB_FIRST(in, 56, lowbits);
+    PUT_32BIT_LSB_FIRST(in, 60, highbits);
+
+    transform();
+    PUT_32BIT_LSB_FIRST(digest, 0, buf[0]);
+    PUT_32BIT_LSB_FIRST(digest, 4, buf[1]);
+    PUT_32BIT_LSB_FIRST(digest, 8, buf[2]);
+    PUT_32BIT_LSB_FIRST(digest, 12, buf[3]);
+
+    /* zero sensitive data */
+    /* notice this misses any sneaking out on the stack.  The C
+     * version uses registers in some spots, perhaps because
+     * they care about this.
+     */
+    zeroByteArray(in);
+    zeroIntArray(buf);
+    bits = 0;
+    zeroIntArray(inint);
+  }
+
+  /*
+     public static void main(String args[]) {
+  // This main() method was created to easily test
+  // this class.  It hashes whatever's on System.in.
+
+  byte buf[] = new byte[397];
+  // arbitrary buffer length designed to irritate update()
+  int rc;
+  MD5 md = new MD5();
+  byte out[] = new byte[16];
+  int i;
+  int len = 0;
+
+  try {
+  while ((rc = System.in.read(buf, 0, 397)) > 0) {
+  md.update(buf, rc);
+  len += rc;
+  }
+  } catch (IOException ex) {
+  ex.printStackTrace();
+  return;
+  }
+  md.md5final(out);
+
+  System.out.println("file length: "+len);
+  System.out.println("hash: "+dumpBytes(out));
+  }
+  */
+
+
+  /////////////////////////////////////////////////////////////////////
+  // Below here ye will only finde private functions                 //
+  /////////////////////////////////////////////////////////////////////
+
+  // There must be a way to do these functions that's
+  // built into Java, and I just haven't noticed it yet.
+
+  private void zeroByteArray(byte[] a) {
+    zeroByteArray(a, 0, a.length);
+  }
+
+  private void zeroByteArray(byte[] a, int start, int length) {
+    setByteArray(a, (byte) 0, start, length);
+  }
+
+  private void setByteArray(byte[] a, byte val, int start, int length) {
+    int i;
+    int end = start + length;
+    for (i = start; i < end; i++) {
+      a[i] = val;
+    }
+  }
+
+  private void zeroIntArray(int[] a) {
+    zeroIntArray(a, 0, a.length);
+  }
+
+  private void zeroIntArray(int[] a, int start, int length) {
+    setIntArray(a, (int) 0, start, length);
+  }
+
+  private void setIntArray(int[] a, int val, int start, int length) {
+    int i;
+    int end = start + length;
+    for (i = start; i < end; i++) {
+      a[i] = val;
+    }
+  }
+
+  // The four round functions: mix, rotate left by s, then add x.
+  private int MD5STEP1(int w, int x, int y, int z, int data, int s) {
+    w += (z ^ (x & (y ^ z))) + data;
+    w = w<<s | w>>>(32-s);
+    w += x;
+    return w;
+  }
+
+  private int MD5STEP2(int w, int x, int y, int z, int data, int s) {
+    w += (y ^ (z & (x ^ y))) + data;
+    w = w<<s | w>>>(32-s);
+    w += x;
+    return w;
+  }
+
+  private int MD5STEP3(int w, int x, int y, int z, int data, int s) {
+    w += (x ^ y ^ z) + data;
+    w = w<<s | w>>>(32-s);
+    w += x;
+    return w;
+  }
+
+  private int MD5STEP4(int w, int x, int y, int z, int data, int s) {
+    w += (y ^ (x | ~z)) + data;
+    w = w<<s | w>>>(32-s);
+    w += x;
+    return w;
+  }
+
+  /** Folds the 64 bytes currently in {@code in} into {@code buf}. */
+  private void transform() {
+    /* load in[] byte array into an internal int array */
+    int i;
+    // Reuse the scratch array allocated in the constructor rather than a
+    // fresh local per block; md5final() wipes this same array afterwards.
+    int[] inint = this.inint;
+
+    for (i = 0; i < 16; i++) {
+      inint[i] = GET_32BIT_LSB_FIRST(in, 4*i);
+    }
+
+    int a, b, c, d;
+    a = buf[0];
+    b = buf[1];
+    c = buf[2];
+    d = buf[3];
+
+    a = MD5STEP1(a, b, c, d, inint[0] + 0xd76aa478, 7);
+    d = MD5STEP1(d, a, b, c, inint[1] + 0xe8c7b756, 12);
+    c = MD5STEP1(c, d, a, b, inint[2] + 0x242070db, 17);
+    b = MD5STEP1(b, c, d, a, inint[3] + 0xc1bdceee, 22);
+    a = MD5STEP1(a, b, c, d, inint[4] + 0xf57c0faf, 7);
+    d = MD5STEP1(d, a, b, c, inint[5] + 0x4787c62a, 12);
+    c = MD5STEP1(c, d, a, b, inint[6] + 0xa8304613, 17);
+    b = MD5STEP1(b, c, d, a, inint[7] + 0xfd469501, 22);
+    a = MD5STEP1(a, b, c, d, inint[8] + 0x698098d8, 7);
+    d = MD5STEP1(d, a, b, c, inint[9] + 0x8b44f7af, 12);
+    c = MD5STEP1(c, d, a, b, inint[10] + 0xffff5bb1, 17);
+    b = MD5STEP1(b, c, d, a, inint[11] + 0x895cd7be, 22);
+    a = MD5STEP1(a, b, c, d, inint[12] + 0x6b901122, 7);
+    d = MD5STEP1(d, a, b, c, inint[13] + 0xfd987193, 12);
+    c = MD5STEP1(c, d, a, b, inint[14] + 0xa679438e, 17);
+    b = MD5STEP1(b, c, d, a, inint[15] + 0x49b40821, 22);
+
+    a = MD5STEP2(a, b, c, d, inint[1] + 0xf61e2562, 5);
+    d = MD5STEP2(d, a, b, c, inint[6] + 0xc040b340, 9);
+    c = MD5STEP2(c, d, a, b, inint[11] + 0x265e5a51, 14);
+    b = MD5STEP2(b, c, d, a, inint[0] + 0xe9b6c7aa, 20);
+    a = MD5STEP2(a, b, c, d, inint[5] + 0xd62f105d, 5);
+    d = MD5STEP2(d, a, b, c, inint[10] + 0x02441453, 9);
+    c = MD5STEP2(c, d, a, b, inint[15] + 0xd8a1e681, 14);
+    b = MD5STEP2(b, c, d, a, inint[4] + 0xe7d3fbc8, 20);
+    a = MD5STEP2(a, b, c, d, inint[9] + 0x21e1cde6, 5);
+    d = MD5STEP2(d, a, b, c, inint[14] + 0xc33707d6, 9);
+    c = MD5STEP2(c, d, a, b, inint[3] + 0xf4d50d87, 14);
+    b = MD5STEP2(b, c, d, a, inint[8] + 0x455a14ed, 20);
+    a = MD5STEP2(a, b, c, d, inint[13] + 0xa9e3e905, 5);
+    d = MD5STEP2(d, a, b, c, inint[2] + 0xfcefa3f8, 9);
+    c = MD5STEP2(c, d, a, b, inint[7] + 0x676f02d9, 14);
+    b = MD5STEP2(b, c, d, a, inint[12] + 0x8d2a4c8a, 20);
+
+    a = MD5STEP3(a, b, c, d, inint[5] + 0xfffa3942, 4);
+    d = MD5STEP3(d, a, b, c, inint[8] + 0x8771f681, 11);
+    c = MD5STEP3(c, d, a, b, inint[11] + 0x6d9d6122, 16);
+    b = MD5STEP3(b, c, d, a, inint[14] + 0xfde5380c, 23);
+    a = MD5STEP3(a, b, c, d, inint[1] + 0xa4beea44, 4);
+    d = MD5STEP3(d, a, b, c, inint[4] + 0x4bdecfa9, 11);
+    c = MD5STEP3(c, d, a, b, inint[7] + 0xf6bb4b60, 16);
+    b = MD5STEP3(b, c, d, a, inint[10] + 0xbebfbc70, 23);
+    a = MD5STEP3(a, b, c, d, inint[13] + 0x289b7ec6, 4);
+    d = MD5STEP3(d, a, b, c, inint[0] + 0xeaa127fa, 11);
+    c = MD5STEP3(c, d, a, b, inint[3] + 0xd4ef3085, 16);
+    b = MD5STEP3(b, c, d, a, inint[6] + 0x04881d05, 23);
+    a = MD5STEP3(a, b, c, d, inint[9] + 0xd9d4d039, 4);
+    d = MD5STEP3(d, a, b, c, inint[12] + 0xe6db99e5, 11);
+    c = MD5STEP3(c, d, a, b, inint[15] + 0x1fa27cf8, 16);
+    b = MD5STEP3(b, c, d, a, inint[2] + 0xc4ac5665, 23);
+
+    a = MD5STEP4(a, b, c, d, inint[0] + 0xf4292244, 6);
+    d = MD5STEP4(d, a, b, c, inint[7] + 0x432aff97, 10);
+    c = MD5STEP4(c, d, a, b, inint[14] + 0xab9423a7, 15);
+    b = MD5STEP4(b, c, d, a, inint[5] + 0xfc93a039, 21);
+    a = MD5STEP4(a, b, c, d, inint[12] + 0x655b59c3, 6);
+    d = MD5STEP4(d, a, b, c, inint[3] + 0x8f0ccc92, 10);
+    c = MD5STEP4(c, d, a, b, inint[10] + 0xffeff47d, 15);
+    b = MD5STEP4(b, c, d, a, inint[1] + 0x85845dd1, 21);
+    a = MD5STEP4(a, b, c, d, inint[8] + 0x6fa87e4f, 6);
+    d = MD5STEP4(d, a, b, c, inint[15] + 0xfe2ce6e0, 10);
+    c = MD5STEP4(c, d, a, b, inint[6] + 0xa3014314, 15);
+    b = MD5STEP4(b, c, d, a, inint[13] + 0x4e0811a1, 21);
+    a = MD5STEP4(a, b, c, d, inint[4] + 0xf7537e82, 6);
+    d = MD5STEP4(d, a, b, c, inint[11] + 0xbd3af235, 10);
+    c = MD5STEP4(c, d, a, b, inint[2] + 0x2ad7d2bb, 15);
+    b = MD5STEP4(b, c, d, a, inint[9] + 0xeb86d391, 21);
+
+    buf[0] += a;
+    buf[1] += b;
+    buf[2] += c;
+    buf[3] += d;
+  }
+
+  /** Reads a little-endian 32-bit value from {@code b} at {@code off}. */
+  private int GET_32BIT_LSB_FIRST(byte[] b, int off) {
+    return
+      ((int)(b[off+0]&0xff)) |
+      ((int)(b[off+1]&0xff) << 8) |
+      ((int)(b[off+2]&0xff) << 16) |
+      ((int)(b[off+3]&0xff) << 24);
+  }
+
+  /** Writes {@code value} little-endian into {@code b} at {@code off}. */
+  private void PUT_32BIT_LSB_FIRST(byte[] b, int off, int value) {
+    b[off+0] = (byte) (value & 0xff);
+    b[off+1] = (byte) ((value >> 8) & 0xff);
+    b[off+2] = (byte) ((value >> 16)& 0xff);
+    b[off+3] = (byte) ((value >> 24)& 0xff);
+  }
+
+  // These are debug routines I was using while trying to
+  // get this code to generate the same hashes as the C version.
+  // (IIRC, all the errors were due to the absence of unsigned
+  // ints in Java.)
+  /*
+     private void debugStatus(String m) {
+     System.out.println(m+":");
+     System.out.println("in: "+dumpBytes(in));
+     System.out.println("bits: "+bits);
+     System.out.println("buf: "
+     +Integer.toHexString(buf[0])+" "
+     +Integer.toHexString(buf[1])+" "
+     +Integer.toHexString(buf[2])+" "
+     +Integer.toHexString(buf[3]));
+     }
+
+     private static String dumpBytes(byte[] bytes) {
+     int i;
+     StringBuffer sb = new StringBuffer();
+     for (i=0; i<bytes.length; i++) {
+     if (i%32 == 0 && i!=0) {
+     sb.append("\n");
+     }
+     String s = Integer.toHexString(bytes[i]);
+     if (s.length() < 2) {
+     s = "0"+s;
+     }
+     if (s.length() > 2) {
+     s = s.substring(s.length()-2);
+     }
+     sb.append(s);
+     }
+     return sb.toString();
+     }
+     */
+}
--- /dev/null
+/**
+ * This class is a container for all data contained in an Email Message.
+ **/
+public class Mail {
+
+ String header; // the full header
+ //String sentOn; // time the message was sent
+ //String receivedOn; // time when the message arrived
+ String from; // the "from" field
+ String to; // the "to" field
+ String cc;
+ String subject;
+ String body;
+ String noURLBody;
+ String sourceCode;
+ String spam;
+ boolean hasAttachement;
+ String encoding; //rich text, plain, html
+
+ String messageID; // cached message ID for reuse (takes a lot of memory and is used all over the place)
+ //same as hashcode of a class
+ boolean isSpam;
+
+ /**
+ * this is a really simple implementation of a tokenizer
+ * used to build tokens from an email and divide email into parts
+ **/
+ int MAX_TOKEN_SIZE;
+
+ /** Creates an empty mail with no cached message id. */
+ public Mail() {
+   messageID = null;
+   // Use the same chunk size as the file-reading constructor. Left at 0,
+   // returnEmail() would hand getSplittedBody() a zero step and its loop
+   // would never advance.
+   MAX_TOKEN_SIZE = 1024;
+ }
+
+ /**
+  * Reads a mail from {@code fileName}. The file starts with header lines
+  * of the form "Key: value" (keys Spam, Header, To, From, Cc, Subject);
+  * only the first whitespace-separated token after the key is kept.
+  * Header parsing stops at the "Subject:" line and everything after it
+  * becomes the body.
+  */
+ public Mail(String fileName) // read a mail from file
+ {
+   //System.out.println("DEBUG: fileName= " + fileName);
+
+   BufferedReader fileinput = new BufferedReader(new FileInputStream(fileName));
+   String line;
+   boolean chk = false;
+
+   while ((line = fileinput.readLine()) != null)
+   {
+     chk = true;
+
+     Vector splittedLine = line.split();
+     // A line without any token carries no header key.
+     if (splittedLine.size() == 0)
+       continue;
+
+     String key = (String) (splittedLine.elementAt(0));
+     // A key with nothing after it leaves the field null instead of
+     // reading past the end of the token list.
+     String val = null;
+     if (splittedLine.size() > 1)
+       val = (String) (splittedLine.elementAt(1));
+
+     if (key.equals("Spam:"))
+     {
+       spam = val;
+     }
+     else if (key.equals("Header:")) // message id
+     {
+       header = val;
+     }
+     else if (key.equals("To:")) // receiver
+     {
+       to = val;
+     }
+     else if (key.equals("From:")) // sender
+     {
+       from = val;
+     }
+     else if (key.equals("Cc:")) // cc
+     {
+       cc = val;
+     }
+     else if (key.equals("Subject:")) // Subject
+     {
+       subject = val;
+       break;
+     }
+   } // parsed messageID, To, from, cc, Title
+
+   /**
+    * error checking
+    **/
+   if (!chk)
+     System.out.println("no line read");
+
+
+   body = new String();
+   byte[] readBody = new byte[256];
+   int n;
+
+   while ((n = fileinput.read(readBody)) > 0)
+   {
+     // Append only the bytes this read delivered; the rest of the buffer
+     // is stale or zero.
+     body += new String(readBody, 0, n);
+   }
+
+   fileinput.close();
+
+   MAX_TOKEN_SIZE = 1024;
+ }
+
+ // -------------------------------------------------------
+
+ /** Replaces the stored header text. */
+ public void setHeader(String header) {
+ this.header = header;
+ }
+
+ /** Returns the stored header text (null if never set). */
+ public String getHeader() {
+ return header;
+ }
+
+
+ /*
+ public void setSentOn(String sentOn) {
+ this.sentOn = sentOn;
+ }
+
+ public String getSentOn() {
+ return sentOn;
+ }
+
+ public Date getSentOnAsDate() {
+ String sentOn = getSentOn();
+ return parseDate(sentOn);
+ }
+
+ public void setReceivedOn(String receivedOn) {
+ this.receivedOn = receivedOn;
+ }
+
+ public String getReceivedOn() {
+ return receivedOn;
+ }
+
+ public Date getReceivedOnAsDate() {
+ String receivedOn = getReceivedOn();
+ return parseDate(receivedOn);
+ }
+ */
+
+
+ /**
+ * Parses a given Date-String in into a real Date-Object
+ *
+ * @param stringDate the string in format dd.mm.yyyy hh:mm
+ * @return a Date containing the info of the string or the actual date and time if something fails.
+ */
+ /*
+ public Date parseDate(String stringDate) {
+ // date is in this format: dd.mm.yyyy hh:mm
+ if (stringDate == null || "N/A".equals(stringDate)) {
+ return new Date();
+ }
+ try {
+ synchronized (MAIL_TIME_FORMAT) {
+ return MAIL_TIME_FORMAT.parse(stringDate);
+ }
+ } catch (Throwable e) {
+ return new Date();
+ }
+ }
+ */
+
+ /** Replaces the stored "from" field. */
+ public void setFrom(String from) {
+ this.from = from;
+ }
+
+ /** Returns the stored "from" field (null if never set). */
+ public String getFrom() {
+ return from;
+ }
+
+ /** Replaces the stored "to" field. */
+ public void setTo(String to) {
+ this.to = to;
+ }
+
+ /** Returns the stored "to" field (null if never set). */
+ public String getTo() {
+ return to;
+ }
+
+ /** Replaces the stored cc field. */
+ public void setCc(String cc) {
+ this.cc = cc;
+ }
+
+ /** Returns the stored cc field (null if never set). */
+ public String getCc() {
+ return cc;
+ }
+
+ /** Replaces the stored subject. */
+ public void setSubject(String subject) {
+ this.subject = subject;
+ }
+
+ /** Returns the stored subject (null if never set). */
+ public String getSubject() {
+ return subject;
+ }
+
+ /** Replaces the stored body text. */
+ public void setBody(String body) {
+ this.body = body;
+ }
+
+ /** Returns the stored body text (null if never set). */
+ public String getBody() {
+ return body;
+ }
+
+ /** Replaces the stored source code text. */
+ public void setSourceCode(String sourceCode) {
+ this.sourceCode = sourceCode;
+ }
+
+ /** Returns the stored source code text (null if never set). */
+ public String getSourceCode() {
+ return sourceCode;
+ }
+
+ /** Sets the attachment flag. */
+ public void setHasAttachement(boolean hasAttachement) {
+ this.hasAttachement = hasAttachement;
+ }
+
+ /** Returns the attachment flag. */
+ public boolean getHasAttachement() {
+ return hasAttachement;
+ }
+
+ /** Replaces the stored encoding description. */
+ public void setEncoding(String encoding) {
+ this.encoding = encoding;
+ }
+
+ /** Returns the stored encoding description (null if never set). */
+ public String getEncoding() {
+ return encoding;
+ }
+
+ public boolean isTextEncoding() {
+ return getEncoding().toLowerCase().indexOf("plain") >= 0;
+ }
+
+ public boolean isHTMLEncoding() {
+ return getEncoding().toLowerCase().indexOf("html") >= 0;
+ }
+
+ /*
+ public String toString() {
+ return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getReceivedOn() + "," + getSentOn() + "," + getSourceCode() + "," + getSubject() + "," + getTo();
+ }
+ */
+
+ public String toString() {
+ return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getSourceCode() + "," + getSubject() + "," + getTo();
+ }
+
+ /*
+ public String getID() {
+ if (messageID == null) { // no cached version
+ // Take the message-ID header as ID (if present)
+ String[] messageIDs = getHeaderField("Message-ID");
+ if ((messageIDs != null) && (messageIDs.length > 0)) {
+ messageID = messageIDs[0];
+ } else { // otherwise, hash header and body as ID
+ return String.valueOf(getHeader().hashCode() + getBody().hashCode());
+ }
+ }
+
+ return messageID;
+ }
+ */
+
+ /**
+  * Not implemented yet.
+  *
+  * @param fieldName name of the header field to look up (currently unused)
+  * @return always null
+  */
+ public String[] getHeaderField(String fieldName) {
+   // TODO: implement header lookup. A value must be returned for the
+   // method to be well-formed; null marks "no such field".
+   return null;
+ }
+
+ /**
+  * Not implemented yet.
+  *
+  * @return always null
+  */
+ public String extractEMailAddress() {
+   // TODO: implement address extraction. A value must be returned for
+   // the method to be well-formed.
+   return null;
+ }
+
+ /*
+ public boolean equals(Object o) {
+ if (o instanceof Mail) {
+ Mail mail = (Mail)o;
+ return this.getID().equals(mail.getID());
+ }
+
+ return false;
+ }
+ */
+
+ /**
+  * Returns a new Vector holding, in order, the header, the sender and
+  * the subject of this mail.
+  */
+ public Vector getCommonPart()
+ {
+   Vector common = new Vector();
+   common.addElement(header);   // header
+   common.addElement(from);     // sender
+   common.addElement(subject);  // title
+   return common;
+ }
+
+ /** Returns the stored body text (null if never set). */
+ public String getBodyString()
+ {
+ return body;
+ }
+
+ /**
+  * Returns a Vector of three Vectors: the common part (header, sender,
+  * subject), the URLs and e-mail addresses found in the body, and the
+  * remaining body text cut into chunks of MAX_TOKEN_SIZE.
+  */
+ public Vector returnEmail() {
+   Vector parts = new Vector();
+
+   Vector common = getCommonPart();
+   parts.addElement(common);
+
+   Vector links = getURLs();
+   parts.addElement(links);
+
+   Vector chunks = getSplittedBody(MAX_TOKEN_SIZE);
+   parts.addElement(chunks);
+
+   return parts;
+ }
+
+ public Vector getURLs()
+ {
+ Vector returnStrings = new Vector();
+ Vector splittedBody = body.split();
+
+ // add URL and email in the body
+ for(int i=0; i<splittedBody.size(); i++)
+ {
+ String segment = (String)(splittedBody.elementAt(i));
+ if(segment.startsWith("http://")) // URL
+ {
+ returnStrings.addElement(segment);
+ }
+ else if(isEmailAccount(segment)) // email
+ {
+ returnStrings.addElement(segment);
+ }
+ }
+
+ return returnStrings;
+ }
+
+ // check if it is email account string
+ private boolean isEmailAccount(String str) {
+   // Heuristic only: the string must contain both '@' and '.'.
+   return str.contains("@") && str.contains(".");
+ }
+
+ /**
+  * Rebuilds noURLBody: the segments of body.split(), concatenated in order,
+  * minus every segment that starts with "http://" or passes isEmailAccount().
+  */
+ public void setNoURLBody() {
+   Vector pieces = body.split();
+   int count = pieces.size();
+
+   // First pass: measure the kept text so the buffer is sized once.
+   int keptLength = 0;
+   for (int k = 0; k < count; k++) {
+     String piece = (String) (pieces.elementAt(k));
+     if (piece.startsWith("http://") || isEmailAccount(piece)) {
+       continue;
+     }
+     keptLength += piece.length();
+   }
+
+   // Second pass: concatenate the kept segments.
+   StringBuffer kept = new StringBuffer(keptLength);
+   for (int k = 0; k < count; k++) {
+     String piece = (String) (pieces.elementAt(k));
+     if (piece.startsWith("http://") || isEmailAccount(piece)) {
+       continue;
+     }
+     kept.append(piece);
+   }
+   noURLBody = kept.toString();
+ }
+
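+ // Splits the URL-free body (noURLBody) into consecutive chunks.
+ // setNoURLBody() is invoked by this method itself, so callers need not call it first.
+ // parameter : chunk size to split by.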
+ public Vector getSplittedBody(int size)
+ {
+ setNoURLBody();
+ Vector returnStrings = new Vector();
+ int end=noURLBody.length();
+
+ for(int i=1; i< end; i+=size)
+ {
+ if((i+size)>=end) {
+ String str=noURLBody.substring(i, end);
+ returnStrings.addElement(str);
+ }
+ else {
+ String str=noURLBody.substring(i, i+size);
+ returnStrings.addElement(str);
+ }
+ }
+ return returnStrings;
+ }
+
+
+ /** Stores the given flag in the isSpam field. */
+ public void setIsSpam(boolean spam) {
+   // The parameter is named "spam"; the assignment target is the isSpam field.
+   this.isSpam = spam;
+ }
+
+ /**
+  * Returns true when the spam field equals "yes".
+  * NOTE(review): setIsSpam() writes isSpam, not the spam field read here;
+  * confirm against the field declarations whether that is intended.
+  */
+ public boolean getIsSpam() {
+   return spam.equals("yes");
+ }
+
+ /**
+ * Returns result to the Spam filter
+ **/
+ public Vector checkMail(int userid) {
+   // Preprocess: [common part, URLs, body chunks]; see returnEmail().
+   Vector mailParts = returnEmail();
+
+   // Compute the signatures of those parts. userid is not used here.
+   SignatureComputer computer = new SignatureComputer();
+   return computer.computeSigs(mailParts);
+ }
+
+ /* For tests only */
+ /*
+ public static void main(String[] args)
+ {
+ Mail mail = new Mail("./emails/email1");
+
+ String[] a = mail.createMailStrings();
+
+ for(String b : a)
+ {
+ System.out.println(b);
+ }
+ }
+ */
+}
--- /dev/null
+public class SignatureComputer {
+ public EphemeralSignature sig4; //signature engines
+ public WhiplashSignature sig8; //signature engines
+
+ int[] enginesToUseForCheck;
+
+ /** Builds both signature engines with their default values and registers engines 4 and 8. */
+ public SignatureComputer() {
+   this.sig4 = new EphemeralSignature(); // default values
+   this.sig8 = new WhiplashSignature();
+   this.createEnginesToCheck();
+ }
+
+ /**
+ * Constructor to be used when some parsing has already taken place with the
+ * server-provided value <code>randomNumberSeed</code>.
+ *
+ * @param randomNumberSeed
+ * a non-negative number used for seeding the random number generator
+ * before starting to hash values.
+ * @param separator
+ * how the mail text should be split into lines (i.e. which characters
+ * separate two lines).
+ */
+ public SignatureComputer(int randomNumberSeed, String separator) {
+   // Only the Ephemeral engine takes the seed and separator.
+   this.sig4 = new EphemeralSignature(randomNumberSeed, separator);
+   this.sig8 = new WhiplashSignature();
+   this.createEnginesToCheck();
+ }
+
+ /**
+ * the constructor to be used most of the time. you can hand over the
+ * seed-string exactly as it is provided by the razor-server.
+ *
+ * @param seedAndSeparator
+ * a string containing the seed value for the RNG and a separator list
+ * (separated by ' <b>- </b>'). default value is
+ * <code>"7542-10"</code> which means server-seed 7542 and only one
+ * separator 10 (which is ascii '\n').
+ */
+ public SignatureComputer(String seedAndSeparator) {
+   // The combined seed/separator string is handed to the Ephemeral engine as-is.
+   this.sig4 = new EphemeralSignature(seedAndSeparator);
+   this.sig8 = new WhiplashSignature();
+   this.createEnginesToCheck();
+ }
+
+ /**
+ * Initialises enginesToUseForCheck with the engine numbers
+ * 4 (Ephemeral) and 8 (Whiplash).
+ */
+ public void createEnginesToCheck() {
+ enginesToUseForCheck = new int[2];
+ enginesToUseForCheck[0] = 4; //Ephemeral engine
+ enginesToUseForCheck[1] = 8;//Whiplash engine
+ }
+
+ /** Returns true when the given engine number is listed in enginesToUseForCheck. */
+ public boolean isSigSupported(int sig) {
+   for (int pos = 0; pos < enginesToUseForCheck.length; pos++) {
+     if (enginesToUseForCheck[pos] == sig) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * String variant of isSigSupported(int): null yields false, anything else
+  * is parsed with Integer.parseInt and looked up.
+  */
+ public boolean isSigSupported(String sig) {
+   if (sig == null) {
+     return false;
+   }
+   return isSigSupported(Integer.parseInt(sig));
+ }
+
+ public String getDefaultEngine() {
+ return "4";
+ }
+
+ /**
+  * Computes the printable signatures of a pre-processed mail.
+  *
+  * @param EmailParts Vector of three Vectors of Strings:
+  *        [0] common parts, [1] URL / e-mail segments, [2] body chunks without URLs
+  * @return Vector of Strings of the form "engineNo:signature", ordered as
+  *         common parts, then URL-free body chunks, then URL segments;
+  *         null when EmailParts is null
+  */
+ public Vector computeSigs(Vector EmailParts) {
+   if (EmailParts == null) return null;
+
+   Vector printableSigs = new Vector(); // vector of strings
+
+   // Step I: signatures for the common parts (engine 4 only).
+   Vector commonpart = (Vector) (EmailParts.elementAt(0));
+   addEphemeralSigs(commonpart, "Common part", printableSigs);
+
+   // Step II: signatures for the body parts without URLs (engine 4 only).
+   Vector bodyWithNoURLs = (Vector) (EmailParts.elementAt(2));
+   addEphemeralSigs(bodyWithNoURLs, "body parts without URL", printableSigs);
+
+   // Step III: signatures for the body parts with URLs (engine 8 only).
+   Vector urls = (Vector) (EmailParts.elementAt(1));
+   addWhiplashSigs(urls, printableSigs);
+
+   return printableSigs;
+ }//computeSigs
+
+ /**
+  * Appends "4:sig" to out for every non-null part that engine 4 can sign.
+  * Engine 8 is skipped silently; any other engine number is reported on stdout.
+  */
+ private void addEphemeralSigs(Vector parts, String label, Vector out) {
+   for (int mailIndex = 0; mailIndex < parts.size(); mailIndex++) {
+     String mail = (String) (parts.elementAt(mailIndex));
+     if (mail == null) continue;
+
+     for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
+       int engineNo = enginesToUseForCheck[engineIndex];
+
+       if (engineNo == 8) continue; // Whiplash does not apply to these parts
+
+       if (engineNo != 4) {
+         System.out.println("Err: " + label + " Couldn't find the signature engine: " + engineNo);
+         continue;
+       }
+
+       String sig = computeSignature(engineNo, mail);
+       if (sig != null) {
+         out.addElement(engineNo + ":" + sig);
+       }
+       // else: no signature was produced for this part.
+     }
+   }
+ }
+
+ /**
+  * Appends one "8:sig" entry to out per signature that the Whiplash engine
+  * returns for each part. Engine 4 is skipped silently; any other engine
+  * number is reported on stdout.
+  */
+ private void addWhiplashSigs(Vector parts, Vector out) {
+   for (int mailIndex = 0; mailIndex < parts.size(); mailIndex++) {
+     // A null part is passed through; sig8 answers null for null text.
+     String mail = (String) (parts.elementAt(mailIndex));
+
+     for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
+       int engineNo = enginesToUseForCheck[engineIndex];
+
+       if (engineNo == 4) continue; // Ephemeral does not apply to URL parts
+
+       if (engineNo != 8) {
+         System.out.println("Err: body parts with URL Couldn't find the signature engine: " + engineNo);
+         continue;
+       }
+
+       String[] hosts = sig8.computeSignature(mail);
+       if (hosts != null) {
+         for (int i = 0; i < hosts.length; i++) {
+           out.addElement(engineNo + ":" + hosts[i]);
+         }
+       }
+       // else: no signature was produced for this part.
+     }
+   }
+ }
+
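+ /**
+ * Computes the signature of one mail part with the given engine.
+ * @param engineNo signature engine number; only 4 (Ephemeral) is handled here
+ * @param mail the text to sign
+ * @return the engine-4 signature, or null for any other engine number
+ */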
+ private String computeSignature(int engineNo, String mail) {
+   // Only the Ephemeral engine (4) is dispatched here; the Whiplash
+   // engine (8) is called directly by computeSigs.
+   if (engineNo != 4) {
+     return null;
+   }
+   return this.sig4.computeSignature(mail);
+ }
+}
--- /dev/null
+public class SpamFilter extends Thread {
+ DistributedHashMap mydhmap;
+
+ int id; //thread id
+
+ /**
+ * Total number of iterations
+ **/
+ int numiter;
+
+ /**
+ * Total number of emails
+ **/
+ int numemail;
+
+ /**
+ * Total number of threads
+ **/
+ int nthreads;
+
+ /** Creates an unconfigured filter; main() uses one as a holder for parsed options. */
+ public SpamFilter() {
+   // Intentionally empty: fields keep their default values.
+ }
+
+ /**
+  * Creates a worker.
+  *
+  * @param numiter  total number of iterations
+  * @param numemail total number of emails
+  * @param id       thread id
+  * @param mydhmap  the shared map the worker reads and updates
+  * @param nthreads total number of threads
+  */
+ public SpamFilter(int numiter, int numemail,int id, DistributedHashMap mydhmap, int nthreads) {
+   this.id = id;
+   this.nthreads = nthreads;
+   this.numiter = numiter;
+   this.numemail = numemail;
+   this.mydhmap = mydhmap;
+ }
+
+ public void run() {
+ int niter;
+ int nemails;
+ int thid;
+ int correct=0;
+ int wrong=0;
+
+ atomic {
+ niter=numiter;
+ nemails=numemail;
+ thid = id;
+ }
+
+ Random rand = new Random(thid);
+ int i;
+
+ for(i=0; i<niter; i++) {
+ correct =0;
+ wrong = 0;
+ for(int j=0; j<nemails; j++) {
+ // long start = System.currentTimeMillis();
+ int pickemail = rand.nextInt(100);
+
+// System.out.println("pickemail= " + pickemail);
+
+ // randomly pick emails
+ pickemail+=1;
+ Mail email = new Mail("../emails/email"+pickemail);
+ Vector signatures = email.checkMail(thid);
+
+ //check with global data structure
+ int[] confidenceVals=null;
+ // long startcheck = System.currentTimeMillis();
+ atomic {
+ confidenceVals = check(signatures,thid);
+ }
+ // long stopcheckMail = System.currentTimeMillis();
+ // long diff = (stopcheckMail-startcheck);
+ // System.out.println("check takes= " + diff + "millisecs");
+
+ /* Only for debugging
+ for(int k=0; k<signatures.size();k++) {
+ System.out.println("confidenceVals["+k+"]= "+confidenceVals[k]);
+ }
+ */
+
+ //---- create and return results --------
+ FilterResult filterResult = new FilterResult();
+ //long startgetResult = System.currentTimeMillis();
+ boolean filterAnswer = filterResult.getResult(confidenceVals);
+ //long stopgetResult = System.currentTimeMillis();
+ //diff = (stopgetResult-startgetResult);
+ //System.out.println("getResult takes= " + diff + "millisecs");
+
+ //---- get user's take on email and send feedback ------
+ boolean userAnswer = email.getIsSpam();
+
+// System.out.println("userAnswer= " + userAnswer + " filterAnswer= " + filterAnswer);
+
+ if(filterAnswer != userAnswer) {
+ /* wrong answer from the spam filter */
+ wrong++;
+ //long startsendFeedBack = System.currentTimeMillis();
+ atomic {
+ sendFeedBack(signatures, userAnswer, thid, rand);
+ }
+ //long stopsendFeedBack = System.currentTimeMillis();
+ //diff = (stopsendFeedBack-startsendFeedBack);
+ //System.out.println("sendFeedback takes= " + diff + "millisecs");
+ }
+ else {
+ /* Correct answer from the spam filter */
+ correct++;
+ }
+ //long stop = System.currentTimeMillis();
+ //diff = stop-start;
+// System.out.println("time to complete iteration" + j + " = " + diff + " millisecs");
+ } //end num emails
+ System.out.println((i+1)+"th iteration correct = " + correct + " Wrong = " + wrong + " percentage = " + ((float)correct/(float)nemails));
+ }//end num iter
+ // Sanity check
+// System.out.println((i)+"th iteration correct = " + correct + " Wrong = " + wrong + " percentage = " + ((float)correct/(float)nemails));
+ System.out.println("\n\n\n I'm Done\n\n\n");
+
+ RecoveryStat.printRecoveryStat();
+ }
+
+ /**
+ * Entry point: parses the command line, creates the shared map and one
+ * SpamFilter worker per thread, starts each worker with a machine id
+ * taken from mid[], waits for all of them and prints "Finished".
+ */
+ public static void main(String[] args) {
+ // Each machine id packs the four octets of an IPv4 address into one int.
+ // NOTE(review): only mid[0] and mid[1] are assigned, yet mid[i] is read
+ // for every i < nthreads; slots 2..7 stay 0 and an nthreads above 8 would
+ // index past the array. Confirm the intended thread count.
+ int[] mid = new int[8];
+ mid[0] = (128<<24)|(195<<16)|(180<<8)|21; //dw-2
+ mid[1] = (128<<24)|(195<<16)|(180<<8)|26; //dw-7
+/*
+ mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1.calit2
+ mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2.calit2
+ mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3.calit2
+ mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-4.calit2
+ mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-5.calit2
+ mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-6.calit2
+ mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dc-7.calit2
+ mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dc-8.calit2
+*/
+ // Read options from the command line; sf only carries the parsed values.
+ SpamFilter sf = new SpamFilter();
+ SpamFilter.parseCmdLine(args, sf);
+ int nthreads = sf.nthreads;
+
+ // Create the global data structures, each in its own transaction.
+ DistributedHashMap dhmap;
+ SpamFilter[] spf;
+ atomic {
+ dhmap = global new DistributedHashMap(500, 0.75f);
+ }
+ atomic {
+ spf = global new SpamFilter[nthreads];
+ for(int i=0; i<nthreads; i++) {
+ spf[i] = global new SpamFilter(sf.numiter, sf.numemail, i, dhmap, nthreads);
+ }
+ }
+
+ /* ---- Start Threads ---- */
+ // The array element is read inside a transaction, then started outside it.
+ SpamFilter tmp;
+ for(int i = 0; i<nthreads; i++) {
+ atomic {
+ tmp = spf[i];
+ }
+ tmp.start(mid[i]);
+ }
+
+ /* ---- Join threads----- */
+ for(int i = 0; i<nthreads; i++) {
+ atomic {
+ tmp = spf[i];
+ }
+ tmp.join();
+ }
+
+ System.out.println("Finished");
+ }
+
+ public static void parseCmdLine(String args[], SpamFilter sf) {
+ int i = 1;
+
+ sf.nthreads = new Integer(args[0]).intValue();
+
+
+ String arg;
+ while (i < args.length && args[i].startsWith("-")) {
+ arg = args[i++];
+ //check options
+ if(arg.equals("-n")) { //num of iterations
+ if(i < args.length) {
+ sf.numiter = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-e")) { //num of emails
+ if(i < args.length) {
+ sf.numemail = new Integer(args[i++]).intValue();
+ }
+ }
+
+ /*else if(arg.equals("-t")) { //num of threads
+ if(i < args.length) {
+ sf.nthreads = new Integer(args[i++]).intValue();
+ }
+ }
+ */
+ else if(arg.equals("-h")) {
+ sf.usage();
+ }
+ }
+ if(sf.nthreads == 0) {
+ sf.usage();
+ }
+ }
+
+ /**
+ * The usage routine describing the program
+ **/
+ public void usage() {
+   // One line of synopsis followed by one line per option.
+   System.out.println("usage: ./spamfilter <num thread> -n <num iterations> -e <num emails>\n");
+   System.out.println(" -n : num iterations");
+   System.out.println(" -e : number of emails");
+ }
+
+ /**
+ * Returns result to the Spam filter
+ **/
+ /*
+ public boolean checkMail(Mail mail, int userid) {
+ //Preprocess emails
+ //Vector partsOfMailStrings = mail.createMailStringsWithURL();
+ /*
+ Vector partsOfMailStrings = mail.getCommonPart();
+ partsOfMailStrings.addElement(mail.getBodyString());
+
+ //Compute signatures
+ SignatureComputer sigComp = new SignatureComputer();
+ Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
+
+ //check with global data structure
+ int[] confidenceVals = check(signatures,userid);
+
+ //---- create and return results --------
+ FilterResult filterResult = new FilterResult();
+ boolean spam = filterResult.getResult(confidenceVals);
+
+ return spam;
+ }
+ */
+
+ /**
+  * Looks up every signature in the shared map.
+  *
+  * @param signatures Vector of Strings of the form "a:b", where a is the
+  *        engine character ('4' or '8') and b the signature
+  * @param userid id recorded in the statistics of newly inserted entries
+  * @return one value per signature: 0 for a signature that was not in the
+  *         map (it is inserted with fresh statistics), otherwise the stored
+  *         FilterStatistic's getChecked() value
+  */
+ public int[] check(Vector signatures, int userid) {
+   int total = signatures.size();
+   int[] confidenceVals = new int[total];
+
+   for (int idx = 0; idx < total; idx++) {
+     String part = (String) (signatures.elementAt(idx));
+
+     // Index 0 is the engine character; the engine stays null for anything else.
+     char engineTag = part.charAt(0);
+     GString engine = null;
+     if (engineTag == '4') { // Ephemeral Signature calculator
+       String tmpstr = new String("4");
+       engine = global new GString(tmpstr);
+     } else if (engineTag == '8') { // Whiplash Signature calculator
+       String tmpstr = new String("8");
+       engine = global new GString(tmpstr);
+     }
+
+     // a:b -> index of a = 0, index of ':' = 1, b starts at index 2.
+     String sigText = new String(part.substring(2));
+     GString signature = global new GString(sigText);
+
+     HashEntry probe = global new HashEntry();
+     probe.setengine(engine);
+     probe.setsig(signature);
+
+     if (mydhmap.containsKey(probe)) {
+       // Existing entry: read its statistic through the stored key.
+       HashEntry stored = (HashEntry) (mydhmap.getKey(probe));
+       FilterStatistic fs = (FilterStatistic) (mydhmap.get(stored));
+       confidenceVals[idx] = fs.getChecked();
+     } else {
+       // Unknown signature: insert it with fresh statistics.
+       HashStat mystat = global new HashStat();
+       mystat.setuser(userid, 0, 0, -1);
+       probe.setstats(mystat);
+       FilterStatistic fs = global new FilterStatistic(0,0,-1);
+       mydhmap.put(probe, fs);
+       confidenceVals[idx] = 0;
+     }
+   }
+
+   // The caller decides from these values whether the mail is spam.
+   return confidenceVals;
+ }
+
+ /**
+ * This method sends feedback from the user to a distributed
+ * spam database and trains the spam database to check future
+ * emails and detect spam
+ **/
+ public void sendFeedBack(Vector signatures, boolean isSpam, int id, Random myrand) {
+   for (int idx = 0; idx < signatures.size(); idx++) {
+     // A signature has the form a:b, where a is the engine character
+     // ('4' or '8') and b is the signature itself.
+     String part = (String) (signatures.elementAt(idx));
+     char engineTag = part.charAt(0);
+
+     GString engine = null;
+     if (engineTag == '4') {
+       String tmpstr = new String("4");
+       engine = global new GString(tmpstr);
+     } else if (engineTag == '8') {
+       String tmpstr = new String("8");
+       engine = global new GString(tmpstr);
+     }
+
+     String sigText = new String(part.substring(2));
+     GString signature = global new GString(sigText);
+
+     HashEntry probe = global new HashEntry();
+     probe.setengine(engine);
+     probe.setsig(signature);
+
+     // Only signatures already present in the shared map are trained.
+     if (!mydhmap.containsKey(probe)) {
+       continue;
+     }
+
+     HashEntry stored = (HashEntry) (mydhmap.getKey(probe));
+     if (stored.stats.userid[id] != 1) {
+       stored.stats.setuserid(id);
+     }
+
+     // Get the value from the distributed hash to update its counts.
+     FilterStatistic fs = (FilterStatistic) (mydhmap.get(probe));
+
+     // 95% of the time the user's verdict is reported as given;
+     // the remaining 5% simulate incorrect feedback by inverting it.
+     int pick = myrand.nextInt(100);
+     boolean reportSpam;
+     if (pick < 95) {
+       reportSpam = isSpam;
+     } else {
+       reportSpam = !isSpam;
+     }
+
+     if (reportSpam) {
+       stored.stats.incSpamCount(id);
+       fs.increaseSpam();
+     } else {
+       stored.stats.incHamCount(id);
+       fs.increaseHam();
+     }
+   } // end of for
+ }//end of sendFeeback()
+}
+
+
--- /dev/null
+
+/*
+ Part of the Spamato project (www.spamato.net)
+ Copyright (C) 2005 ETHZ, DCG
+ contact by email: info@spamato.net
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+ $Id: WhiplashSignature.java,v 1.1 2010/02/13 00:09:44 jihoonl Exp $
+ */
+public class WhiplashSignature {
+ char[] b64table;
+
+ /**
+  * Fills the 64-entry alphabet used by hexToBase64:
+  * 'A'-'Z', 'a'-'z', '0'-'9', then '-' and '_'.
+  */
+ public WhiplashSignature() {
+   b64table = new char[64];
+
+   for (int i = 0; i < 62; i++) {
+     if (i < 26) {
+       b64table[i] = (char) ('A' + i);         // 0..25  -> 'A'..'Z'
+     } else if (i < 52) {
+       b64table[i] = (char) ('a' + (i - 26));  // 26..51 -> 'a'..'z'
+     } else {
+       b64table[i] = (char) ('0' + (i - 52));  // 52..61 -> '0'..'9'
+     }
+   }
+   b64table[62] = '-';
+   b64table[63] = '_';
+ }
+
+ /**
+  * Computes the Whiplash signatures of a text; currently a direct
+  * delegation to whiplash().
+  *
+  * @param text the text to scan for hosts
+  * @return whatever whiplash(text) returns, which may be null
+  */
+ public String[] computeSignature(String text) {
+   // TODO: Extract canonical domain name and convert to Base64
+   // (hexToBase64 is not applied to the signatures yet).
+   return whiplash(text);
+ }
+
+ /**
+ * converts a hex-string in a base64-string exactly as it is done in razor.
+ * @param hex a hex-value
+ * @return a base64-equivalent of <code>hex</code>.
+ */
+ public String hexToBase64(String hex){
+   if (hex == null) {
+     return null;
+   }
+
+   int hexLen = hex.length();
+   // Every chunk of up to 3 hex characters yields two 6-bit values.
+   int[] sextets = new int[hexLen*2/3 + ((hexLen*2)%3)];
+   int filled = 0;
+
+   for (int pos = 0; pos < hexLen; pos += 3) {
+     int stop = pos + 3;
+     if (stop > hexLen) {
+       stop = hexLen;
+     }
+     int bv = convertHexToRazorEncoding(hex.substring(pos, stop));
+     sextets[filled++] = ((0xfc0 & bv) >>> 6); // higher 6 bits
+     sextets[filled++] = (0x3f & bv);          // lower 6 bits
+   }
+
+   // Map each 6-bit value through the alphabet table.
+   String encoded = "";
+   for (int k = 0; k < sextets.length; k++) {
+     encoded += b64table[ sextets[k] ];
+   }
+   return encoded;
+ }
+
+ /**
+ * razor does some special conversion using perl's <code>pack()</code> which
+ * we must do manually in java.
+ */
+ private int convertHexToRazorEncoding(String hex3) {
+   if (hex3 == null) {
+     return 0; // error
+   }
+
+   int res = 0;
+   // Three nibble slots at bit offsets 8, 4 and 0. The first digit is
+   // always parsed; missing second or third digits count as 0.
+   for (int pos = 0; pos < 3; pos++) {
+     int cur;
+     if (pos == 0 || hex3.length() > pos) {
+       cur = Integer.parseInt(hex3.substring(pos, pos + 1), 16);
+     } else {
+       cur = 0;
+     }
+     // Each nibble is bit-mirrored before being placed.
+     cur = mirror4LSBits(cur);
+     res |= ((cur & 0xf) << (8 - 4 * pos));
+   }
+
+   return res;
+ }
+
+ /**
+ * Mirrors the 4 least significant bits of an integer.
+ * @param cur an int whose 4 least significant bits are <code>abcd</code>, i.e. <code>00000...00abcd</code>
+ * @return the mirrored 4 least significant bits <code>00000...00dcba</code>. All bits other than <code>a-d</code> are lost.
+ */
+ public int mirror4LSBits(int cur) {
+   // Bit 3 <-> bit 0 and bit 2 <-> bit 1; everything above bit 3 is dropped.
+   return ((cur & 0x8) >>> 3)
+        | ((cur & 0x4) >>> 1)
+        | ((cur & 0x2) << 1)
+        | ((cur & 0x1) << 3);
+ }
+
+ /**
+  * Produces one signature per host found in the text: the 16 bytes that the
+  * MD5 helper writes for the host name, wrapped in a String.
+  *
+  * @param text the text to scan; may be null
+  * @return one signature per extracted host, or null when text is null or
+  *         no host is found
+  */
+ public String[] whiplash(String text) {
+   if (text == null) {
+     return null;
+   }
+
+   String[] hosts = extractHosts(text);
+   if (hosts == null || hosts.length < 1) {
+     return null;
+   }
+
+   int count = hosts.length;
+   String[] sigs = new String[count];
+   for (int h = 0; h < count; h++) {
+     String host = hosts[h];
+     byte[] digest = new byte[16];
+
+     // A fresh MD5 instance is used for every host.
+     MD5 md = new MD5();
+     md.update(host.getBytes(), host.length());
+     md.md5final(digest);
+
+     sigs[h] = new String(digest);
+   }
+   return sigs;
+ }
+
+ /**
+  * Extracts host labels from a text: for every "www." and then every "@",
+  * the characters between that marker and the next "." are taken as a host.
+  *
+  * @param text the text to scan; may be null
+  * @return the hosts in discovery order ("www." hosts first, then "@" hosts),
+  *         or null when text is null or nothing is found
+  */
+ public String[] extractHosts(String text) {
+   if (text == null) {
+     return null;
+   }
+
+   Vector hosts = new Vector();
+
+   /* Extract hosts from www. links */
+   collectHosts(text, "www.", hosts);
+
+   /* Extract hosts from email addresses */
+   collectHosts(text, "@", hosts);
+
+   if (hosts.size() == 0) {
+     return null;
+   }
+
+   String[] retbuf = new String[hosts.size()];
+   for (int i = 0; i < hosts.size(); i++) {
+     retbuf[i] = (String) (hosts.elementAt(i));
+   }
+
+   return retbuf;
+ }
+
+ /** Appends to hosts the text between each occurrence of marker and the next ".". */
+ private void collectHosts(String text, String marker, Vector hosts) {
+   String dot = ".";
+   String buf = text;
+   int idx;
+   while ((idx = buf.indexOf(marker)) != -1) {
+     buf = buf.subString(idx + marker.length());
+     int endidx = buf.indexOf(dot);
+     if (endidx == -1) {
+       // No "." after the marker (e.g. "a.b@c"): the old code passed -1
+       // on to subString(0, -1). Nothing further can match, so stop.
+       break;
+     }
+     hosts.addElement(buf.subString(0, endidx));
+     // Continue scanning just past the dot that ended this host.
+     buf = buf.subString(endidx + dot.length());
+   }
+ }
+
+// Testing the signature computation
+// public static void main(String[] args) {
+// /* String testVector = " Test Vectors: \n"+
+// "\n" +
+// "1. http:www.nodg.com@www.geocities.com/nxcisdsfdfdsy/off\n"+
+// "2. http:www.ksleybiuh.com@213.171.60.74/getoff/\n"+
+// "3. <http:links.verotel.com/cgi-bin/showsite.verotel?vercode=12372:9804000000374206>\n"+
+// "4. http:217.12.4.7/rmi/http:definethis.net/526/index.html\n"+
+// "5. http:magalygr8sex.free-host.com/h.html\n"+
+// "6. http:%3CVenkatrs%3E@218.80.74.102/thecard/4index.htm\n"+
+// "7. http:EBCDVKIGURGGCEOKXHINOCANVQOIDOXJWTWGPC@218.80.74.102/thecard/5in\n"+
+// "8. http:g.india2.bag.gs/remove_page.htm\n"+
+// "9. https:220.97.40.149\n"+
+// "10. http:mjaked.biz/unsubscribe.ddd?leaving\n"+
+// "11. http:g5j99m8@it.rd.yahoo.com/bassi/*http:www.lekobas.com/c/index.php\n"+
+// "12. <a href=\"http:Chettxuydyhv vwyyrcmgbxzj n as ecq kkurxtrvaug nfsygjjjwhfkpaklh t a qsc exinscfjtxr\n"+
+// " jobg @www.mmv9.org?affil=19\">look great / feel great</a>\n"+
+// "13. <A HREF=\"http:href=www.churchwomen.comhref=www.cairn.nethref=www.teeter.orghref=www.lefty.bizhref=wwwbehold.pitfall@www.mmstong5f.com/host/index.asp?ID=01910?href=www.corrode.comhref=www.ode.nethref=www.clergy.orghref=www.aberrate.biz\" >\n"+
+// "14. www.pillzthatwork.com # anything that starts with www. \n";
+// */
+// String testVector = "<html>\n"+
+// "<body>\n"+
+// "<p>Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com</p>\n"+
+// "<p>now a masked link <a href=\"http://www.hiddenlink1.com\">http://www.coveringlink1.com</a> and another link http:plaintextlink3.net and how about https:plaintextlink4.to</p>\n"+
+// "<p>another masked link <A Href=\"http://www.hiddenlink2.com\">https:coveringlink2.com</A> and another link https:plaintextlink5.com</p>\n"+
+// "</body>\n"+
+// "</html>\n";
+// String test1 = "Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com</p>\n";
+// WhiplashSignature whiplash = new WhiplashSignature();
+// String[] hosts = whiplash.computeSignature(testVector);
+// //String[] hosts = whiplash.computeSignature(test1);
+// for (int i = 0; i < hosts.length; i++) {
+// String string = hosts[i];
+// System.out.println("host " + i + ":\t" + string);
+// }
+// }
+
+}
--- /dev/null
+128.195.180.21
+128.195.180.26
+#128.195.136.162
+#128.195.136.163
+#128.195.136.164
+#128.195.136.165
+#128.195.136.166
+#128.195.136.167
+#128.195.136.168
+#128.195.136.169
+
+
--- /dev/null
+# Builds the SpamFilter DSM benchmark through the project's buildscript.
+MAINCLASS=SpamFilter
+SRC=${MAINCLASS}.java \
+ DistributedHashMap.java \
+ Mail.java \
+ FilterResult.java \
+ HashEntry.java \
+ HashStat.java \
+ SignatureComputer.java \
+ FilterStatistic.java \
+ EphemeralSignature.java \
+ GString.java \
+ WhiplashSignature.java
+
+FLAGS= -dsm -recoverystats -recovery -nooptimize -mainclass ${MAINCLASS}
+
+# Neither target names a file, so keep make from ever treating them as up to date.
+.PHONY: default clean
+
+default:
+	../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC}
+
+# -f keeps clean from failing when no binary has been built yet.
+clean:
+	rm -rf tmpbuilddirectory*
+	rm -f *.bin
+++ /dev/null
-// A crawl query held in global (shared) strings: a host name, a path on
-// that host, and a depth value.
-public class GlobalQuery {
- GlobalString hostname;
- GlobalString path;
- int depth;
-
- // Stores the given references directly; depth starts at 0.
- public GlobalQuery(GlobalString hostname, GlobalString path) {
- this.hostname = hostname;
- this.path = path;
- this.depth = 0;
- }
-
- // Unlike the two-argument constructor, this one copies both strings
- // into newly allocated global strings.
- public GlobalQuery(GlobalString hostname, GlobalString path, int depth) {
- this.hostname = global new GlobalString(hostname);
- this.path = global new GlobalString(path);
- this.depth = depth;
- }
-
- public int getDepth() {
- return depth;
- }
-
- public GlobalString getHostName() {
- return hostname;
- }
-
- public GlobalString getPath() {
- return path;
- }
-
- // Builds "<host>/<path>" for a link, resolving both parts with
- // getHostName(page) and getPathName(page).
- public GlobalString makewebcanonical(GlobalString page) {
- GlobalStringBuffer b = global new GlobalStringBuffer(getHostName(page));
- b.append("/");
- b.append(getPathName(page));
- return b.toGlobalString();
- }
-
- // Returns the host part of a link. A link containing neither "http://"
- // nor "https://" is treated as relative and yields this query's own host.
- public GlobalString getHostName(GlobalString page) {
- GlobalString http = global new GlobalString("http://");
- GlobalString https = global new GlobalString("https://");
- int beginindex;
- int endindex;
-
- if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
- return getHostName();
- }
- else if (page.indexOf(https) != -1) {
- beginindex = page.indexOf(https) + https.length();
- }
- else {
- beginindex = page.indexOf(http) + http.length();
- }
- // The host ends at the first '/' after the scheme, or at end of string.
- endindex = page.indexOf('/',beginindex+1);
-
- // NOTE(review): beginindex is a found index plus a scheme length on every
- // path that reaches this point, so this check cannot fire.
- if ((beginindex == -1)) {
- System.printString("ERROR");
- }
- if (endindex == -1)
- endindex = page.length();
-
- return page.subString(beginindex, endindex);
- }
-
-
- // Returns the path part of a link. A relative link is appended to the
- // directory of this query's path (everything up to and including its last
- // '/'); an absolute link without a path yields "index.html".
- public GlobalString getPathName(GlobalString page) {
- GlobalString http = global new GlobalString("http://");
- GlobalString https = global new GlobalString("https://");
- int beginindex;
- int nextindex;
-
- if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
- GlobalString path = getPath();
- int lastindex = path.lastindexOf('/');
- if (lastindex == -1)
- return page;
-
- GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1));
- sb.append(page);
- return sb.toGlobalString();
- }
- else if (page.indexOf(https) != -1) {
- beginindex = page.indexOf(https) + https.length();
- }
- else {
- beginindex = page.indexOf(http) + http.length();
- }
- // First '/' after the host marks the start of the path.
- nextindex = page.indexOf('/',beginindex+1);
-
- if ((beginindex == -1) || (nextindex == -1))
- return global new GlobalString("index.html");
- return page.subString(nextindex+1, page.length());
- }
-}
+++ /dev/null
-public class LocalQuery {
- String hostname;
- String path;
- StringBuffer response;
- int depth;
-
- public LocalQuery(String hostname, String path, int depth) {
- this.hostname = new String(hostname);
- this.path = new String(path);
- response = new StringBuffer();
- this.depth = depth;
- }
-
- public int getDepth() {
- return depth;
- }
-
- public String getHostName() {
- return hostname;
- }
-
- public String getPath() {
- return path;
- }
-
- public void outputFile() {
- StringBuffer sb = new StringBuffer(hostname);
- sb.append(path);
- FileOutputStream fos = new FileOutputStream(sb.toString().replace('/','#'));
- fos.write(response.toString().getBytes());
- fos.close();
- }
-
- public String makewebcanonical(String page) {
- StringBuffer b = new StringBuffer(getHostName(page));
- b.append("/");
- b.append(getPathName(page));
- return b.toString();
- }
-
- public String getHostName(String page) {
- String http = new String("http://");
- String https = new String("https://");
- int beginindex;
- int endindex;
-
- if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
- return getHostName();
- }
- else if (page.indexOf(https) != -1) {
- beginindex = page.indexOf(https) + https.length();
- }
- else {
- beginindex = page.indexOf(http) + http.length();
- }
- endindex = page.indexOf('/',beginindex+1);
-
- if ((beginindex == -1)) {
- System.printString("ERROR");
- }
- if (endindex == -1)
- endindex = page.length();
-
- return page.subString(beginindex, endindex);
- }
-
- public String getPathName(String page) {
- String http = new String("http://");
- String https = new String("https://");
- int beginindex;
- int nextindex;
-
- if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
- String path = getPath();
- int lastindex = path.lastindexOf('/');
- if (lastindex == -1)
- return page;
-
- StringBuffer sb = new StringBuffer(path.subString(0,lastindex+1));
- sb.append(page);
- return sb.toString();
- }
- else if (page.indexOf(https) != -1) {
- beginindex = page.indexOf(https) + https.length();
- }
- else {
- beginindex = page.indexOf(http) + http.length();
- }
- nextindex = page.indexOf('/',beginindex+1);
-
- if ((beginindex==-1) || (nextindex==-1))
- return new String("index.html");
- return page.subString(nextindex+1, page.length());
- }
-}
+++ /dev/null
-public class QueryTask extends Task {
- int maxDepth;
- int maxSearchDepth;
- GlobalQueue toprocess;
- DistributedHashMap results;
- DistributedLinkedList results_list;
- DistributedHashMap visitedList;
- GlobalString gTitle;
- GlobalString workingURL;
-
- public QueryTask(GlobalQueue todoList, DistributedHashMap visitedList, int maxDepth, int maxSearchDepth, DistributedHashMap results, DistributedLinkedList results_list) {
- this.todoList = todoList;
- this.visitedList = visitedList;
- this.maxDepth = maxDepth;
- this.maxSearchDepth = maxSearchDepth;
- this.results = results;
- this.results_list = results_list;
- toprocess = global new GlobalQueue();
- }
-
- public void execute() {
- int depth;
- int max;
- int maxSearch;
-
- atomic {
- depth = ((GlobalQuery)myWork).getDepth();
- max = this.maxDepth;
- maxSearch = this.maxSearchDepth;
- }
-
- if (depth < max) {
- /* global variables */
- GlobalQuery gq;
-
- /* local variables */
- LocalQuery lq;
- String hostname;
- String path;
- String title;
-
- atomic {
- gq = (GlobalQuery)myWork;
- hostname = new String(GlobalString.toLocalCharArray(gq.getHostName()));
- path = new String(GlobalString.toLocalCharArray(gq.getPath()));
-
- GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
- gsb.append("/");
- gsb.append(path);
- workingURL = global new GlobalString(gsb.toGlobalString());
- gTitle = null;
- }
- lq = new LocalQuery(hostname, path, depth);
-
-/* System.printString("["+lq.getDepth()+"] ");
- System.printString("Processing - Hostname : ");
- System.printString(hostname);
- System.printString(", Path : ");
- System.printString(path);
- System.printString("\n");
-*/
- if (isDocument(path)) {
- return;
- }
-
- Socket s = new Socket();
-
- if(s.connect(hostname, 80) == -1) {
- return;
- }
-
- requestQuery(hostname, path, s);
- readResponse(lq, s);
-
- if ((title = grabTitle(lq)) != null) {
- atomic {
- gTitle = global new GlobalString(title);
- }
- atomic {
- toprocess = processPage(lq);
- }
- }
- s.close();
- }
- }
-
-
- public static boolean isDocument(String str) {
- int index = str.lastindexOf('.');
-
- if (index != -1) {
- if ((str.subString(index+1)).equals("pdf")) return true;
- else if ((str.subString(index+1)).equals("ps")) return true;
- else if ((str.subString(index+1)).equals("ppt")) return true;
- else if ((str.subString(index+1)).equals("pptx")) return true;
- else if ((str.subString(index+1)).equals("jpg")) return true;
- else if ((str.subString(index+1)).equals("mp3")) return true;
- else if ((str.subString(index+1)).equals("wmv")) return true;
- else if ((str.subString(index+1)).equals("doc")) return true;
- else if ((str.subString(index+1)).equals("docx")) return true;
- else if ((str.subString(index+1)).equals("mov")) return true;
- else if ((str.subString(index+1)).equals("flv")) return true;
- else if ((str.subString(index+1)).equals("tar")) return true;
- else if ((str.subString(index+1)).equals("tgz")) return true;
- else return false;
- }
- return false;
- }
-
- public void done(Object obj) {
- if ((gTitle != null) && (gTitle.length() > 0)) {
- processedList();
- }
-
- int searchCnt = 0;
- while(!toprocess.isEmpty()) {
- GlobalQuery q = (GlobalQuery)toprocess.pop();
-
- GlobalString hostname = global new GlobalString(q.getHostName());
- GlobalString path = global new GlobalString(q.getPath());
-
- GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
- gsb.append("/");
- gsb.append(path);
-
- if (!visitedList.containsKey(gsb.toGlobalString()) && (searchCnt < maxSearchDepth)) {
- todoList.push(q);
-
- GlobalString str = global new GlobalString("1");
- visitedList.put(gsb.toGlobalString(), str);
- results_list.add(gsb.toGlobalString());
- searchCnt++;
- }
- }
- }
-
- public void output() {
- String str;
- Iterator iter = results_list.iterator();
-
- while (iter.hasNext() == true) {
- str = ((GlobalString)(iter.next())).toLocalString();
- System.printString(str + "\n");
- }
- }
-
- public static String grabTitle(LocalQuery lq) {
- String sBrace = new String("<");
- String strTitle = new String("title>");
- String searchstr = lq.response.toString();
- String title = null;
- char ch;
-
- int mindex = -1;
- int endquote = -1;
- int i, j;
- String tmp;
-
- for (i = 0; i < searchstr.length(); i++) {
- if (searchstr.charAt(i) == '<') {
- i++;
- if (searchstr.length() > (i+strTitle.length())) {
- tmp = searchstr.subString(i, i+strTitle.length());
- if (tmp.equalsIgnoreCase("title>")) {
- mindex = i + tmp.length();
- for (j = mindex; j < searchstr.length(); j++) {
- if (searchstr.charAt(j) == '<') {
- j++;
- tmp = searchstr.subString(j, j+strTitle.length()+1);
- if (tmp.equalsIgnoreCase("/title>")) {
- endquote = j - 1;
- break;
- }
- }
- }
- }
- }
- }
- }
-
- if (mindex != -1) {
- title = searchstr.subString(mindex, endquote);
- if (Character.isWhitespace(title.charAt(0))){
- mindex=0;
- while (Character.isWhitespace(title.charAt(mindex++)));
- mindex--;
- if (mindex >= title.length()) return null;
- title = new String(title.subString(mindex));
- }
-
- if (Character.isWhitespace(title.charAt(title.length()-1))) {
- endquote=title.length()-1;
- while (Character.isWhitespace(title.charAt(endquote--)));
- endquote += 2;
- if (mindex >= endquote) return null;
- title = new String(title.subString(0, endquote));
- }
-
- if (isErrorPage(title)) {
- return null;
- }
- }
-
- return title;
- }
-
- public static boolean isErrorPage(String str) {
- if (str.equals("301 Moved Permanently"))
- return true;
- else if (str.equals("302 Found"))
- return true;
- else if (str.equals("404 Not Found"))
- return true;
- else if (str.equals("403 Forbidden"))
- return true;
- else if (str.equals("404 File Not Found"))
- return true;
- else
- return false;
- }
-
- public static void requestQuery(String hostname, String path, Socket sock) {
- StringBuffer req = new StringBuffer("GET ");
- req.append("/");
- req.append(path);
- req.append(" HTTP/1.0\r\nHost: ");
- req.append(hostname);
- req.append("\r\n\r\n");
- sock.write(req.toString().getBytes());
- }
-
- public static void readResponse(LocalQuery lq, Socket sock) {
- // state 0 - nothing
- // state 1 - \r
- // state 2 - \r\n
- // state 3 - \r\n\r
- // state 4 - \r\n\r\n
- byte[] buffer = new byte[1024];
- int numchars;
-
- do {
- numchars = sock.read(buffer);
-
- String curr = (new String(buffer)).subString(0, numchars);
-
- lq.response.append(curr);
- buffer = new byte[1024];
- } while(numchars > 0);
- }
-
- public void processedList() {
- LinkedList ll;
- GlobalString token = null;
- int mindex = 0;
- int endquote = 0;
-
- while (endquote != -1) {
- endquote = gTitle.indexOf(' ', mindex);
-
- if (endquote != -1) {
- token = gTitle.subString(mindex, endquote);
- mindex = endquote + 1;
- if (filter(token)) {
- continue;
- }
- token = refine(token);
- }
- else {
- token = gTitle.subString(mindex);
- token = refine(token);
- }
-
- GlobalQueue q = (GlobalQueue)results.get(token);
- if (q == null) {
- q = global new GlobalQueue();
- }
- q.push(workingURL);
- results.put(token, q);
- }
- }
-
- public boolean filter(GlobalString str) {
- if (str.equals("of")) return true;
- else if (str.equals("for")) return true;
- else if (str.equals("a")) return true;
- else if (str.equals("an")) return true;
- else if (str.equals("the")) return true;
- else if (str.equals("at")) return true;
- else if (str.equals("and")) return true;
- else if (str.equals("or")) return true;
- else if (str.equals("but")) return true;
- else if (str.equals("to")) return true;
- else if (str.equals("The")) return true;
- else if (str.length() == 1) {
- if (str.charAt(0) == '.') return true;
- else if (str.charAt(0) == '.') return true;
- else if (str.charAt(0) == '-') return true;
- else if (str.charAt(0) == '=') return true;
- else if (str.charAt(0) == '_') return true;
- else if (str.charAt(0) == ':') return true;
- else if (str.charAt(0) == ';') return true;
- else if (str.charAt(0) == '\'') return true;
- else if (str.charAt(0) == '\"') return true;
- else if (str.charAt(0) == '|') return true;
- else if (str.charAt(0) == '@') return true;
- else if (str.charAt(0) == '&') return true;
- else if (str.charAt(0) == ' ') return true;
- }
- else return false;
- }
-
- public GlobalString refine(GlobalString str) {
- str = refinePrefix(str);
- str = refinePostfix(str);
- return str;
- }
-
- public GlobalString refinePrefix(GlobalString str) {
- if (str.charAt(0) == '&') { // &
- return str.subString(1);
- }
- else if (str.charAt(0) == '/') { // &
- return str.subString(1);
- }
- return str;
- }
-
- public GlobalString refinePostfix(GlobalString str) {
- if (str.charAt(str.length()-1) == ',') { // ,
- return str.subString(0, str.length()-1);
- }
- else if (str.charAt(str.length()-1) == ':') { // :
- return str.subString(0, str.length()-1);
- }
- else if (str.charAt(str.length()-1) == ';') { // ;
- return str.subString(0, str.length()-1);
- }
- else if (str.charAt(str.length()-1) == '!') { // !
- return str.subString(0, str.length()-1);
- }
- else if (str.charAt(str.length()-1) == 's') { // 's
- if (str.charAt(str.length()-2) == '\'')
- return str.subString(0, str.length()-2);
- }
- else if (str.charAt(str.length()-1) == '-') {
- int index = str.length()-2;
- while (Character.isWhitespace(str.charAt(index--)));
- return str.subString(0, index+2);
- }
- return str;
- }
-
- public static GlobalQueue processPage(LocalQuery lq) {
- int index = 0;
- String href = new String("href=\"");
- String searchstr = lq.response.toString();
- int depth;
- boolean cont = true;
- GlobalQueue toprocess;
-
- depth = lq.getDepth() + 1;
-
- toprocess = global new GlobalQueue();
- while(cont) {
- int mindex = searchstr.indexOf(href,index);
- if (mindex != -1) {
- int endquote = searchstr.indexOf('"', mindex+href.length());
- if (endquote != -1) {
- String match = searchstr.subString(mindex+href.length(), endquote);
- String match2 = lq.makewebcanonical(match);
-
- GlobalString ghostname;
- GlobalString gpath;
-
- ghostname = global new GlobalString(lq.getHostName(match));
- gpath = global new GlobalString(lq.getPathName(match));
-
- if (match2 != null) {
- GlobalQuery gq = global new GlobalQuery(ghostname, gpath, depth);
- toprocess.push(gq);
- }
- index = endquote;
- } else cont = false;
- } else cont = false;
- }
- return toprocess;
- }
-}
+++ /dev/null
-/*
-Usage :
- ./Spider.java master <num_thread> <first machine> <first page> <maxDepth>
-*/
-
-
-public class Spider {
- public static void main(String[] args) {
- int NUM_THREADS = 3;
- int maxDepth = 3;
- int maxSearchDepth = 10;
- int i, j;
- Work[] works;
- QueryTask[] qt;
- GlobalQuery[] currentWorkList;
- String fm = "www.uci.edu";
- String fp = "";
-
- if(args.length != 4) {
- System.out.println("./Spider.java master <num_thread> <first machine> <first page> <maxDepth>");
- System.exit(0);
- }
- else {
- NUM_THREADS = Integer.parseInt(args[0]);
- fm = args[1];
- fp = args[2];
- maxDepth = Integer.parseInt(args[3]);
- }
-
- GlobalString firstmachine;
- GlobalString firstpage;
-
- int mid[] = new int[8];
- mid[0] = (128<<24)|(195<<16)|(180<<8)|21;
- mid[1] = (128<<24)|(195<<16)|(180<<8)|26;
-/* mid[0] = (128<<24)|(195<<16)|(136<<8)|162;
- mid[1] = (128<<24)|(195<<16)|(136<<8)|163;
- mid[2] = (128<<24)|(195<<16)|(136<<8)|164;
- mid[3] = (128<<24)|(195<<16)|(136<<8)|165;
- mid[4] = (128<<24)|(195<<16)|(136<<8)|166;
- mid[5] = (128<<24)|(195<<16)|(136<<8)|167;
- mid[6] = (128<<24)|(195<<16)|(136<<8)|168;
- mid[7] = (128<<24)|(195<<16)|(136<<8)|169;
- */
- atomic {
- firstmachine = global new GlobalString(fm);
- if (args.length == 3) {
- firstpage = global new GlobalString(fp);
- }
- else
- firstpage = global new GlobalString("");;
-
- works = global new Work[NUM_THREADS];
- qt = global new QueryTask[NUM_THREADS];
- currentWorkList = global new GlobalQuery[NUM_THREADS];
-
- GlobalQuery firstquery = global new GlobalQuery(firstmachine, firstpage);
-
- GlobalQueue todoList = global new GlobalQueue();
- DistributedHashMap visitedList = global new DistributedHashMap(500, 500, 0.75f);
- DistributedHashMap results = global new DistributedHashMap(100, 100, 0.75f);
- DistributedLinkedList results_list = global new DistributedLinkedList();
-
- todoList.push(firstquery);
-
- for (i = 0; i < NUM_THREADS; i++) {
- qt[i] = global new QueryTask(todoList, visitedList, maxDepth, maxSearchDepth, results, results_list);
- works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
- }
- }
- System.printString("Finished to create Objects\n");
-
- Work tmp;
- for (i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = works[i];
- }
- Thread.myStart(tmp, mid[i]);
- }
-
- for (i = 0; i < NUM_THREADS; i++) {
- atomic {
- tmp = works[i];
- }
- tmp.join();
- }
- }
-}
+++ /dev/null
-128.195.180.21
-#128.195.180.24
-128.195.180.26
-#128.195.136.162
-#128.195.136.163
-#128.195.136.164
SRC1=${MAINCLASS}.java
SRC2=Local${SUBCLASS}.java
SRC3=${SUBCLASS}Task.java
-FLAGS= -32bit -nooptimize -mainclass ${MAINCLASS}
+FLAGS= -optimize -thread -mainclass ${MAINCLASS}
default:
../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC1}
+++ /dev/null
-MAINCLASS=Spider
-SUBCLASS=Query
-SRC1=${MAINCLASS}.java
-SRC2=Global${SUBCLASS}.java
-SRC3=${SUBCLASS}Task.java
-FLAGS= -recoverystats -recovery -dsmtask -dsm -dsmtask -32bit -nooptimize -debug -mainclass ${MAINCLASS}
-default:
- ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC1}
-
-clean:
- rm -rf tmpbuilddirectory
- rm *.bin
--- /dev/null
+/*
+ * A crawl request held in global (shared) objects: the host to contact, the
+ * path on that host (stored WITHOUT a leading '/'), and the link depth at
+ * which the page was discovered.
+ */
+public class GlobalQuery {
+  GlobalString hostname;
+  GlobalString path;
+  int depth;
+
+  /* Creates a depth-0 query that keeps references to the given strings. */
+  public GlobalQuery(GlobalString hostname, GlobalString path) {
+    this.hostname = hostname;
+    this.path = path;
+    this.depth = 0;
+  }
+
+  /* Creates a query at the given depth, holding its own copies of both strings. */
+  public GlobalQuery(GlobalString hostname, GlobalString path, int depth) {
+    this.hostname = global new GlobalString(hostname);
+    this.path = global new GlobalString(path);
+    this.depth = depth;
+  }
+
+  public int getDepth() {
+    return depth;
+  }
+
+  public GlobalString getHostName() {
+    return hostname;
+  }
+
+  public GlobalString getPath() {
+    return path;
+  }
+
+  /* Resolves a link found on this page to the form "<host>/<path>". */
+  public GlobalString makewebcanonical(GlobalString page) {
+    GlobalStringBuffer b = global new GlobalStringBuffer(getHostName(page));
+    b.append("/");
+    b.append(getPathName(page));
+    return b.toGlobalString();
+  }
+
+  /*
+   * Returns the host part of a link. A link containing "https://" or
+   * "http://" yields the text between the scheme and the next '/', or the
+   * rest of the link when there is no further '/'. Any other link is
+   * relative and yields this query's own host.
+   */
+  public GlobalString getHostName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    GlobalString https = global new GlobalString("https://");
+    int httpsAt = page.indexOf(https);
+    int httpAt = page.indexOf(http);
+    int beginindex;
+    int endindex;
+
+    if (httpsAt != -1) {
+      beginindex = httpsAt + https.length();
+    }
+    else if (httpAt != -1) {
+      beginindex = httpAt + http.length();
+    }
+    else {
+      return getHostName();
+    }
+
+    endindex = page.indexOf('/', beginindex+1);
+    if (endindex == -1)
+      endindex = page.length();
+
+    return page.subString(beginindex, endindex);
+  }
+
+  /*
+   * Returns the path part of a link, without a leading '/'.
+   *  - absolute link with a path : everything after the first '/' following the host
+   *  - absolute link, host only  : "index.html"
+   *  - root-relative ("/a/b")    : the link minus its leading '/'
+   *  - other relative link       : the link appended to the directory of this query's path
+   */
+  public GlobalString getPathName(GlobalString page) {
+    GlobalString http = global new GlobalString("http://");
+    GlobalString https = global new GlobalString("https://");
+    int httpsAt = page.indexOf(https);
+    int httpAt = page.indexOf(http);
+    int beginindex;
+    int nextindex;
+
+    if ((httpAt == -1) && (httpsAt == -1)) {
+      // A root-relative link must not be joined to the current directory,
+      // otherwise the result is "dir//a/b" instead of "a/b".
+      if ((page.length() > 0) && (page.charAt(0) == '/'))
+        return page.subString(1);
+
+      GlobalString base = getPath();
+      int lastindex = base.lastindexOf('/');
+      if (lastindex == -1)
+        return page;
+
+      GlobalStringBuffer sb = global new GlobalStringBuffer(base.subString(0,lastindex+1));
+      sb.append(page);
+      return sb.toGlobalString();
+    }
+
+    if (httpsAt != -1) {
+      beginindex = httpsAt + https.length();
+    }
+    else {
+      beginindex = httpAt + http.length();
+    }
+
+    nextindex = page.indexOf('/', beginindex+1);
+    if (nextindex == -1)
+      return global new GlobalString("index.html");
+    return page.subString(nextindex+1, page.length());
+  }
+}
--- /dev/null
+/*
+ * Thread-local counterpart of GlobalQuery: host, path (stored WITHOUT a
+ * leading '/'), link depth, and a buffer that accumulates the raw response
+ * text for the page.
+ */
+public class LocalQuery {
+  String hostname;
+  String path;
+  StringBuffer response;
+  int depth;
+
+  /* Copies host and path and starts with an empty response buffer. */
+  public LocalQuery(String hostname, String path, int depth) {
+    this.hostname = new String(hostname);
+    this.path = new String(path);
+    response = new StringBuffer();
+    this.depth = depth;
+  }
+
+  public int getDepth() {
+    return depth;
+  }
+
+  public String getHostName() {
+    return hostname;
+  }
+
+  public String getPath() {
+    return path;
+  }
+
+  /* Writes the response to a file named "<host><path>" with every '/' replaced by '#'. */
+  public void outputFile() {
+    StringBuffer sb = new StringBuffer(hostname);
+    sb.append(path);
+    FileOutputStream fos = new FileOutputStream(sb.toString().replace('/','#'));
+    fos.write(response.toString().getBytes());
+    fos.close();
+  }
+
+  /* Resolves a link found on this page to the form "<host>/<path>". */
+  public String makewebcanonical(String page) {
+    StringBuffer b = new StringBuffer(getHostName(page));
+    b.append("/");
+    b.append(getPathName(page));
+    return b.toString();
+  }
+
+  /*
+   * Returns the host part of a link. A link containing "https://" or
+   * "http://" yields the text between the scheme and the next '/', or the
+   * rest of the link when there is no further '/'. Any other link is
+   * relative and yields this query's own host.
+   */
+  public String getHostName(String page) {
+    String http = new String("http://");
+    String https = new String("https://");
+    int httpsAt = page.indexOf(https);
+    int httpAt = page.indexOf(http);
+    int beginindex;
+    int endindex;
+
+    if (httpsAt != -1) {
+      beginindex = httpsAt + https.length();
+    }
+    else if (httpAt != -1) {
+      beginindex = httpAt + http.length();
+    }
+    else {
+      return getHostName();
+    }
+
+    endindex = page.indexOf('/', beginindex+1);
+    if (endindex == -1)
+      endindex = page.length();
+
+    return page.subString(beginindex, endindex);
+  }
+
+  /*
+   * Returns the path part of a link, without a leading '/'.
+   *  - absolute link with a path : everything after the first '/' following the host
+   *  - absolute link, host only  : "index.html"
+   *  - root-relative ("/a/b")    : the link minus its leading '/'
+   *  - other relative link       : the link appended to the directory of this query's path
+   */
+  public String getPathName(String page) {
+    String http = new String("http://");
+    String https = new String("https://");
+    int httpsAt = page.indexOf(https);
+    int httpAt = page.indexOf(http);
+    int beginindex;
+    int nextindex;
+
+    if ((httpAt == -1) && (httpsAt == -1)) {
+      // A root-relative link must not be joined to the current directory,
+      // otherwise the request becomes "GET /dir//a/b" instead of "GET /a/b".
+      if ((page.length() > 0) && (page.charAt(0) == '/'))
+        return page.subString(1);
+
+      String base = getPath();
+      int lastindex = base.lastindexOf('/');
+      if (lastindex == -1)
+        return page;
+
+      StringBuffer sb = new StringBuffer(base.subString(0,lastindex+1));
+      sb.append(page);
+      return sb.toString();
+    }
+
+    if (httpsAt != -1) {
+      beginindex = httpsAt + https.length();
+    }
+    else {
+      beginindex = httpAt + http.length();
+    }
+
+    nextindex = page.indexOf('/', beginindex+1);
+    if (nextindex == -1)
+      return new String("index.html");
+    return page.subString(nextindex+1, page.length());
+  }
+}
--- /dev/null
+/*
+ * Crawler task: fetches the page named by the current work item, extracts
+ * its <title> and href links, indexes the title words, and queues the links
+ * that have not been visited yet.
+ */
+public class QueryTask extends Task {
+  int maxDepth;                       // execute() ignores work whose depth is >= this
+  int maxSearchDepth;                 // done() queues at most this many new links per page
+  GlobalQueue toprocess;              // links extracted from the current page
+  DistributedHashMap results;         // title word -> GlobalQueue of URLs
+  DistributedLinkedList results_list; // every URL that was queued for crawling
+  DistributedHashMap visitedList;     // URL -> "1" for every URL already queued
+  GlobalString gTitle;                // title of the current page, or null
+  GlobalString workingURL;            // "<host>/<path>" of the current work item
+
+  public QueryTask(GlobalQueue todoList, DistributedHashMap visitedList, int maxDepth, int maxSearchDepth, DistributedHashMap results, DistributedLinkedList results_list) {
+    this.todoList = todoList;
+    this.visitedList = visitedList;
+    this.maxDepth = maxDepth;
+    this.maxSearchDepth = maxSearchDepth;
+    this.results = results;
+    this.results_list = results_list;
+    toprocess = global new GlobalQueue();
+  }
+
+  /*
+   * Downloads the page for myWork over port 80. When the page has a usable
+   * title, stores it in gTitle and the page's links in toprocess. Nothing is
+   * fetched for work at or beyond maxDepth, for document/media paths, or when
+   * the connection fails.
+   */
+  public void execute() {
+    int depth;
+    int max;
+
+    atomic {
+      depth = ((GlobalQuery)myWork).getDepth();
+      max = this.maxDepth;
+    }
+
+    if (depth >= max)
+      return;
+
+    /* global variables */
+    GlobalQuery gq;
+
+    /* local variables */
+    LocalQuery lq;
+    String hostname;
+    String path;
+    String title;
+
+    atomic {
+      gq = (GlobalQuery)myWork;
+      hostname = new String(GlobalString.toLocalCharArray(gq.getHostName()));
+      path = new String(GlobalString.toLocalCharArray(gq.getPath()));
+
+      GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
+      gsb.append("/");
+      gsb.append(path);
+      workingURL = global new GlobalString(gsb.toGlobalString());
+      gTitle = null;
+    }
+    lq = new LocalQuery(hostname, path, depth);
+
+    if (isDocument(path)) {
+      return;
+    }
+
+    Socket s = new Socket();
+
+    if (s.connect(hostname, 80) == -1) {
+      return;
+    }
+
+    requestQuery(hostname, path, s);
+    readResponse(lq, s);
+
+    if ((title = grabTitle(lq)) != null) {
+      atomic {
+        gTitle = global new GlobalString(title);
+      }
+      atomic {
+        toprocess = processPage(lq);
+      }
+    }
+    s.close();
+  }
+
+
+  /* Returns true when the text after the last '.' is a known document/media extension. */
+  public static boolean isDocument(String str) {
+    int index = str.lastindexOf('.');
+
+    if (index == -1)
+      return false;
+
+    String ext = str.subString(index+1);
+    return ext.equals("pdf") || ext.equals("ps") || ext.equals("ppt")
+        || ext.equals("pptx") || ext.equals("jpg") || ext.equals("mp3")
+        || ext.equals("wmv") || ext.equals("doc") || ext.equals("docx")
+        || ext.equals("mov") || ext.equals("flv") || ext.equals("tar")
+        || ext.equals("tgz");
+  }
+
+  /*
+   * Publishes the results of execute(): indexes the title words (if any) and
+   * moves up to maxSearchDepth not-yet-visited links from toprocess to
+   * todoList, recording each in visitedList and results_list. toprocess is
+   * always drained.
+   */
+  public void done(Object obj) {
+    if ((gTitle != null) && (gTitle.length() > 0)) {
+      processedList();
+    }
+
+    int searchCnt = 0;
+    while(!toprocess.isEmpty()) {
+      GlobalQuery q = (GlobalQuery)toprocess.pop();
+
+      GlobalString hostname = global new GlobalString(q.getHostName());
+      GlobalString path = global new GlobalString(q.getPath());
+
+      GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
+      gsb.append("/");
+      gsb.append(path);
+
+      if (!visitedList.containsKey(gsb.toGlobalString()) && (searchCnt < maxSearchDepth)) {
+        todoList.push(q);
+
+        GlobalString str = global new GlobalString("1");
+        visitedList.put(gsb.toGlobalString(), str);
+        results_list.add(gsb.toGlobalString());
+        searchCnt++;
+      }
+    }
+  }
+
+  /* Prints every URL in results_list, one per line. */
+  public void output() {
+    String str;
+    Iterator iter = results_list.iterator();
+
+    while (iter.hasNext() == true) {
+      str = ((GlobalString)(iter.next())).toLocalString();
+      System.printString(str + "\n");
+    }
+  }
+
+  /*
+   * Returns the text of the first complete <title>...</title> element in the
+   * response (tag match is case-insensitive), with surrounding whitespace
+   * removed. Returns null when there is no complete title element, when the
+   * title is empty or all whitespace, or when it is a known error-page title.
+   */
+  public static String grabTitle(LocalQuery lq) {
+    String openTag = new String("title>");
+    String closeTag = new String("/title>");
+    String searchstr = lq.response.toString();
+    int len = searchstr.length();
+
+    int mindex = -1;    // first character of the title text
+    int endquote = -1;  // position of the '<' that starts the closing tag
+    int i, j;
+
+    for (i = 0; (i < len) && (endquote == -1); i++) {
+      if ((searchstr.charAt(i) == '<') && ((i + 1 + openTag.length()) <= len)) {
+        if (searchstr.subString(i+1, i + 1 + openTag.length()).equalsIgnoreCase(openTag)) {
+          int textStart = i + 1 + openTag.length();
+          // The loop bound keeps the closing-tag probe inside the string.
+          for (j = textStart; (j + 1 + closeTag.length()) <= len; j++) {
+            if ((searchstr.charAt(j) == '<')
+                && searchstr.subString(j+1, j + 1 + closeTag.length()).equalsIgnoreCase(closeTag)) {
+              mindex = textStart;
+              endquote = j;
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    if (mindex == -1)
+      return null;
+
+    // Trim on indices into searchstr so that both ends are measured in the
+    // same coordinates.
+    while ((mindex < endquote) && Character.isWhitespace(searchstr.charAt(mindex)))
+      mindex++;
+    while ((endquote > mindex) && Character.isWhitespace(searchstr.charAt(endquote-1)))
+      endquote--;
+
+    if (mindex >= endquote)
+      return null;
+
+    String title = new String(searchstr.subString(mindex, endquote));
+    if (isErrorPage(title))
+      return null;
+
+    return title;
+  }
+
+  /* Returns true when the title is one of the known HTTP error / redirect page titles. */
+  public static boolean isErrorPage(String str) {
+    return str.equals("301 Moved Permanently")
+        || str.equals("302 Found")
+        || str.equals("404 Not Found")
+        || str.equals("403 Forbidden")
+        || str.equals("404 File Not Found");
+  }
+
+  /* Sends "GET /<path> HTTP/1.0" with a Host header for hostname over sock. */
+  public static void requestQuery(String hostname, String path, Socket sock) {
+    StringBuffer req = new StringBuffer("GET ");
+    req.append("/");
+    req.append(path);
+    req.append(" HTTP/1.0\r\nHost: ");
+    req.append(hostname);
+    req.append("\r\n\r\n");
+    sock.write(req.toString().getBytes());
+  }
+
+  /*
+   * Appends everything readable from sock to lq.response, 1024 bytes at a
+   * time, stopping at the first read that returns no data (a count <= 0).
+   */
+  public static void readResponse(LocalQuery lq, Socket sock) {
+    byte[] buffer = new byte[1024];
+    int numchars = sock.read(buffer);
+
+    while (numchars > 0) {
+      lq.response.append((new String(buffer)).subString(0, numchars));
+      // Fresh buffer per read, as before. NOTE(review): reuse is probably
+      // safe if new String(byte[]) copies its input - confirm before changing.
+      buffer = new byte[1024];
+      numchars = sock.read(buffer);
+    }
+  }
+
+  /*
+   * Splits gTitle on single spaces and, for every token that is non-empty,
+   * not filtered, and still non-empty after refine(), appends workingURL to
+   * that token's queue in results.
+   */
+  public void processedList() {
+    GlobalString token = null;
+    int mindex = 0;
+    int endquote = 0;
+
+    while (endquote != -1) {
+      endquote = gTitle.indexOf(' ', mindex);
+
+      if (endquote != -1) {
+        token = gTitle.subString(mindex, endquote);
+        mindex = endquote + 1;
+      }
+      else {
+        token = gTitle.subString(mindex);
+      }
+
+      // Consecutive spaces produce empty tokens; skip them before refine()
+      // looks at individual characters.
+      if ((token.length() == 0) || filter(token)) {
+        continue;
+      }
+      token = refine(token);
+      if (token.length() == 0) {
+        continue;
+      }
+
+      GlobalQueue q = (GlobalQueue)results.get(token);
+      if (q == null) {
+        q = global new GlobalQueue();
+      }
+      q.push(workingURL);
+      results.put(token, q);
+    }
+  }
+
+  /* Returns true for words and single punctuation characters that are not indexed. */
+  public boolean filter(GlobalString str) {
+    if (str.equals("of")) return true;
+    if (str.equals("for")) return true;
+    if (str.equals("a")) return true;
+    if (str.equals("an")) return true;
+    if (str.equals("the")) return true;
+    if (str.equals("at")) return true;
+    if (str.equals("and")) return true;
+    if (str.equals("or")) return true;
+    if (str.equals("but")) return true;
+    if (str.equals("to")) return true;
+    if (str.equals("The")) return true;
+
+    if (str.length() == 1) {
+      char c = str.charAt(0);
+      if (c == '.') return true;
+      if (c == '-') return true;
+      if (c == '=') return true;
+      if (c == '_') return true;
+      if (c == ':') return true;
+      if (c == ';') return true;
+      if (c == '\'') return true;
+      if (c == '\"') return true;
+      if (c == '|') return true;
+      if (c == '@') return true;
+      if (c == '&') return true;
+      if (c == ' ') return true;
+    }
+    return false;
+  }
+
+  /* Strips one leading and one trailing decoration from a token. */
+  public GlobalString refine(GlobalString str) {
+    str = refinePrefix(str);
+    str = refinePostfix(str);
+    return str;
+  }
+
+  /* Drops a single leading '&' or '/'. Empty input is returned unchanged. */
+  public GlobalString refinePrefix(GlobalString str) {
+    if (str.length() == 0)
+      return str;
+
+    char first = str.charAt(0);
+    if ((first == '&') || (first == '/')) {
+      return str.subString(1);
+    }
+    return str;
+  }
+
+  /*
+   * Drops one trailing ',', ':', ';' or '!', a trailing "'s", or a trailing
+   * '-' together with any whitespace directly before it. Empty input is
+   * returned unchanged.
+   */
+  public GlobalString refinePostfix(GlobalString str) {
+    int len = str.length();
+    if (len == 0)
+      return str;
+
+    char last = str.charAt(len-1);
+    if ((last == ',') || (last == ':') || (last == ';') || (last == '!')) {
+      return str.subString(0, len-1);
+    }
+    if (last == 's') { // 's
+      if ((len >= 2) && (str.charAt(len-2) == '\''))
+        return str.subString(0, len-2);
+      return str;
+    }
+    if (last == '-') {
+      int end = len - 1; // exclusive end once the '-' is dropped
+      while ((end > 0) && Character.isWhitespace(str.charAt(end-1)))
+        end--;
+      return str.subString(0, end);
+    }
+    return str;
+  }
+
+  /*
+   * Scans the response for href="..." attributes and returns a queue holding
+   * one GlobalQuery (at depth lq.getDepth()+1) per link, in document order.
+   * Scanning stops at the first href whose closing quote is missing.
+   */
+  public static GlobalQueue processPage(LocalQuery lq) {
+    int index = 0;
+    String href = new String("href=\"");
+    String searchstr = lq.response.toString();
+    int depth = lq.getDepth() + 1;
+    boolean cont = true;
+    GlobalQueue toprocess = global new GlobalQueue();
+
+    while(cont) {
+      int mindex = searchstr.indexOf(href,index);
+      if (mindex != -1) {
+        int endquote = searchstr.indexOf('"', mindex+href.length());
+        if (endquote != -1) {
+          String match = searchstr.subString(mindex+href.length(), endquote);
+
+          GlobalString ghostname = global new GlobalString(lq.getHostName(match));
+          GlobalString gpath = global new GlobalString(lq.getPathName(match));
+
+          GlobalQuery gq = global new GlobalQuery(ghostname, gpath, depth);
+          toprocess.push(gq);
+
+          index = endquote;
+        } else cont = false;
+      } else cont = false;
+    }
+    return toprocess;
+  }
+}
--- /dev/null
+/*
+Usage :
+ ./Spider.bin master <num_thread> <first machine> <first page> <maxDepth>
+*/
+
+
+/*
+ * Entry point of the crawler benchmark: builds the shared work queue and
+ * result tables, creates one Work/QueryTask pair per thread, starts each
+ * worker on its own machine, and waits for all of them to finish.
+ */
+public class Spider {
+  /*
+   * Expects exactly four arguments:
+   *   <num_thread> <first machine> <first page> <maxDepth>
+   * Prints the usage line and exits on any other count. Also exits when
+   * <num_thread> is outside 1..(number of machines configured in mid[]).
+   */
+  public static void main(String[] args) {
+    int NUM_THREADS = 3;
+    int maxDepth = 3;
+    int maxSearchDepth = 10;
+    int i;
+    Work[] works;
+    QueryTask[] qt;
+    GlobalQuery[] currentWorkList;
+    String fm = "www.uci.edu";
+    String fp = "";
+
+    if(args.length != 4) {
+      System.out.println("./Spider.bin master <num_thread> <first machine> <first page> <maxDepth>");
+      System.exit(0);
+    }
+    else {
+      NUM_THREADS = Integer.parseInt(args[0]);
+      fm = args[1];
+      fp = args[2];
+      maxDepth = Integer.parseInt(args[3]);
+    }
+
+    GlobalString firstmachine;
+    GlobalString firstpage;
+
+    // Machine ids (IPv4 addresses packed into an int); worker i runs on mid[i].
+    int numMachines = 3;
+    int mid[] = new int[8];
+    mid[0] = (128<<24)|(195<<16)|(180<<8)|21;
+    mid[1] = (128<<24)|(195<<16)|(180<<8)|26;
+    mid[2] = (128<<24)|(195<<16)|(180<<8)|24;
+/*
+    mid[0] = (128<<24)|(195<<16)|(136<<8)|162;
+    mid[1] = (128<<24)|(195<<16)|(136<<8)|163;
+    mid[2] = (128<<24)|(195<<16)|(136<<8)|164;
+    mid[3] = (128<<24)|(195<<16)|(136<<8)|165;
+    mid[4] = (128<<24)|(195<<16)|(136<<8)|166;
+    mid[5] = (128<<24)|(195<<16)|(136<<8)|167;
+    mid[6] = (128<<24)|(195<<16)|(136<<8)|168;
+    mid[7] = (128<<24)|(195<<16)|(136<<8)|169;
+ */
+
+    // Each worker needs its own entry in mid[]; an unset entry is 0, which is
+    // not a machine address, and an index past the array is out of range.
+    if ((NUM_THREADS < 1) || (NUM_THREADS > numMachines)) {
+      System.out.println("<num_thread> must be between 1 and " + numMachines);
+      System.exit(0);
+    }
+
+    atomic {
+      firstmachine = global new GlobalString(fm);
+      // args.length is always 4 at this point, so <first page> is always present.
+      firstpage = global new GlobalString(fp);
+
+      works = global new Work[NUM_THREADS];
+      qt = global new QueryTask[NUM_THREADS];
+      currentWorkList = global new GlobalQuery[NUM_THREADS];
+
+      GlobalQuery firstquery = global new GlobalQuery(firstmachine, firstpage);
+
+      GlobalQueue todoList = global new GlobalQueue();
+      DistributedHashMap visitedList = global new DistributedHashMap(500, 500, 0.75f);
+      DistributedHashMap results = global new DistributedHashMap(100, 100, 0.75f);
+      DistributedLinkedList results_list = global new DistributedLinkedList();
+
+      todoList.push(firstquery);
+
+      for (i = 0; i < NUM_THREADS; i++) {
+        qt[i] = global new QueryTask(todoList, visitedList, maxDepth, maxSearchDepth, results, results_list);
+        works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
+      }
+    }
+    System.printString("Finished to create Objects\n");
+
+    Work tmp;
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      Thread.myStart(tmp, mid[i]);
+    }
+
+    for (i = 0; i < NUM_THREADS; i++) {
+      atomic {
+        tmp = works[i];
+      }
+      tmp.join();
+    }
+  }
+}
--- /dev/null
+128.195.180.21
+128.195.180.26
+#128.195.180.24
+#128.195.136.162
+#128.195.136.163
+#128.195.136.164
--- /dev/null
+MAINCLASS=Spider
+SUBCLASS=Query
+SRC1=${MAINCLASS}.java
+SRC2=Global${SUBCLASS}.java
+SRC3=${SUBCLASS}Task.java
+FLAGS= -recoverystats -recovery -dsm -dsmtask -32bit -nooptimize -debug -mainclass ${MAINCLASS}
+
+# Neither target produces a file of its own name.
+.PHONY: default clean
+
+# Compile the three benchmark sources with the project buildscript.
+default:
+	../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC1}
+
+# -f keeps "make clean" from failing when no binary has been built yet.
+clean:
+	rm -rf tmpbuilddirectory
+	rm -f *.bin
-FileSystem:data
+Spider:"www.uci.edu" "" 5
# !/bin/sh
BASEDIR=`pwd`
+RECOVERYDIR='recovery'
+JAVASINGLEDIR='java'
ITERATIONS=10
WAITTIME=300
let "k= $NUM_MACHINE"
echo ${BASEDIR}/${BM_DIR} > ~/.tmpdir
- DIR=`echo ${BASEDIR}\/${BM_DIR}`;
+ DIR=`echo ${BASEDIR}\/${BM_DIR}\/${RECOVERYDIR}`;
echo "DIR = $DIR";
-
+
# Run machines
while [ $k -gt 1 ]; do
echo "SSH into dc-${k}"