From 55aff1a2c50c1dc95240444107a591f414a8b576 Mon Sep 17 00:00:00 2001 From: adash Date: Thu, 4 Mar 2010 00:17:45 +0000 Subject: [PATCH] changes to benchmarks javasingle version of spamfilter --- .../Recovery/FileSystem/recovery/dstm.conf | 3 - .../Recovery/FileSystem/recovery/makefile | 2 +- .../Recovery/Game/recovery/RainForest.java | 4 +- .../Recovery/Game/recovery/makefile | 4 +- .../SpamFilter/java/BufferedReader.java | 86 ++++ .../SpamFilter/java/DistributedHashMap.java | 157 +++++++ .../SpamFilter/java/EphemeralSignature.java | 47 ++ .../SpamFilter/java/FilterResult.java | 80 ++++ .../SpamFilter/java/FilterStatistic.java | 66 +++ .../Recovery/SpamFilter/java/GString.java | 151 +++++++ .../Recovery/SpamFilter/java/HashEntry.java | 68 +++ .../Recovery/SpamFilter/java/HashStat.java | 70 +++ .../Recovery/SpamFilter/java/MD5.java | 424 ++++++++++++++++++ .../Recovery/SpamFilter/java/Mail.java | 422 +++++++++++++++++ .../SpamFilter/java/SignatureComputer.java | 289 ++++++++++++ .../Recovery/SpamFilter/java/SpamFilter.java | 374 +++++++++++++++ .../SpamFilter/java/WhiplashSignature.java | 258 +++++++++++ .../Recovery/SpamFilter/java/makefile | 21 + .../Recovery/SpamFilter/recovery/GString.java | 18 - .../SpamFilter/recovery/SpamFilter.java | 12 +- .../Recovery/SpamFilter/recovery/dstm.conf | 13 +- .../Recovery/SpamFilter/recovery/makefile | 2 +- .../Recovery/Spider/recovery/makefile | 2 +- 23 files changed, 2527 insertions(+), 46 deletions(-) create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/BufferedReader.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/DistributedHashMap.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/EphemeralSignature.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterResult.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterStatistic.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/GString.java create mode 
100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/HashEntry.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/HashStat.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/MD5.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/Mail.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/SignatureComputer.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/SpamFilter.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/WhiplashSignature.java create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/makefile diff --git a/Robust/src/Benchmarks/Recovery/FileSystem/recovery/dstm.conf b/Robust/src/Benchmarks/Recovery/FileSystem/recovery/dstm.conf index 74a1e24c..e69de29b 100644 --- a/Robust/src/Benchmarks/Recovery/FileSystem/recovery/dstm.conf +++ b/Robust/src/Benchmarks/Recovery/FileSystem/recovery/dstm.conf @@ -1,3 +0,0 @@ -128.195.180.21 -#128.195.180.24 -128.195.180.26 diff --git a/Robust/src/Benchmarks/Recovery/FileSystem/recovery/makefile b/Robust/src/Benchmarks/Recovery/FileSystem/recovery/makefile index cf672af4..0379d038 100644 --- a/Robust/src/Benchmarks/Recovery/FileSystem/recovery/makefile +++ b/Robust/src/Benchmarks/Recovery/FileSystem/recovery/makefile @@ -1,6 +1,6 @@ MAINCLASS=FileSystem SRC1=${MAINCLASS}.java -FLAGS= -recoverystats -dsm -recovery -nooptimize -mainclass ${MAINCLASS} +FLAGS= -recovery -recoverystats -dsm -dsmcaching -optimize -mainclass ${MAINCLASS} default: ../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC1} diff --git a/Robust/src/Benchmarks/Recovery/Game/recovery/RainForest.java b/Robust/src/Benchmarks/Recovery/Game/recovery/RainForest.java index 9e3fedcc..fb8b7ce6 100644 --- a/Robust/src/Benchmarks/Recovery/Game/recovery/RainForest.java +++ b/Robust/src/Benchmarks/Recovery/Game/recovery/RainForest.java @@ -1,6 +1,6 @@ #define ROW 400 /* columns in the map */ #define COLUMN 100 /* rows of in the map */ -#define 
ROUNDS 1000 /* Number of moves by each player */ +#define ROUNDS 2000 /* Number of moves by each player */ #define PLAYERS 20 /* Number of Players when num Players != num of client machines */ #define RATI0 0.5 /* Number of lumberjacks to number of planters */ #define BLOCK 3 /* Area around the gamer to consider */ @@ -103,7 +103,7 @@ public class RainForest extends Thread { RecoveryStat.printRecoveryStat(); while(true) { - sleep(1000000); + sleep(300000000); } } diff --git a/Robust/src/Benchmarks/Recovery/Game/recovery/makefile b/Robust/src/Benchmarks/Recovery/Game/recovery/makefile index b4b5a69a..7958db59 100644 --- a/Robust/src/Benchmarks/Recovery/Game/recovery/makefile +++ b/Robust/src/Benchmarks/Recovery/Game/recovery/makefile @@ -11,8 +11,8 @@ SRC=tmp${MAINCLASS}.java \ AStarPathFinder.java \ ../../../../ClassLibrary/JavaDSM/Thread.java -FLAGS1=-dsm -recoverystats -recovery -optimize -mainclass ${MAINCLASS} -DSMFLAGS=-dsm -optimize -mainclass ${MAINCLASS} +FLAGS1=-dsm -dsmcaching -recoverystats -recovery -optimize -mainclass ${MAINCLASS} +DSMFLAGS=-dsm -dsmcaching -optimize -mainclass ${MAINCLASS} default: cpp ${MAINCLASS}.java > tmp1${MAINCLASS}.java diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/BufferedReader.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/BufferedReader.java new file mode 100644 index 00000000..d4c96fdd --- /dev/null +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/BufferedReader.java @@ -0,0 +1,86 @@ +public class BufferedReader { + FileInputStream fr; + byte[] buffer; + int offset; + int end; + + public BufferedReader(FileInputStream fr) { + this.fr=fr; + this.buffer=new byte[2048]; + } + + public int read() { + if (offset=arraylen) + return off; + array[off++]=buffer[offset]; + } + readBuffer(); + if (end==0) + return off; + if (end<0) + return end; + } while(true); + } + + public void readBuffer() { + offset=0; + end=fr.read(buffer); + } + + public String readLine() { + String str=null; + do { + boolean 
foundcr=false; + int index=offset; + for(;index> 27)) ^ str.charAt(i); + } + + return hash; + } + */ + +} diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterResult.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterResult.java new file mode 100644 index 00000000..196006be --- /dev/null +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterResult.java @@ -0,0 +1,80 @@ +/** + * A FilterResult encapsulates the result of a filter made by checking a mail. + **/ +public class FilterResult { + /** + * This value is used if type is ERROR or UNKNOWN. + */ + public double NO_RESULT; + + /** + * A result value greater or equal this value indicates that the filter has + * decided on spam. + */ + public int SPAM_THRESHOLD; + public int ABSOLUTE_SPAM; + public int ABSOLUTE_HAM; + + //public double result; // the result, a value between -1 (ham) and 1000 (spam), + // negative values for "error", "unknown" etc. + + // ----------------------------------------------------------------------------- + + public FilterResult(double result) { + SPAM_THRESHOLD=50; + ABSOLUTE_SPAM=100; + ABSOLUTE_HAM=0; + NO_RESULT=-1; + //this.result = result; + } + + public FilterResult() { + SPAM_THRESHOLD=50; + ABSOLUTE_SPAM=100; + ABSOLUTE_HAM=0; + NO_RESULT=-1; + } + + public boolean getResult(int[] confidenceVals) { + int[] res = new int[3]; //3 equals spam, ham and unknown + for(int i=0; i= 0 && confidenceVals[i] < SPAM_THRESHOLD) + res[1]+=1; //ham + if(confidenceVals[i] >= SPAM_THRESHOLD) + res[2]+=1;//spam + } + int maxVotes=0; + int max; + for(int i=0; i<3;i++) { + if(res[i] > maxVotes) { + maxVotes = res[i]; + max = i; + } + } + if(max==0) + return false; + if(max==1) + return false; + if(max==2) + return true; + + System.out.println("Err: getResult() Control shouldn't come here, max= " + max); + return false; + } + + /* + public void addProperty(String key, String value) { + properties.put(key,value); + } + + public String getProperty(String key) { + return 
properties.get(key); + } + + public HashMap getProperties() { + return properties; + } + */ +} diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterStatistic.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterStatistic.java new file mode 100644 index 00000000..2e326a5d --- /dev/null +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterStatistic.java @@ -0,0 +1,66 @@ +public class FilterStatistic { + int unknown; + int spam; + int ham; + + // ------------------------------------------------------- + + public FilterStatistic() { + this.spam = 0; + this.ham = 0; + this.unknown = 0; + } + + public FilterStatistic(int spam, int ham, int unknown) { + this.spam = spam; + this.ham = ham; + this.unknown = unknown; + } + + public int getChecked() { + return getSpam() + getHam() + getUnknown(); + } + + public int getHam() { + return ham; + } + + public int getSpam() { + return spam; + } + + public void setHam(int i) { + ham = i; + } + + public void setSpam(int i) { + spam = i; + } + + public int getUnknown() { + return unknown; + } + + public void setUnknown(int u) { + unknown = u; + } + + public void increaseSpam() { + setSpam(getSpam() + 1); + } + + public void increaseHam() { + setHam(getHam() + 1); + } + + public void increaseUnknown() { + setUnknown(getUnknown() + 1); + } + + public String toString() { + String str = "Filterstats_spam_"+spam; + str += "_ham_" +ham; + str += "_unknown_"+unknown; + return str; + } +} diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/GString.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/GString.java new file mode 100644 index 00000000..8d8fb8cd --- /dev/null +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/GString.java @@ -0,0 +1,151 @@ +public class GString { + public char value[]; + public int count; + public int offset; + + public GString() { + } + + public GString(char c) { + char[] str = new char[1]; + str[0] = c; + GString(str); + } + + public GString(String str) { + value = new 
char[str.count]; + for(int i =0; i< str.count;i++) { + value[i] = str.value[i+str.offset]; + } + count = str.count; + offset = 0; + } + + public GString(GString gstr) { + this.value = gstr.value; + this.count = gstr.count; + this.offset = gstr.offset; + } + + /* + public GString(StringBuffer gsb) { + value = new char[gsb.length()]; + count = gsb.length(); + offset = 0; + for (int i = 0; i < count; i++) + value[i] = gsb.value[i]; + } + */ + + public GString(char str[]) { + char charstr[]=new char[str.length]; + for(int i=0; i 0; i--) + if (this.charAt(i) == ch) + return i; + return -1; + } + + public char charAt(int i) { + return value[i+offset]; + } + + public int indexOf(String str) { + return this.indexOf(str, 0); + } + + public int indexOf(String str, int fromIndex) { + if (fromIndex < 0) + fromIndex = 0; + for (int i = fromIndex; i <= (count-str.count); i++) + if (regionMatches(i, str, 0, str.count)) + return i; + return -1; + } + + public boolean regionMatches(int toffset, String other, int ooffset, int len) { + if (toffset < 0 || ooffset < 0 || (toffset+len) > count || (ooffset+len) > other.count) + return false; + + for (int i = 0; i < len; i++) { + if (other.value[i+other.offset+ooffset] != this.value[i+this.offset+toffset]) + return false; + } + return true; + } + + public String subString(int beginIndex, int endIndex) { + return substring(beginIndex, endIndex); + } + + public String substring(int beginIndex, int endIndex) { + String str; + str = new String(); + str.value = this.value; + str.count = endIndex-beginIndex; + str.offset = this.offset + beginIndex; + return str; + } + + public static String valueOf(Object o) { + if (o==null) + return "null"; + else + return o.toString(); + } + + public String toLocalString() { + return new String(toLocalCharArray(this)); + } + + public static char[] toLocalCharArray(GString str) { + char[] c; + int length; + length = str.length(); + c = new char[length]; + for (int i = 0; i < length; i++) { + c[i] = 
str.value[i+str.offset]; + } + return c; + } + + public int hashCode() { + String s = this.toLocalString(); + return s.hashCode(); + } + + public boolean equals(Object o) { + if(o == null) + return false; + if(!(o instanceof GString)) + return false; + GString gs = (GString)o; + String s1 = gs.toLocalString(); + String s2 = this.toLocalString(); + if(s2.equals(s1)) + return true; + return false; + } +} diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/HashEntry.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/HashEntry.java new file mode 100644 index 00000000..f76828ca --- /dev/null +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/HashEntry.java @@ -0,0 +1,68 @@ +public class HashEntry { + public GString engine; + public GString signature; + public HashStat stats; + + public HashEntry() { + + } + + /** + * hashCode that combines two strings using xor. + * @return a hash code value on the entire object. + */ + public int hashCode() { + int result=0; + // this will not work well if some of the strings are equal. 
+ result = engine.hashCode(); + result ^= signature.hashCode(); + //result ^= stats.hashCode(); + //System.out.println("HashEntry: hashCode= " + result); + return result; + } + + public void setengine(GString engine) { + this.engine=engine; + } + + public void setstats(HashStat stats) { + this.stats=stats; + } + + public void setsig(GString signature) { + this.signature=signature; + } + + public GString getEngine() { + return engine; + } + + public GString getSignature() { + return signature; + } + + public HashStat getStats() { + return stats; + } + + public boolean equals(Object o) { + HashEntry he = (HashEntry)o; + if(!(he.getEngine().equals(engine))) + return false; + if(!(he.getSignature().equals(signature))) + return false; + //if(!(he.getStats().equals(stats))) + // return false; + return true; + } + + public int askForSpam() { + int[] users = stats.getUsers(); + int spamConfidence=0; + for(int i=0; i +// from some public domain C code (md5.c) included with the ssh-1.2.22 source. +// Tue Jan 19 15:55:50 EST 1999 +// $Id: MD5.java,v 1.1 2010/03/04 00:17:44 adash Exp $ +// +// To compute the message digest of a chunk of bytes, create an +// MD5 object 'md5', call md5.update() as needed on buffers full +// of bytes, and then call md5.md5final(), which +// will fill a supplied 16-byte array with the digest. +// +// A main() method is included that hashes the data on System.in. +// +// It seems to run around 25-30 times slower (JDK1.1.6) than optimized C +// (gcc -O4, version 2.7.2.3). Measured on a Sun Ultra 5 (SPARC 270MHz). +// +// Comments from md5.c from ssh-1.2.22, the basis for this code: +// +/* This code has been heavily hacked by Tatu Ylonen to + make it compile on machines like Cray that don't have a 32 bit integer + type. */ +/* + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. 
+ * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + */ + +public class MD5 { + int buf[]; // These were originally unsigned ints. + // This Java code makes an effort to avoid sign traps. + // buf[] is where the hash accumulates. + long bits; // This is the count of bits hashed so far. + byte in[]; // This is a buffer where we stash bytes until we have + // enough (64) to perform a transform operation. + int inint[]; + // inint[] used and discarded inside transform(), + // but why allocate it over and over? + // (In the C version this is allocated on the stack.) + + public MD5() { + buf = new int[4]; + // fill the hash accumulator with a seed value + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + // initially, we've hashed zero bits + bits = 0L; + + in = new byte[64]; + inint = new int[16]; + } + + public void update(byte[] newbuf) { + update(newbuf, 0, newbuf.length); + } + + public void update(byte[] newbuf, int length) { + update(newbuf, 0, length); + } + + public void update(byte[] newbuf, int bufstart, int buflen) { + int t; + int len = buflen; + + // shash old bits value for the "Bytes already in" computation + // just below. + t = (int) bits; // (int) cast should just drop high bits, I hope + + /* update bitcount */ + /* the C code used two 32-bit ints separately, and carefully + * ensured that the carry carried. + * Java has a 64-bit long, which is just what the code really wants. 
+ */ + bits += (long)(len<<3); + + t = (t >>> 3) & 0x3f; /* Bytes already in this->in */ + + /* Handle any leading odd-sized chunks */ + /* (that is, any left-over chunk left by last update() */ + + if (t!=0) { + int p = t; + t = 64 - t; + if (len < t) { + arraycopy(newbuf, bufstart, in, p, len); + return; + } + arraycopy(newbuf, bufstart, in, p, t); + transform(); + bufstart += t; + len -= t; + } + + /* Process data in 64-byte chunks */ + while (len >= 64) { + arraycopy(newbuf, bufstart, in, 0, 64); + transform(); + bufstart += 64; + len -= 64; + } + + /* Handle any remaining bytes of data. */ + /* that is, stash them for the next update(). */ + arraycopy(newbuf, bufstart, in, 0, len); + } + + public void arraycopy(byte[] src, int srcPos, byte[] dest, int destPos, int len) { + for (int i = 0; i < len; i++) { + dest[destPos+i] = src[srcPos+i]; + } + return; + } + + /* + * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) + */ + public void md5final(byte[] digest) { + /* "final" is a poor method name in Java. :v) */ + int count; + int p; // in original code, this is a pointer; in this java code + // it's an index into the array this->in. + + /* Compute number of bytes mod 64 */ + count = (int) ((bits >>> 3) & 0x3F); + + /* Set the first char of padding to 0x80. This is safe since there is + always at least one byte free */ + p = count; + in[p++] = (byte) 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + zeroByteArray(in, p, count); + transform(); + + /* Now fill the next block with 56 bytes */ + zeroByteArray(in, 0, 56); + } else { + /* Pad block to 56 bytes */ + zeroByteArray(in, p, count - 8); + } + + /* Append length in bits and transform */ + // Could use a PUT_64BIT... func here. 
This is a fairly + // direct translation from the C code, where bits was an array + // of two 32-bit ints. + int lowbits = (int) bits; + int highbits = (int) (bits >>> 32); + PUT_32BIT_LSB_FIRST(in, 56, lowbits); + PUT_32BIT_LSB_FIRST(in, 60, highbits); + + transform(); + PUT_32BIT_LSB_FIRST(digest, 0, buf[0]); + PUT_32BIT_LSB_FIRST(digest, 4, buf[1]); + PUT_32BIT_LSB_FIRST(digest, 8, buf[2]); + PUT_32BIT_LSB_FIRST(digest, 12, buf[3]); + + /* zero sensitive data */ + /* notice this misses any sneaking out on the stack. The C + * version uses registers in some spots, perhaps because + * they care about this. + */ + zeroByteArray(in); + zeroIntArray(buf); + bits = 0; + zeroIntArray(inint); + } + + /* + public static void main(String args[]) { + // This main() method was created to easily test + // this class. It hashes whatever's on System.in. + + byte buf[] = new byte[397]; + // arbitrary buffer length designed to irritate update() + int rc; + MD5 md = new MD5(); + byte out[] = new byte[16]; + int i; + int len = 0; + + try { + while ((rc = System.in.read(buf, 0, 397)) > 0) { + md.update(buf, rc); + len += rc; + } + } catch (IOException ex) { + ex.printStackTrace(); + return; + } + md.md5final(out); + + System.out.println("file length: "+len); + System.out.println("hash: "+dumpBytes(out)); + } + */ + + + ///////////////////////////////////////////////////////////////////// + // Below here ye will only finde private functions // + ///////////////////////////////////////////////////////////////////// + + // There must be a way to do these functions that's + // built into Java, and I just haven't noticed it yet. 
+ + private void zeroByteArray(byte[] a) { + zeroByteArray(a, 0, a.length); + } + + private void zeroByteArray(byte[] a, int start, int length) { + setByteArray(a, (byte) 0, start, length); + } + + private void setByteArray(byte[] a, byte val, int start, int length) { + int i; + int end = start+length; + for (i=start; i>>(32-s); + w += x; + return w; + } + + private int MD5STEP2(int w, int x, int y, int z, int data, int s) { + w += (y ^ (z & (x ^ y))) + data; + w = w<>>(32-s); + w += x; + return w; + } + + private int MD5STEP3(int w, int x, int y, int z, int data, int s) { + w += (x ^ y ^ z) + data; + w = w<>>(32-s); + w += x; + return w; + } + + private int MD5STEP4(int w, int x, int y, int z, int data, int s) { + w += (y ^ (x | ~z)) + data; + w = w<>>(32-s); + w += x; + return w; + } + + private void transform() { + /* load in[] byte array into an internal int array */ + int i; + int[] inint = new int[16]; + + for (i=0; i<16; i++) { + inint[i] = GET_32BIT_LSB_FIRST(in, 4*i); + } + + int a, b, c, d; + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + a = MD5STEP1(a, b, c, d, inint[0] + 0xd76aa478, 7); + d = MD5STEP1(d, a, b, c, inint[1] + 0xe8c7b756, 12); + c = MD5STEP1(c, d, a, b, inint[2] + 0x242070db, 17); + b = MD5STEP1(b, c, d, a, inint[3] + 0xc1bdceee, 22); + a = MD5STEP1(a, b, c, d, inint[4] + 0xf57c0faf, 7); + d = MD5STEP1(d, a, b, c, inint[5] + 0x4787c62a, 12); + c = MD5STEP1(c, d, a, b, inint[6] + 0xa8304613, 17); + b = MD5STEP1(b, c, d, a, inint[7] + 0xfd469501, 22); + a = MD5STEP1(a, b, c, d, inint[8] + 0x698098d8, 7); + d = MD5STEP1(d, a, b, c, inint[9] + 0x8b44f7af, 12); + c = MD5STEP1(c, d, a, b, inint[10] + 0xffff5bb1, 17); + b = MD5STEP1(b, c, d, a, inint[11] + 0x895cd7be, 22); + a = MD5STEP1(a, b, c, d, inint[12] + 0x6b901122, 7); + d = MD5STEP1(d, a, b, c, inint[13] + 0xfd987193, 12); + c = MD5STEP1(c, d, a, b, inint[14] + 0xa679438e, 17); + b = MD5STEP1(b, c, d, a, inint[15] + 0x49b40821, 22); + + a = MD5STEP2(a, b, c, d, inint[1] + 
0xf61e2562, 5); + d = MD5STEP2(d, a, b, c, inint[6] + 0xc040b340, 9); + c = MD5STEP2(c, d, a, b, inint[11] + 0x265e5a51, 14); + b = MD5STEP2(b, c, d, a, inint[0] + 0xe9b6c7aa, 20); + a = MD5STEP2(a, b, c, d, inint[5] + 0xd62f105d, 5); + d = MD5STEP2(d, a, b, c, inint[10] + 0x02441453, 9); + c = MD5STEP2(c, d, a, b, inint[15] + 0xd8a1e681, 14); + b = MD5STEP2(b, c, d, a, inint[4] + 0xe7d3fbc8, 20); + a = MD5STEP2(a, b, c, d, inint[9] + 0x21e1cde6, 5); + d = MD5STEP2(d, a, b, c, inint[14] + 0xc33707d6, 9); + c = MD5STEP2(c, d, a, b, inint[3] + 0xf4d50d87, 14); + b = MD5STEP2(b, c, d, a, inint[8] + 0x455a14ed, 20); + a = MD5STEP2(a, b, c, d, inint[13] + 0xa9e3e905, 5); + d = MD5STEP2(d, a, b, c, inint[2] + 0xfcefa3f8, 9); + c = MD5STEP2(c, d, a, b, inint[7] + 0x676f02d9, 14); + b = MD5STEP2(b, c, d, a, inint[12] + 0x8d2a4c8a, 20); + + a = MD5STEP3(a, b, c, d, inint[5] + 0xfffa3942, 4); + d = MD5STEP3(d, a, b, c, inint[8] + 0x8771f681, 11); + c = MD5STEP3(c, d, a, b, inint[11] + 0x6d9d6122, 16); + b = MD5STEP3(b, c, d, a, inint[14] + 0xfde5380c, 23); + a = MD5STEP3(a, b, c, d, inint[1] + 0xa4beea44, 4); + d = MD5STEP3(d, a, b, c, inint[4] + 0x4bdecfa9, 11); + c = MD5STEP3(c, d, a, b, inint[7] + 0xf6bb4b60, 16); + b = MD5STEP3(b, c, d, a, inint[10] + 0xbebfbc70, 23); + a = MD5STEP3(a, b, c, d, inint[13] + 0x289b7ec6, 4); + d = MD5STEP3(d, a, b, c, inint[0] + 0xeaa127fa, 11); + c = MD5STEP3(c, d, a, b, inint[3] + 0xd4ef3085, 16); + b = MD5STEP3(b, c, d, a, inint[6] + 0x04881d05, 23); + a = MD5STEP3(a, b, c, d, inint[9] + 0xd9d4d039, 4); + d = MD5STEP3(d, a, b, c, inint[12] + 0xe6db99e5, 11); + c = MD5STEP3(c, d, a, b, inint[15] + 0x1fa27cf8, 16); + b = MD5STEP3(b, c, d, a, inint[2] + 0xc4ac5665, 23); + + a = MD5STEP4(a, b, c, d, inint[0] + 0xf4292244, 6); + d = MD5STEP4(d, a, b, c, inint[7] + 0x432aff97, 10); + c = MD5STEP4(c, d, a, b, inint[14] + 0xab9423a7, 15); + b = MD5STEP4(b, c, d, a, inint[5] + 0xfc93a039, 21); + a = MD5STEP4(a, b, c, d, inint[12] + 0x655b59c3, 
6); + d = MD5STEP4(d, a, b, c, inint[3] + 0x8f0ccc92, 10); + c = MD5STEP4(c, d, a, b, inint[10] + 0xffeff47d, 15); + b = MD5STEP4(b, c, d, a, inint[1] + 0x85845dd1, 21); + a = MD5STEP4(a, b, c, d, inint[8] + 0x6fa87e4f, 6); + d = MD5STEP4(d, a, b, c, inint[15] + 0xfe2ce6e0, 10); + c = MD5STEP4(c, d, a, b, inint[6] + 0xa3014314, 15); + b = MD5STEP4(b, c, d, a, inint[13] + 0x4e0811a1, 21); + a = MD5STEP4(a, b, c, d, inint[4] + 0xf7537e82, 6); + d = MD5STEP4(d, a, b, c, inint[11] + 0xbd3af235, 10); + c = MD5STEP4(c, d, a, b, inint[2] + 0x2ad7d2bb, 15); + b = MD5STEP4(b, c, d, a, inint[9] + 0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; + } + + private int GET_32BIT_LSB_FIRST(byte[] b, int off) { + return + ((int)(b[off+0]&0xff)) | + ((int)(b[off+1]&0xff) << 8) | + ((int)(b[off+2]&0xff) << 16) | + ((int)(b[off+3]&0xff) << 24); + } + + private void PUT_32BIT_LSB_FIRST(byte[] b, int off, int value) { + b[off+0] = (byte) (value & 0xff); + b[off+1] = (byte) ((value >> 8) & 0xff); + b[off+2] = (byte) ((value >> 16)& 0xff); + b[off+3] = (byte) ((value >> 24)& 0xff); + } + + // These are debug routines I was using while trying to + // get this code to generate the same hashes as the C version. + // (IIRC, all the errors were due to the absence of unsigned + // ints in Java.) 
+ /* + private void debugStatus(String m) { + System.out.println(m+":"); + System.out.println("in: "+dumpBytes(in)); + System.out.println("bits: "+bits); + System.out.println("buf: " + +Integer.toHexString(buf[0])+" " + +Integer.toHexString(buf[1])+" " + +Integer.toHexString(buf[2])+" " + +Integer.toHexString(buf[3])); + } + + private static String dumpBytes(byte[] bytes) { + int i; + StringBuffer sb = new StringBuffer(); + for (i=0; i 2) { + s = s.substring(s.length()-2); + } + sb.append(s); + } + return sb.toString(); + } + */ +} diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/Mail.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/Mail.java new file mode 100644 index 00000000..3afbe19c --- /dev/null +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/Mail.java @@ -0,0 +1,422 @@ +/** + * This class is a container for all data contained in an Email Message. + **/ +public class Mail { + + String header; // the full header + //String sentOn; // time the message was sent + //String receivedOn; // time when the message arrived + String from; // the "from" field + String to; // the "to" field + String cc; + String subject; + String body; + String noURLBody; + String sourceCode; + String spam; + boolean hasAttachement; + String encoding; //rich text, plain, html + + String messageID; // cached message ID for reuse (takes a lot of memory and is used all over the place) + //same as hashcode of a class + boolean isSpam; + + /** + * this is a really simple implementation of a tokenizer + * used to build tokens from an email and divide email into parts + **/ + int MAX_TOKEN_SIZE; + + public Mail() { + messageID=null; + } + + public Mail(String fileName) // read a mail from file + { + //System.out.println("DEBUG: fileName= " + fileName); + + BufferedReader fileinput = new BufferedReader(new FileInputStream(fileName)); + String line; + boolean chk = false; + + while((line = fileinput.readLine()) != null) + { + chk = true; + + Vector splittedLine = line.split(); 
+ if(((String)(splittedLine.elementAt(0))).equals("Spam:")) + { + spam = (String)(splittedLine.elementAt(1)); + } + else if(((String)(splittedLine.elementAt(0))).equals("Header:")) // message id + { + header = (String)splittedLine.elementAt(1); + } + else if(((String)(splittedLine.elementAt(0))).equals("To:")) // receiver + { + to = (String)splittedLine.elementAt(1); + } + else if(((String)(splittedLine.elementAt(0))).equals("From:")) // sender + { + from = (String)splittedLine.elementAt(1); + } + else if(((String)(splittedLine.elementAt(0))).equals("Cc:")) // cc + { + cc = (String)splittedLine.elementAt(1); + } + else if(((String)(splittedLine.elementAt(0))).equals("Subject:")) // Subject + { + subject = (String)splittedLine.elementAt(1); + break; + } + } // parsed messageID, To, from, cc, Title + + /** + * error checking + **/ + if(!chk) + System.out.println("no line read"); + + + body = new String(); + byte[] readBody = new byte[256]; + + while((fileinput.read(readBody)>0)) + { + body += new String(readBody); + readBody = new byte[256]; + } + + fileinput.close(); + + MAX_TOKEN_SIZE = 1024; + } + + // ------------------------------------------------------- + + public void setHeader(String header) { + this.header = header; + } + + public String getHeader() { + return header; + } + + + /* + public void setSentOn(String sentOn) { + this.sentOn = sentOn; + } + + public String getSentOn() { + return sentOn; + } + + public Date getSentOnAsDate() { + String sentOn = getSentOn(); + return parseDate(sentOn); + } + + public void setReceivedOn(String receivedOn) { + this.receivedOn = receivedOn; + } + + public String getReceivedOn() { + return receivedOn; + } + + public Date getReceivedOnAsDate() { + String receivedOn = getReceivedOn(); + return parseDate(receivedOn); + } + */ + + + /** + * Parses a given Date-String in into a real Date-Object + * + * @param stringDate the string in format dd.mm.yyyy hh:mm + * @return a Date containing the info of the string or the actual 
date and time if something fails. + */ + /* + public Date parseDate(String stringDate) { + // date is in this format: dd.mm.yyyy hh:mm + if (stringDate == null || "N/A".equals(stringDate)) { + return new Date(); + } + try { + synchronized (MAIL_TIME_FORMAT) { + return MAIL_TIME_FORMAT.parse(stringDate); + } + } catch (Throwable e) { + return new Date(); + } + } + */ + + public void setFrom(String from) { + this.from = from; + } + + public String getFrom() { + return from; + } + + public void setTo(String to) { + this.to = to; + } + + public String getTo() { + return to; + } + + public void setCc(String cc) { + this.cc = cc; + } + + public String getCc() { + return cc; + } + + public void setSubject(String subject) { + this.subject = subject; + } + + public String getSubject() { + return subject; + } + + public void setBody(String body) { + this.body = body; + } + + public String getBody() { + return body; + } + + public void setSourceCode(String sourceCode) { + this.sourceCode = sourceCode; + } + + public String getSourceCode() { + return sourceCode; + } + + public void setHasAttachement(boolean hasAttachement) { + this.hasAttachement = hasAttachement; + } + + public boolean getHasAttachement() { + return hasAttachement; + } + + public void setEncoding(String encoding) { + this.encoding = encoding; + } + + public String getEncoding() { + return encoding; + } + + public boolean isTextEncoding() { + return getEncoding().toLowerCase().indexOf("plain") >= 0; + } + + public boolean isHTMLEncoding() { + return getEncoding().toLowerCase().indexOf("html") >= 0; + } + + /* + public String toString() { + return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getReceivedOn() + "," + getSentOn() + "," + getSourceCode() + "," + getSubject() + "," + getTo(); + } + */ + + public String toString() { + return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + 
getHeader() + "," + getSourceCode() + "," + getSubject() + "," + getTo(); + } + + /* + public String getID() { + if (messageID == null) { // no cached version + // Take the message-ID header as ID (if present) + String[] messageIDs = getHeaderField("Message-ID"); + if ((messageIDs != null) && (messageIDs.length > 0)) { + messageID = messageIDs[0]; + } else { // otherwise, hash header and body as ID + return String.valueOf(getHeader().hashCode() + getBody().hashCode()); + } + } + + return messageID; + } + */ + + public String[] getHeaderField(String fieldName) { + + } + + public String extractEMailAddress() { + + } + + /* + public boolean equals(Object o) { + if (o instanceof Mail) { + Mail mail = (Mail)o; + return this.getID().equals(mail.getID()); + } + + return false; + } + */ + + public Vector getCommonPart() + { + Vector returnStrings = new Vector(); + + // add header, sender, and title + returnStrings.addElement(header); + returnStrings.addElement(from); + returnStrings.addElement(subject); + + return returnStrings; + } + + public String getBodyString() + { + return body; + } + + public Vector returnEmail() { + Vector myemail = new Vector(); + myemail.addElement(getCommonPart()); + //System.out.println("DEBUG: getCommonPart.size= " + getCommonPart().size()); + myemail.addElement(getURLs()); + //System.out.println("DEBUG: getURLs.size= " + getURLs().size()); + myemail.addElement(getSplittedBody(MAX_TOKEN_SIZE)); + //System.out.println("DEBUG: getSplittedBody.size= " + getSplittedBody(MAX_TOKEN_SIZE).size()); + return myemail; + } + + public Vector getURLs() + { + Vector returnStrings = new Vector(); + Vector splittedBody = body.split(); + + // add URL and email in the body + for(int i=0; i=end) { + String str=noURLBody.substring(i, end); + returnStrings.addElement(str); + } + else { + String str=noURLBody.substring(i, i+size); + returnStrings.addElement(str); + } + } + return returnStrings; + } + + + public void setIsSpam(boolean spam) { + isSpam = spam; + } + 
+ public boolean getIsSpam() { /* true only when the spam field equals "yes"; NOTE(review): setIsSpam() assigns isSpam, not spam -- confirm this getter reads the intended field */ + if(spam.equals("yes")) + return true; + return false; + } + + /** + * Computes the signatures of this mail for the spam filter: the parts from returnEmail() are handed to SignatureComputer.computeSigs() and its result is returned (userid is not read here) + **/ + public Vector checkMail(int userid) { + //Preprocess emails + + //long startGetParts=System.currentTimeMillis(); + Vector partsOfMailStrings = returnEmail(); + //long stopGetParts=System.currentTimeMillis(); + //System.out.println("Time to read email= " + (stopGetParts-startGetParts)); + + //Compute signatures + SignatureComputer sigComp = new SignatureComputer(); + //Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings + //long startGetsignatures=System.currentTimeMillis(); + Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of vector of strings + //long stopGetsignatures=System.currentTimeMillis(); + //System.out.println("Time to Getsignatures= " + (stopGetsignatures-startGetsignatures)); + + return signatures; + } + + /* For tests only */ + /* + public static void main(String[] args) + { + Mail mail = new Mail("./emails/email1"); + + String[] a = mail.createMailStrings(); + + for(String b : a) + { + System.out.println(b); + } + } + */ +} diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/SignatureComputer.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/SignatureComputer.java new file mode 100644 index 00000000..4f98a2d0 --- /dev/null +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/SignatureComputer.java @@ -0,0 +1,289 @@ +public class SignatureComputer { + public EphemeralSignature sig4; //signature engines + public WhiplashSignature sig8; //signature engines + + int[] enginesToUseForCheck; + + public SignatureComputer() { + sig4 = new EphemeralSignature(); //default values + sig8 = new WhiplashSignature(); + createEnginesToCheck(); + } + + /** + * constructor to be used when some parsing has already taken place with the + * server-provided value randomNumberSeed.
+ * + * @param randomNumberSeed + * a non-negative number used for seeding the random number generator + * before starting to hash values. + * @param separator + * how the mail-text should be splitted into lines. (== what chars + * separate 2 lines) + */ + public SignatureComputer(int randomNumberSeed, String separator) { + sig4 = new EphemeralSignature(randomNumberSeed,separator); + sig8 = new WhiplashSignature(); + createEnginesToCheck(); + } + + /** + * the constructor to be used most of the time. you can hand over the + * seed-string exactly as it is provided by the razor-server. + * + * @param seedAndSeparator + * a string containing the seed value for the RNG and a separator list + * (separated by ' - '). default value is + * "7542-10" which means server-seed 7542 and only one + * separator 10 (which is ascii '\n'). + */ + public SignatureComputer(String seedAndSeparator) { + sig4 = new EphemeralSignature(seedAndSeparator); + sig8 = new WhiplashSignature(); + createEnginesToCheck(); + } + + /** + * Fills enginesToUseForCheck with the two engine numbers this class iterates over: 4 and 8. + */ + public void createEnginesToCheck() { + enginesToUseForCheck = new int[2]; + enginesToUseForCheck[0] = 4; //Ephemeral engine + enginesToUseForCheck[1] = 8;//Whiplash engine + } + + public boolean isSigSupported(int sig) { /* true when sig is one of the entries of enginesToUseForCheck */ + boolean found = false; + for (int i = 0; i < enginesToUseForCheck.length && !found; i++) { + if (enginesToUseForCheck[i] == sig) { + found = true; + } + } + return found; + } + + public boolean isSigSupported(String sig) { /* null yields false; otherwise sig is parsed with Integer.parseInt and looked up as an int */ + return (sig != null && isSigSupported(Integer.parseInt(sig))); + } + + public String getDefaultEngine() { /* always "4" */ + return "4"; + } + + public Vector computeSigs(Vector EmailParts) { /* reads EmailParts element 0 (common parts), 2 (body parts without URLs) and 1 (URL parts), each cast to a Vector; returns null when EmailParts is null */ + if (EmailParts == null) return null; + + Vector printableSigs = new Vector(); // vector of strings + + /** + * Step -I + * Get signatures for the common parts + **/ + + Vector commonpart = (Vector) (EmailParts.elementAt(0)); + for (int mailIndex = 0; mailIndex < commonpart.size(); mailIndex++) { + String mail = (String)
(commonpart.elementAt(mailIndex)); + + if (mail == null) continue; + + /* + * Compute Sig for email header that are cleaned. + */ + for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) { + int engineNo = enginesToUseForCheck[engineIndex]; + String sig = null; + + /* EphemeralSignature calculator */ + if(engineNo==4) { + sig = computeSignature(engineNo,mail); + } + + if(engineNo==8) { /* engine 8 is skipped for the common parts */ + continue; + } + + if((engineNo!=4)) { + System.out.println("Err: Common part Couldn't find the signature engine: " + engineNo); + } + + if (sig != null) { + String hash = engineNo + ":" + sig; + printableSigs.addElement(hash); + + //System.out.println("DEBUG: mail= " +mail + " hash= " + hash); + + } else { + // we didn't produce a signature for the mail. + } + }//engine + }//common part + + /** + * Step -II + * Get signatures for the body parts without URLs + **/ + Vector getBodywithNoURLs = (Vector)(EmailParts.elementAt(2)); + for (int mailIndex = 0; mailIndex < getBodywithNoURLs.size(); mailIndex++) { + String mail = (String) (getBodywithNoURLs.elementAt(mailIndex)); + + + if (mail == null) continue; + + /* + * Compute Sig for body parts (URLs removed) that are cleaned. + */ + for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) { + int engineNo = enginesToUseForCheck[engineIndex]; + String sig = null; + + /* EphemeralSignature calculator */ + if(engineNo==4) { + sig = computeSignature(engineNo,mail); + } + + if(engineNo==8) /* engine 8 is skipped for the URL-free body parts */ + continue; + + if(engineNo!=4) { + System.out.println("Err: body parts without URL Couldn't find the signature engine: " + engineNo); + } + + if (sig != null) { + String hash = engineNo + ":" + sig; + printableSigs.addElement(hash); + } else { + // we didn't produce a signature for the mail.
+ } + }//engine + } + + /** + * Step -III + * Get signatures for the body parts with URLs + **/ + Vector getURLs = (Vector)(EmailParts.elementAt(1)); + for (int mailIndex = 0; mailIndex < getURLs.size(); mailIndex++) { + String mail = (String) (getURLs.elementAt(mailIndex)); + + /* + * Compute Sig for bodyparts that are cleaned. + */ + for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) { + int engineNo = enginesToUseForCheck[engineIndex]; + if(engineNo==4) + continue; + + /* WhiplashSignature calculator */ + String[] hosts = null; + String sig = null; + if(engineNo==8) { + //hosts = computeSignature(engineNo,mail); + hosts = sig8.computeSignature(mail); + if(hosts != null) { + for(int i=0; i -n -e \n"); + System.out.println( " -n : num iterations"); + System.out.println( " -e : number of emails"); + } + + /** + * Returns result to the Spam filter + **/ + /* + public boolean checkMail(Mail mail, int userid) { + //Preprocess emails + //Vector partsOfMailStrings = mail.createMailStringsWithURL(); + /* + Vector partsOfMailStrings = mail.getCommonPart(); + partsOfMailStrings.addElement(mail.getBodyString()); + + //Compute signatures + SignatureComputer sigComp = new SignatureComputer(); + Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings + + //check with data structure + int[] confidenceVals = check(signatures,userid); + + //---- create and return results -------- + FilterResult filterResult = new FilterResult(); + boolean spam = filterResult.getResult(confidenceVals); + + return spam; + } + */ + + public int[] check(Vector signatures, int userid) { + int numparts = signatures.size(); + + //System.out.println("check() numparts= " + numparts); + + int[] confidenceVals = new int[numparts]; + for(int i=0; i the mail client is able to determine if it is spam or not + // --- According to the "any"-logic (in Core#check_logic) in original Razor --- + // If any answer is spam, the entire email is spam. 
+ return confidenceVals; + } + + /** + * This method sends feedback from the user to a distributed + * spam database and trains the spam database to check future + * emails and detect spam + **/ + public void sendFeedBack(Vector signatures, boolean isSpam, int id, Random myrand) { + + for(int i=0;ipack() which + * we must do manually in java. + */ + private int convertHexToRazorEncoding(String hex3) { /* packs up to three hex digits into 12 bits, first digit highest, each digit bit-mirrored by mirror4LSBits; a missing 2nd or 3rd digit counts as 0 and null yields 0 */ + if((hex3 == null)) + return 0; //error + int res = 0; + int cur = Integer.parseInt(hex3.substring(0,1),16); + cur = mirror4LSBits(cur); + res |= ( (cur&0xf) << 8); + if(hex3.length() >=2) { + cur = Integer.parseInt(hex3.substring(1,2),16); + } else { + cur = 0; + } + //cur = ( hex3.length() >=2 ? Integer.parseInt(hex3.substring(1,2),16) : 0); + cur = mirror4LSBits(cur); + res |= ((cur & 0xf) << 4); + if(hex3.length() >= 3) { + cur = Integer.parseInt(hex3.substring(2,3),16); + } else { + cur = 0; + } + //cur = ( hex3.length() >= 3 ? Integer.parseInt(hex3.substring(2,3),16): 0); + cur = mirror4LSBits(cur); + res |= (cur & 0xf); + + return res; + } + + /** + * mirrors the 4 least significant bits of an integer + * @param cur an int containing 4 Least Significant bits like 00000...00abcd + * @return the mirrored 4 least significant bits 00000...00dcba. all bits except a-d are lost.
+ */ + public int mirror4LSBits(int cur) { /* bit 3 moves to bit 0, bit 2 to bit 1, bit 1 to bit 2, bit 0 to bit 3 */ + int res = 0; + res |= (cur & 0x8)>>>3; + res |= (cur & 0x4)>>>1; + res |= (cur & 0x2)<<1; + res |= (cur & 0x1)<<3; + return res; + } + + public String[] whiplash(String text) { /* returns one MD5-derived signature string per host found by extractHosts(text), or null when text is null or no host is found */ + + if (text == null) { + return null; + } + String[] hosts = extractHosts(text); + if (hosts == null || hosts.length < 1) { + return null; + } + String[] sigs = new String[hosts.length]; + + for (int i = 0; i < hosts.length; i++) { + MD5 md = new MD5(); + String host = hosts[i]; + int len = host.length(); + byte buf[] = host.getBytes(); + byte sig[] = new byte[16]; + md.update(buf, len); + md.md5final(sig); + String signature = new String(sig); + + // System.out.println("DEBUG: host= " + host + " whiplash sig= " + signature); + + sigs[i] = signature; + } + return sigs; + } + + public String[] extractHosts(String text) { /* collects the text between each "www." or "@" and the next "." */ + //System.out.println("Inside extractHosts"); + Vector hosts = new Vector(); + String buf = new String(text); + + //System.out.println("DEBUG: extractHosts() string= " + buf); + + /* Extract hosts that follow a "www." prefix */ + int idx; + String strwww = new String("www."); + while ((idx = buf.indexOf(strwww)) != -1) { + int startidx = idx + strwww.length(); + String strcom = new String("."); + buf = buf.subString(startidx); + int endidx = buf.indexOf(strcom); /* NOTE(review): presumably -1 when no "." follows, and subString(0, -1) is then called -- confirm inputs always contain a dot */ + String host = buf.subString(0, endidx); + //System.out.println("DEBUG: http links extracted host= " + host); + + buf = buf.subString(endidx); + endidx = buf.indexOf(strcom); /* NOTE(review): buf now starts with strcom, so this index is presumably 0 and the next line appends an empty string -- confirm intent */ + host += buf.subString(0, endidx); + + hosts.addElement(host); + buf = buf.subString(endidx+strcom.length()); + } + + /* Extract hosts from email addresses */ + buf = new String(text); + String strrate = new String("@"); + while ((idx = buf.indexOf(strrate)) != -1) { + int startidx = idx + strrate.length(); + String strdot = new String("."); + buf = buf.subString(startidx); + int endidx = buf.indexOf(strdot); + String host = buf.subString(0, endidx); + //System.out.println("DEBUG: email addr extracted
host= " + host); + + buf = buf.subString(endidx); + endidx = buf.indexOf(strdot); + host += buf.subString(0, endidx); + + hosts.addElement(host); + buf = buf.subString(endidx+strdot.length()); + } + + if (hosts.size() == 0) { + return null; + } + + String[] retbuf = new String[hosts.size()]; + for (int i = 0; i < hosts.size(); i++) { + retbuf[i] = (String) (hosts.elementAt(i)); + } + + return retbuf; + } + +// Testing the signature computation +// public static void main(String[] args) { +// /* String testVector = " Test Vectors: \n"+ +// "\n" + +// "1. http:www.nodg.com@www.geocities.com/nxcisdsfdfdsy/off\n"+ +// "2. http:www.ksleybiuh.com@213.171.60.74/getoff/\n"+ +// "3. \n"+ +// "4. http:217.12.4.7/rmi/http:definethis.net/526/index.html\n"+ +// "5. http:magalygr8sex.free-host.com/h.html\n"+ +// "6. http:%3CVenkatrs%3E@218.80.74.102/thecard/4index.htm\n"+ +// "7. http:EBCDVKIGURGGCEOKXHINOCANVQOIDOXJWTWGPC@218.80.74.102/thecard/5in\n"+ +// "8. http:g.india2.bag.gs/remove_page.htm\n"+ +// "9. https:220.97.40.149\n"+ +// "10. http:mjaked.biz/unsubscribe.ddd?leaving\n"+ +// "11. http:g5j99m8@it.rd.yahoo.com/bassi/*http:www.lekobas.com/c/index.php\n"+ +// "12. look great / feel great\n"+ +// "13. \n"+ +// "14. www.pillzthatwork.com # anything that starts with www. \n"; +// */ +// String testVector = "\n"+ +// "\n"+ +// "

Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com

\n"+ +// "

now a masked link http://www.coveringlink1.com and another link http:plaintextlink3.net and how about https:plaintextlink4.to

\n"+ +// "

another masked link https:coveringlink2.com and another link https:plaintextlink5.com

\n"+ +// "\n"+ +// "\n"; +// String test1 = "Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com

\n"; +// WhiplashSignature whiplash = new WhiplashSignature(); +// String[] hosts = whiplash.computeSignature(testVector); +// //String[] hosts = whiplash.computeSignature(test1); +// for (int i = 0; i < hosts.length; i++) { +// String string = hosts[i]; +// System.out.println("host " + i + ":\t" + string); +// } +// } + +} diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/makefile b/Robust/src/Benchmarks/Recovery/SpamFilter/java/makefile new file mode 100644 index 00000000..e016d93c --- /dev/null +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/makefile @@ -0,0 +1,21 @@ +MAINCLASS=SpamFilter +SRC=${MAINCLASS}.java \ + DistributedHashMap.java \ + Mail.java \ + FilterResult.java \ + HashEntry.java \ + HashStat.java \ + SignatureComputer.java \ + FilterStatistic.java \ + EphemeralSignature.java \ + GString.java \ + WhiplashSignature.java + +FLAGS=-optimize -thread -mainclass ${MAINCLASS} + +default: + ../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC} + +clean: + rm -rf tmpbuilddirectory* + rm *.bin diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/GString.java b/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/GString.java index 47e3fcf3..9665f58c 100644 --- a/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/GString.java +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/GString.java @@ -46,24 +46,6 @@ public class GString { this.offset=0; } - public static char[] toLocalCharArray(GString str) { - char[] c; - int length; - - length = str.length(); - - c = new char[length]; - - for (int i = 0; i < length; i++) { - c[i] = str.value[i+str.offset]; - } - return c; - } - - public String toLocalString() { - return new String(toLocalCharArray(this)); - } - public int length() { return count; } diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/SpamFilter.java b/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/SpamFilter.java index 2bed4b34..37e97cd3 100644 --- 
a/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/SpamFilter.java +++ b/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/SpamFilter.java @@ -53,10 +53,10 @@ public class SpamFilter extends Thread { correct =0; wrong = 0; for(int j=0; j