From 55aff1a2c50c1dc95240444107a591f414a8b576 Mon Sep 17 00:00:00 2001
From: adash
Date: Thu, 4 Mar 2010 00:17:45 +0000
Subject: [PATCH] changes to benchmarks javasingle version of spamfilter
---
.../Recovery/FileSystem/recovery/dstm.conf | 3 -
.../Recovery/FileSystem/recovery/makefile | 2 +-
.../Recovery/Game/recovery/RainForest.java | 4 +-
.../Recovery/Game/recovery/makefile | 4 +-
.../SpamFilter/java/BufferedReader.java | 86 ++++
.../SpamFilter/java/DistributedHashMap.java | 157 +++++++
.../SpamFilter/java/EphemeralSignature.java | 47 ++
.../SpamFilter/java/FilterResult.java | 80 ++++
.../SpamFilter/java/FilterStatistic.java | 66 +++
.../Recovery/SpamFilter/java/GString.java | 151 +++++++
.../Recovery/SpamFilter/java/HashEntry.java | 68 +++
.../Recovery/SpamFilter/java/HashStat.java | 70 +++
.../Recovery/SpamFilter/java/MD5.java | 424 ++++++++++++++++++
.../Recovery/SpamFilter/java/Mail.java | 422 +++++++++++++++++
.../SpamFilter/java/SignatureComputer.java | 289 ++++++++++++
.../Recovery/SpamFilter/java/SpamFilter.java | 374 +++++++++++++++
.../SpamFilter/java/WhiplashSignature.java | 258 +++++++++++
.../Recovery/SpamFilter/java/makefile | 21 +
.../Recovery/SpamFilter/recovery/GString.java | 18 -
.../SpamFilter/recovery/SpamFilter.java | 12 +-
.../Recovery/SpamFilter/recovery/dstm.conf | 13 +-
.../Recovery/SpamFilter/recovery/makefile | 2 +-
.../Recovery/Spider/recovery/makefile | 2 +-
23 files changed, 2527 insertions(+), 46 deletions(-)
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/BufferedReader.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/DistributedHashMap.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/EphemeralSignature.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterResult.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterStatistic.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/GString.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/HashEntry.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/HashStat.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/MD5.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/Mail.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/SignatureComputer.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/SpamFilter.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/WhiplashSignature.java
create mode 100644 Robust/src/Benchmarks/Recovery/SpamFilter/java/makefile
diff --git a/Robust/src/Benchmarks/Recovery/FileSystem/recovery/dstm.conf b/Robust/src/Benchmarks/Recovery/FileSystem/recovery/dstm.conf
index 74a1e24c..e69de29b 100644
--- a/Robust/src/Benchmarks/Recovery/FileSystem/recovery/dstm.conf
+++ b/Robust/src/Benchmarks/Recovery/FileSystem/recovery/dstm.conf
@@ -1,3 +0,0 @@
-128.195.180.21
-#128.195.180.24
-128.195.180.26
diff --git a/Robust/src/Benchmarks/Recovery/FileSystem/recovery/makefile b/Robust/src/Benchmarks/Recovery/FileSystem/recovery/makefile
index cf672af4..0379d038 100644
--- a/Robust/src/Benchmarks/Recovery/FileSystem/recovery/makefile
+++ b/Robust/src/Benchmarks/Recovery/FileSystem/recovery/makefile
@@ -1,6 +1,6 @@
MAINCLASS=FileSystem
SRC1=${MAINCLASS}.java
-FLAGS= -recoverystats -dsm -recovery -nooptimize -mainclass ${MAINCLASS}
+FLAGS= -recovery -recoverystats -dsm -dsmcaching -optimize -mainclass ${MAINCLASS}
default:
../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC1}
diff --git a/Robust/src/Benchmarks/Recovery/Game/recovery/RainForest.java b/Robust/src/Benchmarks/Recovery/Game/recovery/RainForest.java
index 9e3fedcc..fb8b7ce6 100644
--- a/Robust/src/Benchmarks/Recovery/Game/recovery/RainForest.java
+++ b/Robust/src/Benchmarks/Recovery/Game/recovery/RainForest.java
@@ -1,6 +1,6 @@
#define ROW 400 /* columns in the map */
#define COLUMN 100 /* rows of in the map */
-#define ROUNDS 1000 /* Number of moves by each player */
+#define ROUNDS 2000 /* Number of moves by each player */
#define PLAYERS 20 /* Number of Players when num Players != num of client machines */
#define RATI0 0.5 /* Number of lumberjacks to number of planters */
#define BLOCK 3 /* Area around the gamer to consider */
@@ -103,7 +103,7 @@ public class RainForest extends Thread {
RecoveryStat.printRecoveryStat();
while(true) {
- sleep(1000000);
+ sleep(300000000);
}
}
diff --git a/Robust/src/Benchmarks/Recovery/Game/recovery/makefile b/Robust/src/Benchmarks/Recovery/Game/recovery/makefile
index b4b5a69a..7958db59 100644
--- a/Robust/src/Benchmarks/Recovery/Game/recovery/makefile
+++ b/Robust/src/Benchmarks/Recovery/Game/recovery/makefile
@@ -11,8 +11,8 @@ SRC=tmp${MAINCLASS}.java \
AStarPathFinder.java \
../../../../ClassLibrary/JavaDSM/Thread.java
-FLAGS1=-dsm -recoverystats -recovery -optimize -mainclass ${MAINCLASS}
-DSMFLAGS=-dsm -optimize -mainclass ${MAINCLASS}
+FLAGS1=-dsm -dsmcaching -recoverystats -recovery -optimize -mainclass ${MAINCLASS}
+DSMFLAGS=-dsm -dsmcaching -optimize -mainclass ${MAINCLASS}
default:
cpp ${MAINCLASS}.java > tmp1${MAINCLASS}.java
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/BufferedReader.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/BufferedReader.java
new file mode 100644
index 00000000..d4c96fdd
--- /dev/null
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/BufferedReader.java
@@ -0,0 +1,86 @@
+public class BufferedReader {
+ FileInputStream fr;
+ byte[] buffer;
+ int offset;
+ int end;
+
+ public BufferedReader(FileInputStream fr) {
+   // Allocate the 2048-byte staging buffer, then remember the stream it is
+   // filled from. offset and end keep their default value of 0.
+   byte[] staging = new byte[2048];
+   this.buffer = staging;
+   this.fr = fr;
+ }
+
+ public int read() {
+ if (offset=arraylen)
+ return off;
+ array[off++]=buffer[offset];
+ }
+ readBuffer();
+ if (end==0)
+ return off;
+ if (end<0)
+ return end;
+ } while(true);
+ }
+
+ public void readBuffer() {
+ offset=0;
+ end=fr.read(buffer);
+ }
+
+ public String readLine() {
+ String str=null;
+ do {
+ boolean foundcr=false;
+ int index=offset;
+ for(;index> 27)) ^ str.charAt(i);
+ }
+
+ return hash;
+ }
+ */
+
+}
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterResult.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterResult.java
new file mode 100644
index 00000000..196006be
--- /dev/null
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterResult.java
@@ -0,0 +1,80 @@
+/**
+ * A FilterResult encapsulates the result of a filter made by checking a mail.
+ **/
+public class FilterResult {
+ /**
+ * This value is used if type is ERROR or UNKNOWN.
+ */
+ public double NO_RESULT;
+
+ /**
+ * A result value greater than or equal to this value indicates that the
+ * filter has decided on spam.
+ */
+ public int SPAM_THRESHOLD;
+ public int ABSOLUTE_SPAM;
+ public int ABSOLUTE_HAM;
+
+ //public double result; // the result, a value between -1 (ham) and 1000 (spam),
+ // negative values for "error", "unknown" etc.
+
+ // -----------------------------------------------------------------------------
+
+ public FilterResult(double result) {
+   // The `result` argument is accepted but not stored; only the fixed
+   // thresholds are initialised, exactly as in the no-arg constructor.
+   NO_RESULT = -1;
+   ABSOLUTE_HAM = 0;
+   SPAM_THRESHOLD = 50;
+   ABSOLUTE_SPAM = 100;
+ }
+
+ public FilterResult() {
+   // Initialise the fixed classification thresholds.
+   NO_RESULT = -1;
+   ABSOLUTE_HAM = 0;
+   SPAM_THRESHOLD = 50;
+   ABSOLUTE_SPAM = 100;
+ }
+
+ public boolean getResult(int[] confidenceVals) {
+ int[] res = new int[3]; //3 equals spam, ham and unknown
+ for(int i=0; i= 0 && confidenceVals[i] < SPAM_THRESHOLD)
+ res[1]+=1; //ham
+ if(confidenceVals[i] >= SPAM_THRESHOLD)
+ res[2]+=1;//spam
+ }
+ int maxVotes=0;
+ int max;
+ for(int i=0; i<3;i++) {
+ if(res[i] > maxVotes) {
+ maxVotes = res[i];
+ max = i;
+ }
+ }
+ if(max==0)
+ return false;
+ if(max==1)
+ return false;
+ if(max==2)
+ return true;
+
+ System.out.println("Err: getResult() Control shouldn't come here, max= " + max);
+ return false;
+ }
+
+ /*
+ public void addProperty(String key, String value) {
+ properties.put(key,value);
+ }
+
+ public String getProperty(String key) {
+ return properties.get(key);
+ }
+
+ public HashMap getProperties() {
+ return properties;
+ }
+ */
+}
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterStatistic.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterStatistic.java
new file mode 100644
index 00000000..2e326a5d
--- /dev/null
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/FilterStatistic.java
@@ -0,0 +1,66 @@
+public class FilterStatistic {
+ int unknown;
+ int spam;
+ int ham;
+
+ // -------------------------------------------------------
+
+ public FilterStatistic() {
+ this.spam = 0;
+ this.ham = 0;
+ this.unknown = 0;
+ }
+
+ public FilterStatistic(int spam, int ham, int unknown) {
+ this.spam = spam;
+ this.ham = ham;
+ this.unknown = unknown;
+ }
+
+ public int getChecked() {
+ return getSpam() + getHam() + getUnknown();
+ }
+
+ public int getHam() {
+ return ham;
+ }
+
+ public int getSpam() {
+ return spam;
+ }
+
+ public void setHam(int i) {
+ ham = i;
+ }
+
+ public void setSpam(int i) {
+ spam = i;
+ }
+
+ public int getUnknown() {
+ return unknown;
+ }
+
+ public void setUnknown(int u) {
+ unknown = u;
+ }
+
+ public void increaseSpam() {
+ setSpam(getSpam() + 1);
+ }
+
+ public void increaseHam() {
+ setHam(getHam() + 1);
+ }
+
+ public void increaseUnknown() {
+ setUnknown(getUnknown() + 1);
+ }
+
+ public String toString() {
+ String str = "Filterstats_spam_"+spam;
+ str += "_ham_" +ham;
+ str += "_unknown_"+unknown;
+ return str;
+ }
+}
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/GString.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/GString.java
new file mode 100644
index 00000000..8d8fb8cd
--- /dev/null
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/GString.java
@@ -0,0 +1,151 @@
+public class GString {
+ public char value[];
+ public int count;
+ public int offset;
+
+ public GString() { // Creates an instance without touching value, count or offset (they keep their defaults).
+ }
+
+ public GString(char c) {
+   // Creates a one-character string holding c.
+   // The fields are set directly: the previous body ended with the statement
+   // "GString(str);", which Java parses as a call to a method named GString
+   // (none exists) rather than as a constructor invocation.
+   char[] single = new char[1];
+   single[0] = c;
+   this.value = single;
+   this.count = 1;
+   this.offset = 0;
+ }
+
+ public GString(String str) {
+   // Copy the characters of str into a fresh array so that this object owns
+   // its storage and starts at offset 0.
+   int n = str.count;
+   char[] copy = new char[n];
+   int base = str.offset;
+   for (int k = 0; k < n; k++) {
+     copy[k] = str.value[base + k];
+   }
+   this.value = copy;
+   this.count = n;
+   this.offset = 0;
+ }
+
+ public GString(GString gstr) {
+   // Shares gstr's backing array (no copy is made); only the window
+   // described by offset and count is duplicated.
+   offset = gstr.offset;
+   count = gstr.count;
+   value = gstr.value;
+ }
+
+ /*
+ public GString(StringBuffer gsb) {
+ value = new char[gsb.length()];
+ count = gsb.length();
+ offset = 0;
+ for (int i = 0; i < count; i++)
+ value[i] = gsb.value[i];
+ }
+ */
+
+ public GString(char str[]) {
+ char charstr[]=new char[str.length];
+ for(int i=0; i 0; i--)
+ if (this.charAt(i) == ch)
+ return i;
+ return -1;
+ }
+
+ public char charAt(int i) {
+ return value[i+offset];
+ }
+
+ public int indexOf(String str) {
+ return this.indexOf(str, 0);
+ }
+
+ public int indexOf(String str, int fromIndex) {
+ if (fromIndex < 0)
+ fromIndex = 0;
+ for (int i = fromIndex; i <= (count-str.count); i++)
+ if (regionMatches(i, str, 0, str.count))
+ return i;
+ return -1;
+ }
+
+ public boolean regionMatches(int toffset, String other, int ooffset, int len) {
+ if (toffset < 0 || ooffset < 0 || (toffset+len) > count || (ooffset+len) > other.count)
+ return false;
+
+ for (int i = 0; i < len; i++) {
+ if (other.value[i+other.offset+ooffset] != this.value[i+this.offset+toffset])
+ return false;
+ }
+ return true;
+ }
+
+ public String subString(int beginIndex, int endIndex) {
+   // Alternate spelling that forwards to substring(int, int).
+   String result = substring(beginIndex, endIndex);
+   return result;
+ }
+
+ public String substring(int beginIndex, int endIndex) {
+   // The returned String shares this object's backing array; only its
+   // window (offset and count) differs. No bounds checking is performed.
+   String view = new String();
+   view.offset = this.offset + beginIndex;
+   view.count = endIndex - beginIndex;
+   view.value = this.value;
+   return view;
+ }
+
+ public static String valueOf(Object o) {
+ if (o==null)
+ return "null";
+ else
+ return o.toString();
+ }
+
+ public String toLocalString() {
+ return new String(toLocalCharArray(this));
+ }
+
+ public static char[] toLocalCharArray(GString str) {
+ char[] c;
+ int length;
+ length = str.length();
+ c = new char[length];
+ for (int i = 0; i < length; i++) {
+ c[i] = str.value[i+str.offset];
+ }
+ return c;
+ }
+
+ public int hashCode() {
+ String s = this.toLocalString();
+ return s.hashCode();
+ }
+
+ public boolean equals(Object o) {
+ if(o == null)
+ return false;
+ if(!(o instanceof GString))
+ return false;
+ GString gs = (GString)o;
+ String s1 = gs.toLocalString();
+ String s2 = this.toLocalString();
+ if(s2.equals(s1))
+ return true;
+ return false;
+ }
+}
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/HashEntry.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/HashEntry.java
new file mode 100644
index 00000000..f76828ca
--- /dev/null
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/HashEntry.java
@@ -0,0 +1,68 @@
+public class HashEntry {
+ public GString engine;
+ public GString signature;
+ public HashStat stats;
+
+ public HashEntry() { // Leaves engine, signature and stats unset; callers fill them through the setters.
+
+ }
+
+ /**
+ * hashCode that combines two strings using xor.
+ * @return a hash code value on the entire object.
+ */
+ public int hashCode() {
+   // XOR of the engine and signature hashes; stats does not contribute.
+   // Note: when both strings hash to the same value the result is 0.
+   int engineHash = engine.hashCode();
+   int signatureHash = signature.hashCode();
+   return engineHash ^ signatureHash;
+ }
+
+ public void setengine(GString engine) { // Stores the engine name this entry belongs to.
+ this.engine=engine;
+ }
+
+ public void setstats(HashStat stats) { // Stores the statistics object attached to this entry.
+ this.stats=stats;
+ }
+
+ public void setsig(GString signature) { // Stores the signature string of this entry.
+ this.signature=signature;
+ }
+
+ public GString getEngine() { // Returns the engine reference as stored (no copy).
+ return engine;
+ }
+
+ public GString getSignature() { // Returns the signature reference as stored (no copy).
+ return signature;
+ }
+
+ public HashStat getStats() { // Returns the statistics reference as stored (no copy).
+ return stats;
+ }
+
+ public boolean equals(Object o) {
+   // Guard against null and foreign types before casting, in the same way
+   // GString.equals does; previously either case failed at the cast or at
+   // the first dereference instead of returning false.
+   if (o == null)
+     return false;
+   if (!(o instanceof HashEntry))
+     return false;
+   HashEntry he = (HashEntry)o;
+   // Two entries are equal when engine and signature match. stats is
+   // deliberately ignored, which keeps equals consistent with hashCode().
+   if (!(he.getEngine().equals(engine)))
+     return false;
+   if (!(he.getSignature().equals(signature)))
+     return false;
+   return true;
+ }
+
+ public int askForSpam() {
+ int[] users = stats.getUsers();
+ int spamConfidence=0;
+ for(int i=0; i
+// from some public domain C code (md5.c) included with the ssh-1.2.22 source.
+// Tue Jan 19 15:55:50 EST 1999
+// $Id: MD5.java,v 1.1 2010/03/04 00:17:44 adash Exp $
+//
+// To compute the message digest of a chunk of bytes, create an
+// MD5 object 'md5', call md5.update() as needed on buffers full
+// of bytes, and then call md5.md5final(), which
+// will fill a supplied 16-byte array with the digest.
+//
+// A main() method is included that hashes the data on System.in.
+//
+// It seems to run around 25-30 times slower (JDK1.1.6) than optimized C
+// (gcc -O4, version 2.7.2.3). Measured on a Sun Ultra 5 (SPARC 270MHz).
+//
+// Comments from md5.c from ssh-1.2.22, the basis for this code:
+//
+/* This code has been heavily hacked by Tatu Ylonen to
+ make it compile on machines like Cray that don't have a 32 bit integer
+ type. */
+/*
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest. This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ */
+
+public class MD5 {
+ int buf[]; // These were originally unsigned ints.
+ // This Java code makes an effort to avoid sign traps.
+ // buf[] is where the hash accumulates.
+ long bits; // This is the count of bits hashed so far.
+ byte in[]; // This is a buffer where we stash bytes until we have
+ // enough (64) to perform a transform operation.
+ int inint[];
+ // inint[] used and discarded inside transform(),
+ // but why allocate it over and over?
+ // (In the C version this is allocated on the stack.)
+
+ public MD5() {
+   // initially, we've hashed zero bits
+   bits = 0L;
+
+   // Working storage: 64 raw input bytes and their 16-word decoded form.
+   in = new byte[64];
+   inint = new int[16];
+
+   // fill the hash accumulator with a seed value
+   int[] seed = new int[4];
+   seed[0] = 0x67452301;
+   seed[1] = 0xefcdab89;
+   seed[2] = 0x98badcfe;
+   seed[3] = 0x10325476;
+   buf = seed;
+ }
+
+ public void update(byte[] newbuf) {
+   // Hash the whole array.
+   update(newbuf, 0, newbuf.length);
+ }
+
+ public void update(byte[] newbuf, int length) {
+   // Hash the first `length` bytes of the array.
+   update(newbuf, 0, length);
+ }
+
+ public void update(byte[] newbuf, int bufstart, int buflen) {
+   // Feeds buflen bytes of newbuf, starting at index bufstart, into the
+   // digest. Complete 64-byte blocks are transformed immediately; any
+   // remainder is kept in `in` for the next update() or md5final().
+   int t;
+   int len = buflen;
+
+   // stash old bits value for the "Bytes already in" computation
+   // just below.
+   t = (int) bits; // the (int) cast keeps only the low 32 bits
+
+   /* update bitcount */
+   /* the C code used two 32-bit ints separately, and carefully
+    * ensured that the carry carried.
+    * Java has a 64-bit long, which is just what the code really wants.
+    */
+   // Widen before shifting: the previous "(long)(len<<3)" performed the
+   // shift in 32-bit arithmetic, so the bit count wrapped for len >= 2^28.
+   bits += ((long) len) << 3;
+
+   t = (t >>> 3) & 0x3f; /* Bytes already in this->in */
+
+   /* Handle any leading odd-sized chunks */
+   /* (that is, any left-over chunk left by last update() */
+
+   if (t!=0) {
+     int p = t;
+     t = 64 - t;
+     if (len < t) {
+       // Not enough new data to complete the pending block: just append.
+       arraycopy(newbuf, bufstart, in, p, len);
+       return;
+     }
+     arraycopy(newbuf, bufstart, in, p, t);
+     transform();
+     bufstart += t;
+     len -= t;
+   }
+
+   /* Process data in 64-byte chunks */
+   while (len >= 64) {
+     arraycopy(newbuf, bufstart, in, 0, 64);
+     transform();
+     bufstart += 64;
+     len -= 64;
+   }
+
+   /* Handle any remaining bytes of data. */
+   /* that is, stash them for the next update(). */
+   arraycopy(newbuf, bufstart, in, 0, len);
+ }
+
+ public void arraycopy(byte[] src, int srcPos, byte[] dest, int destPos, int len) {
+ for (int i = 0; i < len; i++) {
+ dest[destPos+i] = src[srcPos+i];
+ }
+ return;
+ }
+
+ /*
+ * Final wrapup - pad to 64-byte boundary with the bit pattern
+ * 1 0* (64-bit count of bits processed, MSB-first)
+ */
+ public void md5final(byte[] digest) { // Pads the pending input, runs the last transform(s), writes 16 digest bytes into digest, then wipes the internal state.
+ /* "final" is a poor method name in Java. :v) */
+ int count;
+ int p; // in original code, this is a pointer; in this java code
+ // it's an index into the array this->in.
+
+ /* Compute number of bytes mod 64 */
+ count = (int) ((bits >>> 3) & 0x3F);
+
+ /* Set the first char of padding to 0x80. This is safe since there is
+ always at least one byte free */
+ p = count;
+ in[p++] = (byte) 0x80;
+
+ /* Bytes of padding needed to make 64 bytes */
+ count = 64 - 1 - count; // free bytes left in the block after the 0x80 marker
+
+ /* Pad out to 56 mod 64 */
+ if (count < 8) { // fewer than 8 free bytes: the 64-bit length field does not fit in this block
+ /* Two lots of padding: Pad the first block to 64 bytes */
+ zeroByteArray(in, p, count);
+ transform();
+
+ /* Now fill the next block with 56 bytes */
+ zeroByteArray(in, 0, 56);
+ } else {
+ /* Pad block to 56 bytes */
+ zeroByteArray(in, p, count - 8);
+ }
+
+ /* Append length in bits and transform */
+ // Could use a PUT_64BIT... func here. This is a fairly
+ // direct translation from the C code, where bits was an array
+ // of two 32-bit ints.
+ int lowbits = (int) bits; // low 32 bits of the bit count
+ int highbits = (int) (bits >>> 32); // high 32 bits of the bit count
+ PUT_32BIT_LSB_FIRST(in, 56, lowbits);
+ PUT_32BIT_LSB_FIRST(in, 60, highbits);
+
+ transform();
+ PUT_32BIT_LSB_FIRST(digest, 0, buf[0]); // digest bytes 0..15 are the four accumulator words, least significant byte first
+ PUT_32BIT_LSB_FIRST(digest, 4, buf[1]);
+ PUT_32BIT_LSB_FIRST(digest, 8, buf[2]);
+ PUT_32BIT_LSB_FIRST(digest, 12, buf[3]);
+
+ /* zero sensitive data */
+ /* notice this misses any sneaking out on the stack. The C
+ * version uses registers in some spots, perhaps because
+ * they care about this.
+ */
+ zeroByteArray(in);
+ zeroIntArray(buf); // NOTE(review): buf is zeroed, not re-seeded, so this instance presumably must not be reused after md5final() -- confirm
+ bits = 0;
+ zeroIntArray(inint);
+ }
+
+ /*
+ public static void main(String args[]) {
+ // This main() method was created to easily test
+ // this class. It hashes whatever's on System.in.
+
+ byte buf[] = new byte[397];
+ // arbitrary buffer length designed to irritate update()
+ int rc;
+ MD5 md = new MD5();
+ byte out[] = new byte[16];
+ int i;
+ int len = 0;
+
+ try {
+ while ((rc = System.in.read(buf, 0, 397)) > 0) {
+ md.update(buf, rc);
+ len += rc;
+ }
+ } catch (IOException ex) {
+ ex.printStackTrace();
+ return;
+ }
+ md.md5final(out);
+
+ System.out.println("file length: "+len);
+ System.out.println("hash: "+dumpBytes(out));
+ }
+ */
+
+
+ /////////////////////////////////////////////////////////////////////
+ // Below here ye will only finde private functions //
+ /////////////////////////////////////////////////////////////////////
+
+ // There must be a way to do these functions that's
+ // built into Java, and I just haven't noticed it yet.
+
+ private void zeroByteArray(byte[] a) {
+   // Clear the whole array.
+   int n = a.length;
+   zeroByteArray(a, 0, n);
+ }
+
+ private void zeroByteArray(byte[] a, int start, int length) {
+   // Zero-fill `length` bytes of a, beginning at index `start`.
+   byte zero = (byte) 0;
+   setByteArray(a, zero, start, length);
+ }
+
+ private void setByteArray(byte[] a, byte val, int start, int length) {
+ int i;
+ int end = start+length;
+ for (i=start; i>>(32-s);
+ w += x;
+ return w;
+ }
+
+ private int MD5STEP2(int w, int x, int y, int z, int data, int s) {
+ w += (y ^ (z & (x ^ y))) + data;
+ w = w<>>(32-s);
+ w += x;
+ return w;
+ }
+
+ private int MD5STEP3(int w, int x, int y, int z, int data, int s) {
+ w += (x ^ y ^ z) + data;
+ w = w<>>(32-s);
+ w += x;
+ return w;
+ }
+
+ private int MD5STEP4(int w, int x, int y, int z, int data, int s) {
+ w += (y ^ (x | ~z)) + data;
+ w = w<>>(32-s);
+ w += x;
+ return w;
+ }
+
+ private void transform() {
+ /* load in[] byte array into an internal int array */
+ int i;
+ int[] inint = new int[16];
+
+ for (i=0; i<16; i++) {
+ inint[i] = GET_32BIT_LSB_FIRST(in, 4*i);
+ }
+
+ int a, b, c, d;
+ a = buf[0];
+ b = buf[1];
+ c = buf[2];
+ d = buf[3];
+
+ a = MD5STEP1(a, b, c, d, inint[0] + 0xd76aa478, 7);
+ d = MD5STEP1(d, a, b, c, inint[1] + 0xe8c7b756, 12);
+ c = MD5STEP1(c, d, a, b, inint[2] + 0x242070db, 17);
+ b = MD5STEP1(b, c, d, a, inint[3] + 0xc1bdceee, 22);
+ a = MD5STEP1(a, b, c, d, inint[4] + 0xf57c0faf, 7);
+ d = MD5STEP1(d, a, b, c, inint[5] + 0x4787c62a, 12);
+ c = MD5STEP1(c, d, a, b, inint[6] + 0xa8304613, 17);
+ b = MD5STEP1(b, c, d, a, inint[7] + 0xfd469501, 22);
+ a = MD5STEP1(a, b, c, d, inint[8] + 0x698098d8, 7);
+ d = MD5STEP1(d, a, b, c, inint[9] + 0x8b44f7af, 12);
+ c = MD5STEP1(c, d, a, b, inint[10] + 0xffff5bb1, 17);
+ b = MD5STEP1(b, c, d, a, inint[11] + 0x895cd7be, 22);
+ a = MD5STEP1(a, b, c, d, inint[12] + 0x6b901122, 7);
+ d = MD5STEP1(d, a, b, c, inint[13] + 0xfd987193, 12);
+ c = MD5STEP1(c, d, a, b, inint[14] + 0xa679438e, 17);
+ b = MD5STEP1(b, c, d, a, inint[15] + 0x49b40821, 22);
+
+ a = MD5STEP2(a, b, c, d, inint[1] + 0xf61e2562, 5);
+ d = MD5STEP2(d, a, b, c, inint[6] + 0xc040b340, 9);
+ c = MD5STEP2(c, d, a, b, inint[11] + 0x265e5a51, 14);
+ b = MD5STEP2(b, c, d, a, inint[0] + 0xe9b6c7aa, 20);
+ a = MD5STEP2(a, b, c, d, inint[5] + 0xd62f105d, 5);
+ d = MD5STEP2(d, a, b, c, inint[10] + 0x02441453, 9);
+ c = MD5STEP2(c, d, a, b, inint[15] + 0xd8a1e681, 14);
+ b = MD5STEP2(b, c, d, a, inint[4] + 0xe7d3fbc8, 20);
+ a = MD5STEP2(a, b, c, d, inint[9] + 0x21e1cde6, 5);
+ d = MD5STEP2(d, a, b, c, inint[14] + 0xc33707d6, 9);
+ c = MD5STEP2(c, d, a, b, inint[3] + 0xf4d50d87, 14);
+ b = MD5STEP2(b, c, d, a, inint[8] + 0x455a14ed, 20);
+ a = MD5STEP2(a, b, c, d, inint[13] + 0xa9e3e905, 5);
+ d = MD5STEP2(d, a, b, c, inint[2] + 0xfcefa3f8, 9);
+ c = MD5STEP2(c, d, a, b, inint[7] + 0x676f02d9, 14);
+ b = MD5STEP2(b, c, d, a, inint[12] + 0x8d2a4c8a, 20);
+
+ a = MD5STEP3(a, b, c, d, inint[5] + 0xfffa3942, 4);
+ d = MD5STEP3(d, a, b, c, inint[8] + 0x8771f681, 11);
+ c = MD5STEP3(c, d, a, b, inint[11] + 0x6d9d6122, 16);
+ b = MD5STEP3(b, c, d, a, inint[14] + 0xfde5380c, 23);
+ a = MD5STEP3(a, b, c, d, inint[1] + 0xa4beea44, 4);
+ d = MD5STEP3(d, a, b, c, inint[4] + 0x4bdecfa9, 11);
+ c = MD5STEP3(c, d, a, b, inint[7] + 0xf6bb4b60, 16);
+ b = MD5STEP3(b, c, d, a, inint[10] + 0xbebfbc70, 23);
+ a = MD5STEP3(a, b, c, d, inint[13] + 0x289b7ec6, 4);
+ d = MD5STEP3(d, a, b, c, inint[0] + 0xeaa127fa, 11);
+ c = MD5STEP3(c, d, a, b, inint[3] + 0xd4ef3085, 16);
+ b = MD5STEP3(b, c, d, a, inint[6] + 0x04881d05, 23);
+ a = MD5STEP3(a, b, c, d, inint[9] + 0xd9d4d039, 4);
+ d = MD5STEP3(d, a, b, c, inint[12] + 0xe6db99e5, 11);
+ c = MD5STEP3(c, d, a, b, inint[15] + 0x1fa27cf8, 16);
+ b = MD5STEP3(b, c, d, a, inint[2] + 0xc4ac5665, 23);
+
+ a = MD5STEP4(a, b, c, d, inint[0] + 0xf4292244, 6);
+ d = MD5STEP4(d, a, b, c, inint[7] + 0x432aff97, 10);
+ c = MD5STEP4(c, d, a, b, inint[14] + 0xab9423a7, 15);
+ b = MD5STEP4(b, c, d, a, inint[5] + 0xfc93a039, 21);
+ a = MD5STEP4(a, b, c, d, inint[12] + 0x655b59c3, 6);
+ d = MD5STEP4(d, a, b, c, inint[3] + 0x8f0ccc92, 10);
+ c = MD5STEP4(c, d, a, b, inint[10] + 0xffeff47d, 15);
+ b = MD5STEP4(b, c, d, a, inint[1] + 0x85845dd1, 21);
+ a = MD5STEP4(a, b, c, d, inint[8] + 0x6fa87e4f, 6);
+ d = MD5STEP4(d, a, b, c, inint[15] + 0xfe2ce6e0, 10);
+ c = MD5STEP4(c, d, a, b, inint[6] + 0xa3014314, 15);
+ b = MD5STEP4(b, c, d, a, inint[13] + 0x4e0811a1, 21);
+ a = MD5STEP4(a, b, c, d, inint[4] + 0xf7537e82, 6);
+ d = MD5STEP4(d, a, b, c, inint[11] + 0xbd3af235, 10);
+ c = MD5STEP4(c, d, a, b, inint[2] + 0x2ad7d2bb, 15);
+ b = MD5STEP4(b, c, d, a, inint[9] + 0xeb86d391, 21);
+
+ buf[0] += a;
+ buf[1] += b;
+ buf[2] += c;
+ buf[3] += d;
+ }
+
+ private int GET_32BIT_LSB_FIRST(byte[] b, int off) {
+   // Assembles the four bytes at b[off..off+3] into one int, least
+   // significant byte first. Each byte is masked with 0xff so that negative
+   // byte values do not sign-extend into the higher bits.
+   int b0 = b[off+0] & 0xff;
+   int b1 = b[off+1] & 0xff;
+   int b2 = b[off+2] & 0xff;
+   int b3 = b[off+3] & 0xff;
+   return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+ }
+
+ private void PUT_32BIT_LSB_FIRST(byte[] b, int off, int value) {
+   // Writes value into b[off..off+3], least significant byte first.
+   for (int k = 0; k < 4; k++) {
+     int shift = 8 * k;
+     b[off + k] = (byte) ((value >> shift) & 0xff);
+   }
+ }
+
+ // These are debug routines I was using while trying to
+ // get this code to generate the same hashes as the C version.
+ // (IIRC, all the errors were due to the absence of unsigned
+ // ints in Java.)
+ /*
+ private void debugStatus(String m) {
+ System.out.println(m+":");
+ System.out.println("in: "+dumpBytes(in));
+ System.out.println("bits: "+bits);
+ System.out.println("buf: "
+ +Integer.toHexString(buf[0])+" "
+ +Integer.toHexString(buf[1])+" "
+ +Integer.toHexString(buf[2])+" "
+ +Integer.toHexString(buf[3]));
+ }
+
+ private static String dumpBytes(byte[] bytes) {
+ int i;
+ StringBuffer sb = new StringBuffer();
+ for (i=0; i 2) {
+ s = s.substring(s.length()-2);
+ }
+ sb.append(s);
+ }
+ return sb.toString();
+ }
+ */
+}
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/Mail.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/Mail.java
new file mode 100644
index 00000000..3afbe19c
--- /dev/null
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/Mail.java
@@ -0,0 +1,422 @@
+/**
+ * This class is a container for all data contained in an Email Message.
+ **/
+public class Mail {
+
+ String header; // the full header
+ //String sentOn; // time the message was sent
+ //String receivedOn; // time when the message arrived
+ String from; // the "from" field
+ String to; // the "to" field
+ String cc;
+ String subject;
+ String body;
+ String noURLBody;
+ String sourceCode;
+ String spam;
+ boolean hasAttachement;
+ String encoding; //rich text, plain, html
+
+ String messageID; // cached message ID for reuse (takes a lot of memory and is used all over the place)
+ //same as hashcode of a class
+ boolean isSpam;
+
+ /**
+ * this is a really simple implementation of a tokenizer
+ * used to build tokens from an email and divide email into parts
+ **/
+ int MAX_TOKEN_SIZE;
+
+ public Mail() { // Creates an empty mail; every field keeps its default and MAX_TOKEN_SIZE is not set here.
+ messageID=null;
+ }
+
+ public Mail(String fileName) // read a mail from file
+ {
+   // Reads the header lines ("Spam:", "Header:", "To:", "From:", "Cc:",
+   // "Subject:") up to and including the Subject line, then treats the rest
+   // of the file as the body. For each header only the second
+   // whitespace-separated token of the line is stored.
+   BufferedReader fileinput = new BufferedReader(new FileInputStream(fileName));
+   String line;
+   boolean chk = false;
+
+   while((line = fileinput.readLine()) != null)
+   {
+     chk = true;
+
+     Vector splittedLine = line.split();
+     // Lines with fewer than two tokens (blank lines, a key without a
+     // value) are skipped; previously elementAt(0)/elementAt(1) was called
+     // on them unconditionally.
+     if(splittedLine.size() >= 2)
+     {
+       String key = (String)(splittedLine.elementAt(0));
+       String val = (String)(splittedLine.elementAt(1));
+       if(key.equals("Spam:"))
+       {
+         spam = val;
+       }
+       else if(key.equals("Header:")) // message id
+       {
+         header = val;
+       }
+       else if(key.equals("To:")) // receiver
+       {
+         to = val;
+       }
+       else if(key.equals("From:")) // sender
+       {
+         from = val;
+       }
+       else if(key.equals("Cc:")) // cc
+       {
+         cc = val;
+       }
+       else if(key.equals("Subject:")) // Subject
+       {
+         subject = val;
+         break; // the Subject line ends the header section
+       }
+     }
+   } // parsed messageID, To, from, cc, Title
+
+   /**
+    * error checking
+    **/
+   if(!chk)
+     System.out.println("no line read");
+
+
+   body = new String();
+   byte[] readBody = new byte[256];
+   int nread;
+
+   // Append only the bytes that read() reports. The previous code converted
+   // the whole 256-byte array on every pass, so a final partial chunk
+   // added trailing zero characters to the body.
+   while((nread = fileinput.read(readBody)) > 0)
+   {
+     body += new String(readBody, 0, nread);
+   }
+
+   fileinput.close();
+
+   MAX_TOKEN_SIZE = 1024;
+ }
+
+ // -------------------------------------------------------
+
+ public void setHeader(String header) { // Replaces the stored header text.
+ this.header = header;
+ }
+
+ public String getHeader() { // Returns the stored header text; null if it was never set.
+ return header;
+ }
+
+
+ /*
+ public void setSentOn(String sentOn) {
+ this.sentOn = sentOn;
+ }
+
+ public String getSentOn() {
+ return sentOn;
+ }
+
+ public Date getSentOnAsDate() {
+ String sentOn = getSentOn();
+ return parseDate(sentOn);
+ }
+
+ public void setReceivedOn(String receivedOn) {
+ this.receivedOn = receivedOn;
+ }
+
+ public String getReceivedOn() {
+ return receivedOn;
+ }
+
+ public Date getReceivedOnAsDate() {
+ String receivedOn = getReceivedOn();
+ return parseDate(receivedOn);
+ }
+ */
+
+
+ /**
+ * Parses a given Date-String in into a real Date-Object
+ *
+ * @param stringDate the string in format dd.mm.yyyy hh:mm
+ * @return a Date containing the info of the string or the actual date and time if something fails.
+ */
+ /*
+ public Date parseDate(String stringDate) {
+ // date is in this format: dd.mm.yyyy hh:mm
+ if (stringDate == null || "N/A".equals(stringDate)) {
+ return new Date();
+ }
+ try {
+ synchronized (MAIL_TIME_FORMAT) {
+ return MAIL_TIME_FORMAT.parse(stringDate);
+ }
+ } catch (Throwable e) {
+ return new Date();
+ }
+ }
+ */
+
+ public void setFrom(String from) { // Replaces the "from" field.
+ this.from = from;
+ }
+
+ public String getFrom() { // Returns the "from" field; null if it was never set.
+ return from;
+ }
+
+ public void setTo(String to) { // Replaces the "to" field.
+ this.to = to;
+ }
+
+ public String getTo() { // Returns the "to" field; null if it was never set.
+ return to;
+ }
+
+ public void setCc(String cc) { // Replaces the cc field.
+ this.cc = cc;
+ }
+
+ public String getCc() { // Returns the cc field; null if it was never set.
+ return cc;
+ }
+
+ public void setSubject(String subject) { // Replaces the subject.
+ this.subject = subject;
+ }
+
+ public String getSubject() { // Returns the subject; null if it was never set.
+ return subject;
+ }
+
+ public void setBody(String body) { // Replaces the body text.
+ this.body = body;
+ }
+
+ public String getBody() { // Returns the body text; null if it was never set.
+ return body;
+ }
+
+ public void setSourceCode(String sourceCode) { // Replaces the sourceCode field.
+ this.sourceCode = sourceCode;
+ }
+
+ public String getSourceCode() { // Returns the sourceCode field; no visible code other than setSourceCode assigns it.
+ return sourceCode;
+ }
+
+ public void setHasAttachement(boolean hasAttachement) { // Sets the attachment flag.
+ this.hasAttachement = hasAttachement;
+ }
+
+ public boolean getHasAttachement() { // Returns the attachment flag (false by default).
+ return hasAttachement;
+ }
+
+ public void setEncoding(String encoding) { // Replaces the encoding label (per the field comment: rich text, plain, html).
+ this.encoding = encoding;
+ }
+
+ public String getEncoding() { // Returns the encoding label; null unless setEncoding was called.
+ return encoding;
+ }
+
+ public boolean isTextEncoding() {
+ return getEncoding().toLowerCase().indexOf("plain") >= 0;
+ }
+
+ public boolean isHTMLEncoding() {
+ return getEncoding().toLowerCase().indexOf("html") >= 0;
+ }
+
+ /*
+ public String toString() {
+ return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getReceivedOn() + "," + getSentOn() + "," + getSourceCode() + "," + getSubject() + "," + getTo();
+ }
+ */
+
+ public String toString() {
+ return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getSourceCode() + "," + getSubject() + "," + getTo();
+ }
+
+ /*
+ public String getID() {
+ if (messageID == null) { // no cached version
+ // Take the message-ID header as ID (if present)
+ String[] messageIDs = getHeaderField("Message-ID");
+ if ((messageIDs != null) && (messageIDs.length > 0)) {
+ messageID = messageIDs[0];
+ } else { // otherwise, hash header and body as ID
+ return String.valueOf(getHeader().hashCode() + getBody().hashCode());
+ }
+ }
+
+ return messageID;
+ }
+ */
+
+ public String[] getHeaderField(String fieldName) {
+   // Not implemented in this version. The body was empty although the
+   // method declares a return type; return null explicitly so that every
+   // path returns a value.
+   return null;
+ }
+
+ public String extractEMailAddress() {
+   // Not implemented in this version. The body was empty although the
+   // method declares a return type; return null explicitly so that every
+   // path returns a value.
+   return null;
+ }
+
+ /*
+ public boolean equals(Object o) {
+ if (o instanceof Mail) {
+ Mail mail = (Mail)o;
+ return this.getID().equals(mail.getID());
+ }
+
+ return false;
+ }
+ */
+
+ public Vector getCommonPart()
+ {
+   // Collects, in this order, the header, the sender and the subject.
+   Vector parts = new Vector();
+   parts.addElement(this.header);
+   parts.addElement(this.from);
+   parts.addElement(this.subject);
+   return parts;
+ }
+
+ public String getBodyString() // Returns the same body field that getBody() returns.
+ {
+ return body;
+ }
+
+ public Vector returnEmail() {
+   // Bundles three elements, in this order: the result of getCommonPart(),
+   // the result of getURLs(), and the result of
+   // getSplittedBody(MAX_TOKEN_SIZE). The calls are made in that order.
+   Vector parts = new Vector();
+   Vector common = getCommonPart();
+   parts.addElement(common);
+   Vector urls = getURLs();
+   parts.addElement(urls);
+   parts.addElement(getSplittedBody(MAX_TOKEN_SIZE));
+   return parts;
+ }
+
+ public Vector getURLs()
+ {
+ Vector returnStrings = new Vector();
+ Vector splittedBody = body.split();
+
+ // add URL and email in the body
+ for(int i=0; i=end) {
+ String str=noURLBody.substring(i, end);
+ returnStrings.addElement(str);
+ }
+ else {
+ String str=noURLBody.substring(i, i+size);
+ returnStrings.addElement(str);
+ }
+ }
+ return returnStrings;
+ }
+
+
+ public void setIsSpam(boolean spam) { // Stores the flag in isSpam; note that getIsSpam() reads the spam string, not this flag.
+ isSpam = spam;
+ }
+
+ public boolean getIsSpam() {
+   // The answer comes from the text parsed from the "Spam:" header line,
+   // not from the isSpam flag. That text stays null when no such line was
+   // read (or the no-arg constructor was used), so check before comparing
+   // instead of dereferencing null.
+   if(spam == null)
+     return false;
+   return spam.equals("yes");
+ }
+
+ /**
+ * Returns result to the Spam filter
+ **/
+ public Vector checkMail(int userid) {
+   // Step 1: break this mail into its parts. (userid is not used here.)
+   Vector mailParts = returnEmail();
+
+   // Step 2: run the parts through a freshly constructed SignatureComputer
+   // and hand its result straight back to the caller.
+   SignatureComputer computer = new SignatureComputer();
+   Vector computed = computer.computeSigs(mailParts);
+   return computed;
+ }
+
+ /* For tests only */
+ /*
+ public static void main(String[] args)
+ {
+ Mail mail = new Mail("./emails/email1");
+
+ String[] a = mail.createMailStrings();
+
+ for(String b : a)
+ {
+ System.out.println(b);
+ }
+ }
+ */
+}
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/SignatureComputer.java b/Robust/src/Benchmarks/Recovery/SpamFilter/java/SignatureComputer.java
new file mode 100644
index 00000000..4f98a2d0
--- /dev/null
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/SignatureComputer.java
@@ -0,0 +1,289 @@
+public class SignatureComputer {
+ // Signature engines. createEnginesToCheck() registers engine number 4 as
+ // the Ephemeral engine and engine number 8 as the Whiplash engine.
+ public EphemeralSignature sig4; //signature engines
+ public WhiplashSignature sig8; //signature engines
+
+ // Engine numbers accepted by isSigSupported(); filled in by createEnginesToCheck().
+ int[] enginesToUseForCheck;
+
+ /**
+  * Creates a computer whose Ephemeral and Whiplash engines are both built
+  * with their no-argument constructors, then registers the engine numbers.
+  */
+ public SignatureComputer() {
+   this.sig4 = new EphemeralSignature(); //default values
+   this.sig8 = new WhiplashSignature();
+   this.createEnginesToCheck();
+ }
+
+ /**
+  * Constructor to be used when the server-provided seed value has already
+  * been parsed into its seed and separator components. Both values are
+  * handed to the Ephemeral engine; the Whiplash engine takes no arguments.
+  *
+  * @param randomNumberSeed
+  *          a non-negative number used for seeding the random number
+  *          generator before starting to hash values.
+  * @param separator
+  *          how the mail text should be split into lines (i.e. which
+  *          characters separate two lines).
+  */
+ public SignatureComputer(int randomNumberSeed, String separator) {
+   this.sig4 = new EphemeralSignature(randomNumberSeed, separator);
+   this.sig8 = new WhiplashSignature();
+   this.createEnginesToCheck();
+ }
+
+ /**
+  * The constructor to be used most of the time. The seed string can be
+  * handed over exactly as it is provided by the razor-server; it is passed
+  * unchanged to the Ephemeral engine.
+  *
+  * @param seedAndSeparator
+  *          a string containing the seed value for the RNG and a separator
+  *          list (separated by '-'). The default value is "7542-10", which
+  *          means server-seed 7542 and only one separator, 10 (which is
+  *          ascii '\n').
+  */
+ public SignatureComputer(String seedAndSeparator) {
+   this.sig4 = new EphemeralSignature(seedAndSeparator);
+   this.sig8 = new WhiplashSignature();
+   this.createEnginesToCheck();
+ }
+
+ /**
+ *
+ */
+ public void createEnginesToCheck() {
+ enginesToUseForCheck = new int[2];
+ enginesToUseForCheck[0] = 4; //Ephemeral engine
+ enginesToUseForCheck[1] = 8;//Whiplash engine
+ }
+
+ /**
+  * Tells whether the given engine number is one of the engines to check.
+  *
+  * @param sig the engine number to look up
+  * @return true if sig occurs in enginesToUseForCheck, false otherwise
+  */
+ public boolean isSigSupported(int sig) {
+   for (int idx = 0; idx < enginesToUseForCheck.length; idx++) {
+     if (enginesToUseForCheck[idx] == sig) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * String variant of isSigSupported(int).
+  *
+  * @param sig the engine number as decimal text; may be null
+  * @return false for null, otherwise the answer for the parsed number
+  */
+ public boolean isSigSupported(String sig) {
+   if (sig == null) {
+     return false;
+   }
+   return isSigSupported(Integer.parseInt(sig));
+ }
+
+ public String getDefaultEngine() {
+ return "4";
+ }
+
+ public Vector computeSigs(Vector EmailParts) {
+ if (EmailParts == null) return null;
+
+ Vector printableSigs = new Vector(); // vector of strings
+
+ /**
+ * Step -I
+ * Get signatures for the common parts
+ **/
+
+ Vector commonpart = (Vector) (EmailParts.elementAt(0));
+ for (int mailIndex = 0; mailIndex < commonpart.size(); mailIndex++) {
+ String mail = (String) (commonpart.elementAt(mailIndex));
+
+ if (mail == null) continue;
+
+ /*
+ * Compute Sig for email header that are cleaned.
+ */
+ for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
+ int engineNo = enginesToUseForCheck[engineIndex];
+ String sig = null;
+
+ /* EphemeralSignature calculator */
+ if(engineNo==4) {
+ sig = computeSignature(engineNo,mail);
+ }
+
+ if(engineNo==8) {
+ continue;
+ }
+
+ if((engineNo!=4)) {
+ System.out.println("Err: Common part Couldn't find the signature engine: " + engineNo);
+ }
+
+ if (sig != null) {
+ String hash = engineNo + ":" + sig;
+ printableSigs.addElement(hash);
+
+ //System.out.println("DEBUG: mail= " +mail + " hash= " + hash);
+
+ } else {
+ // we didn't produce a signature for the mail.
+ }
+ }//engine
+ }//common part
+
+ /**
+ * Step -II
+ * Get signatures for the body parts without URLs
+ **/
+ Vector getBodywithNoURLs = (Vector)(EmailParts.elementAt(2));
+ for (int mailIndex = 0; mailIndex < getBodywithNoURLs.size(); mailIndex++) {
+ String mail = (String) (getBodywithNoURLs.elementAt(mailIndex));
+
+
+ if (mail == null) continue;
+
+ /*
+ * Compute Sig for email header that are cleaned.
+ */
+ for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
+ int engineNo = enginesToUseForCheck[engineIndex];
+ String sig = null;
+
+ /* EphemeralSignature calculator */
+ if(engineNo==4) {
+ sig = computeSignature(engineNo,mail);
+ }
+
+ if(engineNo==8)
+ continue;
+
+ if(engineNo!=4) {
+ System.out.println("Err: body parts without URL Couldn't find the signature engine: " + engineNo);
+ }
+
+ if (sig != null) {
+ String hash = engineNo + ":" + sig;
+ printableSigs.addElement(hash);
+ } else {
+ // we didn't produce a signature for the mail.
+ }
+ }//engine
+ }
+
+ /**
+ * Step -III
+ * Get signatures for the body parts with URLs
+ **/
+ Vector getURLs = (Vector)(EmailParts.elementAt(1));
+ for (int mailIndex = 0; mailIndex < getURLs.size(); mailIndex++) {
+ String mail = (String) (getURLs.elementAt(mailIndex));
+
+ /*
+ * Compute Sig for bodyparts that are cleaned.
+ */
+ for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
+ int engineNo = enginesToUseForCheck[engineIndex];
+ if(engineNo==4)
+ continue;
+
+ /* WhiplashSignature calculator */
+ String[] hosts = null;
+ String sig = null;
+ if(engineNo==8) {
+ //hosts = computeSignature(engineNo,mail);
+ hosts = sig8.computeSignature(mail);
+ if(hosts != null) {
+ for(int i=0; i -n -e \n");
+ System.out.println( " -n : num iterations");
+ System.out.println( " -e : number of emails");
+ }
+
+ /**
+ * Returns result to the Spam filter
+ **/
+ /*
+ public boolean checkMail(Mail mail, int userid) {
+ //Preprocess emails
+ //Vector partsOfMailStrings = mail.createMailStringsWithURL();
+ /*
+ Vector partsOfMailStrings = mail.getCommonPart();
+ partsOfMailStrings.addElement(mail.getBodyString());
+
+ //Compute signatures
+ SignatureComputer sigComp = new SignatureComputer();
+ Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
+
+ //check with data structure
+ int[] confidenceVals = check(signatures,userid);
+
+ //---- create and return results --------
+ FilterResult filterResult = new FilterResult();
+ boolean spam = filterResult.getResult(confidenceVals);
+
+ return spam;
+ }
+ */
+
+ public int[] check(Vector signatures, int userid) {
+ int numparts = signatures.size();
+
+ //System.out.println("check() numparts= " + numparts);
+
+ int[] confidenceVals = new int[numparts];
+ for(int i=0; i the mail client is able to determine if it is spam or not
+ // --- According to the "any"-logic (in Core#check_logic) in original Razor ---
+ // If any answer is spam, the entire email is spam.
+ return confidenceVals;
+ }
+
+ /**
+ * This method sends feedback from the user to a distributed
+ * spam database and trains the spam database to check future
+ * emails and detect spam
+ **/
+ public void sendFeedBack(Vector signatures, boolean isSpam, int id, Random myrand) {
+
+ for(int i=0;ihex.
+ */
+ /**
+  * Re-encodes a hex string using b64table. The input is consumed three hex
+  * characters at a time; each chunk is packed into 12 bits by
+  * convertHexToRazorEncoding() and emitted as two 6-bit indexes into
+  * b64table (upper six bits first).
+  *
+  * @param hex the hex string to convert; may be null
+  * @return the encoded string, or null when hex is null
+  */
+ public String hexToBase64(String hex){
+   if (hex == null) {
+     return null;
+   }
+
+   int hexLen = hex.length();
+   int[] sextets = new int[hexLen*2/3 + ((hexLen*2)%3)];
+   int out = 0;
+
+   //process 3 hex char chunks at a time
+   for (int pos = 0; pos < hexLen; pos += 3) {
+     int chunkEnd = Math.imin(pos+3, hexLen);
+     int packed = convertHexToRazorEncoding(hex.substring(pos, chunkEnd));
+
+     //now the right endian encoding
+     sextets[out] = ((packed & 0xfc0) >>> 6); //higher 6 bits
+     sextets[out+1] = (packed & 0x3f); //lower 6 bits
+     out += 2;
+   }
+
+   String encoded = "";
+   for (int k = 0; k < sextets.length; k++) {
+     encoded += b64table[ sextets[k] ];
+   }
+   return encoded;
+ }
+
+ /**
+  * razor does some special conversion using perl's pack(), which we must do
+  * manually in java: each of up to three hex digits is bit-mirrored with
+  * mirror4LSBits() and the three results are packed into one 12-bit value,
+  * first digit in the highest four bits. Missing second or third digits
+  * count as 0.
+  *
+  * @param hex3 one to three hex digits; may be null
+  * @return the packed 12-bit value, or 0 when hex3 is null
+  */
+ private int convertHexToRazorEncoding(String hex3) {
+   if (hex3 == null) {
+     return 0; //error
+   }
+
+   int available = hex3.length();
+   int packed = 0;
+   for (int pos = 0; pos < 3; pos++) {
+     int nibble = 0;
+     // The first digit is always parsed; the other two only when present.
+     if (pos == 0 || available > pos) {
+       nibble = Integer.parseInt(hex3.substring(pos, pos+1), 16);
+     }
+     packed |= ((mirror4LSBits(nibble) & 0xf) << (8 - 4*pos));
+   }
+   return packed;
+ }
+
+ /**
+  * Mirrors the 4 least significant bits of an integer.
+  *
+  * @param cur an int whose 4 least significant bits are 00000...00abcd
+  * @return the mirrored 4 least significant bits, 00000...00dcba; all bits
+  *         other than a-d are lost.
+  */
+ public int mirror4LSBits(int cur) {
+   int mirrored = 0;
+   for (int bit = 0; bit < 4; bit++) {
+     // bit position k of the input lands on position 3-k of the result
+     if ((cur & (1 << bit)) != 0) {
+       mirrored |= (1 << (3 - bit));
+     }
+   }
+   return mirrored;
+ }
+
+ /**
+  * Computes one signature per host that extractHosts() finds in the text.
+  * Each host is fed to a fresh MD5 object and the 16 bytes written by
+  * md5final() are wrapped in a String.
+  *
+  * @param text the text to scan; may be null
+  * @return one signature per extracted host, or null when text is null or
+  *         no host was extracted
+  */
+ public String[] whiplash(String text) {
+   if (text == null) {
+     return null;
+   }
+
+   String[] hostNames = extractHosts(text);
+   if (hostNames == null || hostNames.length < 1) {
+     return null;
+   }
+
+   int hostCount = hostNames.length;
+   String[] signatures = new String[hostCount];
+   for (int h = 0; h < hostCount; h++) {
+     MD5 hasher = new MD5();
+     String hostName = hostNames[h];
+     int hostLen = hostName.length();
+     byte[] hostBytes = hostName.getBytes();
+     byte[] digest = new byte[16];
+
+     hasher.update(hostBytes, hostLen);
+     hasher.md5final(digest);
+
+     // NOTE(review): the raw digest bytes become a String via
+     // new String(byte[]) -- confirm this is lossless on the target runtime.
+     signatures[h] = new String(digest);
+   }
+   return signatures;
+ }
+
+ /**
+  * Collects the host labels that follow every "www." and every "@" in the
+  * text. For each marker the host is the text between the marker and the
+  * next '.', and scanning resumes right after that '.'. All "www." hosts
+  * come first, followed by all "@" hosts, each in order of appearance.
+  *
+  * NOTE(review): only the label up to the first '.' is kept, e.g.
+  * "www.example.com" yields "example" -- confirm that dropping the rest of
+  * the domain is intended.
+  *
+  * @param text the text to scan; may be null
+  * @return the extracted hosts, or null when text is null or no host is found
+  */
+ public String[] extractHosts(String text) {
+   if (text == null) {
+     return null;
+   }
+
+   Vector hosts = new Vector();
+
+   /* Extract hosts that follow a "www." prefix */
+   collectHostsAfter(hosts, text, "www.");
+
+   /* Extract hosts from email addresses */
+   collectHostsAfter(hosts, text, "@");
+
+   if (hosts.size() == 0) {
+     return null;
+   }
+
+   String[] retbuf = new String[hosts.size()];
+   for (int i = 0; i < hosts.size(); i++) {
+     retbuf[i] = (String) (hosts.elementAt(i));
+   }
+   return retbuf;
+ }
+
+ /**
+  * Appends to hosts the text between each occurrence of marker and the
+  * next '.' that follows it.
+  */
+ private void collectHostsAfter(Vector hosts, String text, String marker) {
+   String dot = ".";
+   String rest = text;
+   int idx = rest.indexOf(marker);
+   while (idx != -1) {
+     // drop everything up to and including the marker
+     rest = rest.substring(idx + marker.length(), rest.length());
+
+     int hostEnd = rest.indexOf(dot);
+     if (hostEnd == -1) {
+       // No '.' is left in the remaining text, so neither this marker nor
+       // any later one can delimit a host; stop instead of slicing with -1.
+       return;
+     }
+     hosts.addElement(rest.substring(0, hostEnd));
+
+     // continue scanning right after the '.'
+     rest = rest.substring(hostEnd + dot.length(), rest.length());
+     idx = rest.indexOf(marker);
+   }
+ }
+
+// Testing the signature computation
+// public static void main(String[] args) {
+// /* String testVector = " Test Vectors: \n"+
+// "\n" +
+// "1. http:www.nodg.com@www.geocities.com/nxcisdsfdfdsy/off\n"+
+// "2. http:www.ksleybiuh.com@213.171.60.74/getoff/\n"+
+// "3. \n"+
+// "4. http:217.12.4.7/rmi/http:definethis.net/526/index.html\n"+
+// "5. http:magalygr8sex.free-host.com/h.html\n"+
+// "6. http:%3CVenkatrs%3E@218.80.74.102/thecard/4index.htm\n"+
+// "7. http:EBCDVKIGURGGCEOKXHINOCANVQOIDOXJWTWGPC@218.80.74.102/thecard/5in\n"+
+// "8. http:g.india2.bag.gs/remove_page.htm\n"+
+// "9. https:220.97.40.149\n"+
+// "10. http:mjaked.biz/unsubscribe.ddd?leaving\n"+
+// "11. http:g5j99m8@it.rd.yahoo.com/bassi/*http:www.lekobas.com/c/index.php\n"+
+// "12. look great / feel great\n"+
+// "13. \n"+
+// "14. www.pillzthatwork.com # anything that starts with www. \n";
+// */
+// String testVector = "\n"+
+// "\n"+
+// "Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com\n"+
+// "now a masked link http://www.coveringlink1.com and another link http:plaintextlink3.net and how about https:plaintextlink4.to\n"+
+// "another masked link https:coveringlink2.com and another link https:plaintextlink5.com\n"+
+// "\n"+
+// "\n";
+// String test1 = "Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com\n";
+// WhiplashSignature whiplash = new WhiplashSignature();
+// String[] hosts = whiplash.computeSignature(testVector);
+// //String[] hosts = whiplash.computeSignature(test1);
+// for (int i = 0; i < hosts.length; i++) {
+// String string = hosts[i];
+// System.out.println("host " + i + ":\t" + string);
+// }
+// }
+
+}
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/java/makefile b/Robust/src/Benchmarks/Recovery/SpamFilter/java/makefile
new file mode 100644
index 00000000..e016d93c
--- /dev/null
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/java/makefile
@@ -0,0 +1,25 @@
+# Builds the SpamFilter benchmark in this directory with the shared buildscript.
+MAINCLASS=SpamFilter
+SRC=${MAINCLASS}.java \
+	DistributedHashMap.java \
+	Mail.java \
+	FilterResult.java \
+	HashEntry.java \
+	HashStat.java \
+	SignatureComputer.java \
+	FilterStatistic.java \
+	EphemeralSignature.java \
+	GString.java \
+	WhiplashSignature.java
+
+FLAGS=-optimize -thread -mainclass ${MAINCLASS}
+
+.PHONY: default clean
+
+default:
+	../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC}
+
+# -f keeps "make clean" from failing when no *.bin output exists yet.
+clean:
+	rm -rf tmpbuilddirectory*
+	rm -f *.bin
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/GString.java b/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/GString.java
index 47e3fcf3..9665f58c 100644
--- a/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/GString.java
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/GString.java
@@ -46,24 +46,6 @@ public class GString {
this.offset=0;
}
- public static char[] toLocalCharArray(GString str) {
- char[] c;
- int length;
-
- length = str.length();
-
- c = new char[length];
-
- for (int i = 0; i < length; i++) {
- c[i] = str.value[i+str.offset];
- }
- return c;
- }
-
- public String toLocalString() {
- return new String(toLocalCharArray(this));
- }
-
public int length() {
return count;
}
diff --git a/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/SpamFilter.java b/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/SpamFilter.java
index 2bed4b34..37e97cd3 100644
--- a/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/SpamFilter.java
+++ b/Robust/src/Benchmarks/Recovery/SpamFilter/recovery/SpamFilter.java
@@ -53,10 +53,10 @@ public class SpamFilter extends Thread {
correct =0;
wrong = 0;
for(int j=0; j