-128.195.180.21
-#128.195.180.24
-128.195.180.26
MAINCLASS=FileSystem
SRC1=${MAINCLASS}.java
-FLAGS= -recoverystats -dsm -recovery -nooptimize -mainclass ${MAINCLASS}
+FLAGS= -recovery -recoverystats -dsm -dsmcaching -optimize -mainclass ${MAINCLASS}
default:
../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC1}
#define ROW 400 /* columns in the map */
#define COLUMN 100 /* rows of in the map */
-#define ROUNDS 1000 /* Number of moves by each player */
+#define ROUNDS 2000 /* Number of moves by each player */
#define PLAYERS 20 /* Number of Players when num Players != num of client machines */
#define RATI0 0.5 /* Number of lumberjacks to number of planters */
#define BLOCK 3 /* Area around the gamer to consider */
RecoveryStat.printRecoveryStat();
while(true) {
- sleep(1000000);
+ sleep(300000000);
}
}
AStarPathFinder.java \
../../../../ClassLibrary/JavaDSM/Thread.java
-FLAGS1=-dsm -recoverystats -recovery -optimize -mainclass ${MAINCLASS}
-DSMFLAGS=-dsm -optimize -mainclass ${MAINCLASS}
+FLAGS1=-dsm -dsmcaching -recoverystats -recovery -optimize -mainclass ${MAINCLASS}
+DSMFLAGS=-dsm -dsmcaching -optimize -mainclass ${MAINCLASS}
default:
cpp ${MAINCLASS}.java > tmp1${MAINCLASS}.java
--- /dev/null
+/**
+ * Minimal buffered byte reader layered over a FileInputStream.
+ * Data is pulled from the stream into a 2048-byte buffer and handed out
+ * one byte at a time, in bulk, or line by line.
+ */
+public class BufferedReader {
+  FileInputStream fr;
+  byte[] buffer;
+  int offset;   // index of the next unread byte in buffer
+  int end;      // result of the last fr.read(buffer); <=0 means nothing to consume
+
+  public BufferedReader(FileInputStream fr) {
+    this.fr=fr;
+    this.buffer=new byte[2048];
+  }
+
+  /**
+   * Reads a single byte.
+   *
+   * @return the byte as a value in 0..255, or -1 when a refill yields no data
+   */
+  public int read() {
+    if (offset>=end) {
+      readBuffer();
+      if (end<=0)
+        return -1;
+    }
+    // Mask so bytes >= 0x80 are not sign-extended; otherwise 0xFF would be
+    // indistinguishable from the -1 end marker.
+    return buffer[offset++]&0xff;
+  }
+
+  /**
+   * Fills array with as many bytes as are available.
+   *
+   * @param array destination buffer
+   * @return the number of bytes copied; if nothing could be copied, the
+   *         (zero or negative) result of the underlying read
+   */
+  public int read(byte[] array) {
+    int off=0;
+    int arraylen=array.length;
+    while(true) {
+      while(offset<end&&off<arraylen) {
+        array[off++]=buffer[offset++];
+      }
+      // Stop before refilling when the caller's array is already full.
+      if (off>=arraylen)
+        return off;
+      readBuffer();
+      if (end<=0) {
+        // Report bytes already delivered instead of discarding them.
+        if (off>0)
+          return off;
+        return end;
+      }
+    }
+  }
+
+  /** Refills the buffer from the stream and rewinds the read position. */
+  public void readBuffer() {
+    offset=0;
+    end=fr.read(buffer);
+  }
+
+  /**
+   * Reads the next line. A run of consecutive '\n' / '\r' bytes is treated
+   * as a single terminator, so empty lines are skipped.
+   *
+   * @return the line without its terminator, or null when no more data
+   *         is available
+   */
+  public String readLine() {
+    String str=null;
+    do {
+      boolean foundcr=false;
+      int index=offset;
+      for(;index<end;index++) {
+        if (buffer[index]=='\n'||buffer[index]==13) {
+          foundcr=true;
+          break;
+        }
+      }
+      String buf=new String(buffer, offset, index-offset);
+      if (str==null)
+        str=buf;
+      else
+        str=str.concat(buf);
+      if (foundcr) {
+        offset=index;
+        // Swallow every terminator byte that follows, refilling as needed.
+        do {
+          for(;offset<end;offset++) {
+            if (buffer[offset]!='\n'&&buffer[offset]!=13) {
+              return str;
+            }
+          }
+          readBuffer();
+          if (end<=0)
+            return str;
+        } while(true);
+      } else {
+        readBuffer();
+        if (end<=0) {
+          // Data ended without a terminator: hand back what was collected
+          // rather than silently dropping the last line.
+          if (str.length()>0)
+            return str;
+          return null;
+        }
+      }
+    } while(true);
+
+  }
+
+  /** Closes the underlying stream. */
+  public void close() {
+    fr.close();
+  }
+
+}
\ No newline at end of file
--- /dev/null
+/**
+ * Chained hash map whose table, bucket headers and entries are all allocated
+ * with the dialect's "global new".
+ * The load factor is stored but never consulted by the code in this class,
+ * and the table is never resized.
+ */
+public class DistributedHashMap {
+  DistributedHashEntry[] table;
+  float loadFactor;
+
+  public DistributedHashMap(int initialCapacity, float loadFactor) {
+    init(initialCapacity, loadFactor);
+  }
+
+  private void init(int initialCapacity, float loadFactor) {
+    table=global new DistributedHashEntry[initialCapacity];
+    this.loadFactor=loadFactor;
+  }
+
+  /** Maps a hash code to a non-negative table index below length. */
+  private static int hash1(int hashcode, int length) {
+    int value=hashcode%length;
+    if (value<0)
+      return -value;
+    else
+      return value;
+  }
+
+  /** Walks a bucket chain and returns the entry matching key, or null. */
+  private static DHashEntry findInChain(DHashEntry head, int hashcode, Object key) {
+    DHashEntry ptr=head;
+    while(ptr!=null) {
+      if (ptr.hashval==hashcode&&ptr.key.equals(key)) {
+        return ptr;
+      }
+      ptr=ptr.next;
+    }
+    return null;
+  }
+
+  /** Returns the entry stored for key, or null if the key is absent. */
+  private DHashEntry lookup(Object key) {
+    int hashcode=key.hashCode();
+    DistributedHashEntry dhe=table[hash1(hashcode, table.length)];
+    if (dhe==null)
+      return null;
+    return findInChain(dhe.array, hashcode, key);
+  }
+
+  /**
+   * Removes the mapping for key.
+   *
+   * @return the value that was mapped to key, or null if there was none
+   */
+  Object remove(Object key) {
+    int hashcode=key.hashCode();
+    int index1=hash1(hashcode, table.length);
+    DistributedHashEntry dhe=table[index1];
+    if (dhe==null)
+      return null;
+    DHashEntry ptr=dhe.array;
+    if (ptr==null)
+      return null;
+
+    // The head of the chain has no predecessor to relink.
+    if (ptr.hashval==hashcode&&ptr.key.equals(key)) {
+      dhe.array=ptr.next;
+      dhe.count--;
+      return ptr.value;
+    }
+    while(ptr.next!=null) {
+      DHashEntry candidate=ptr.next;
+      // Compare the candidate's own hash and return the candidate's own
+      // value; the predecessor is only needed for unlinking.
+      if (candidate.hashval==hashcode&&candidate.key.equals(key)) {
+        ptr.next=candidate.next;
+        dhe.count--;
+        return candidate.value;
+      }
+      ptr=candidate;
+    }
+    return null;
+  }
+
+  /** Returns the value mapped to key, or null if the key is absent. */
+  Object get(Object key) {
+    DHashEntry found=lookup(key);
+    if (found==null)
+      return null;
+    return found.value;
+  }
+
+
+  /** Returns the stored key object that equals key, or null if absent. */
+  Object getKey(Object key) {
+    DHashEntry found=lookup(key);
+    if (found==null)
+      return null;
+    return found.key;
+  }
+
+  /** Reports whether a mapping for key exists. */
+  boolean containsKey(Object key) {
+    return lookup(key)!=null;
+  }
+
+  /**
+   * Maps key to value, replacing any existing mapping.
+   *
+   * @return the previous value for key, or null if the key was new
+   */
+  Object put(Object key, Object value) {
+    int hashcode=key.hashCode();
+    int index1=hash1(hashcode, table.length);
+    DistributedHashEntry dhe=table[index1];
+    if (dhe==null) {
+      dhe=global new DistributedHashEntry();
+      table[index1]=dhe;
+    }
+
+    DHashEntry existing=findInChain(dhe.array, hashcode, key);
+    if (existing!=null) {
+      Object oldvalue=existing.value;
+      existing.value=value;
+      return oldvalue;
+    }
+
+    // New key: prepend a fresh entry to the bucket's chain.
+    DHashEntry he=global new DHashEntry();
+    he.value=value;
+    he.key=key;
+    he.hashval=hashcode;
+    he.next=dhe.array;
+    dhe.array=he;
+
+    dhe.count++;
+
+    return null;
+  }
+}
+
+/** Bucket header: the head of one collision chain plus its entry count. */
+class DistributedHashEntry {
+  DHashEntry array;   // first entry of the chain, null while the bucket is empty
+  int count;          // number of entries currently linked into the chain
+
+  public DistributedHashEntry() {
+    this.count=0;
+  }
+}
+
+
+/** One key/value node of a singly linked collision chain. */
+class DHashEntry {
+  Object key;
+  Object value;
+  int hashval;        // hash code of key as recorded when the entry was created
+  DHashEntry next;    // following node in the chain, or null at the tail
+
+  public DHashEntry() {
+  }
+}
--- /dev/null
+/**
+ * Computes a signature string for a message body from its MD5 digest.
+ */
+public class EphemeralSignature {
+
+  int serverSeed;
+  String serverSeparator;
+  Random rand;
+
+  /** Creates a signer whose random generator is seeded with 0. */
+  public EphemeralSignature() {
+    // Assign the field; a local declaration here would leave it unset.
+    rand = new Random(0);
+  }
+
+  /**
+   * @param randomNumberSeed seed for the random generator
+   * @param separator        separator string to remember
+   */
+  public EphemeralSignature(int randomNumberSeed, String separator) {
+    serverSeed = randomNumberSeed;
+    rand = new Random(randomNumberSeed);
+    serverSeparator = separator;
+  }
+
+  /**
+   * Stores the argument as the separator only; no seed is parsed from it
+   * and no random generator is created.
+   */
+  public EphemeralSignature(String seedAndSeparator) {
+    serverSeparator = seedAndSeparator;
+  }
+
+  /**
+   * Hashes body with MD5.
+   *
+   * @param body text to sign
+   * @return a String built directly from the 16 raw digest bytes
+   */
+  public String computeSignature(String body) {
+    MD5 md = new MD5();
+    byte buf[] = body.getBytes();
+    byte sig[] = new byte[16];
+
+    // Hash exactly the bytes produced, not the character count, so the two
+    // cannot disagree.
+    md.update(buf, buf.length);
+    md.md5final(sig);
+    String signature = new String(sig);
+
+    return signature;
+  }
+
+  /*
+     public long DEKHash(String str)
+     {
+       long hash = str.length();
+
+       for(int i = 0; i < str.length(); i++)
+       {
+         hash = ((hash << 5) ^ (hash >> 27)) ^ str.charAt(i);
+       }
+
+       return hash;
+     }
+   */
+
+}
--- /dev/null
+/**
+ * A FilterResult encapsulates the result of a filter made by checking a mail.
+ **/
+public class FilterResult {
+  /**
+   * This value is used if type is ERROR or UNKNOWN.
+   */
+  public double NO_RESULT;
+
+  /**
+   * A result value greater or equal this value indicates that the filter has
+   * decided on spam.
+   */
+  public int SPAM_THRESHOLD;
+  public int ABSOLUTE_SPAM;
+  public int ABSOLUTE_HAM;
+
+  //public double result; // the result, a value between -1 (ham) and 1000 (spam),
+  // negative values for "error", "unknown" etc.
+
+  // -----------------------------------------------------------------------------
+
+  /**
+   * @param result currently ignored; only the thresholds are initialised
+   */
+  public FilterResult(double result) {
+    initThresholds();
+    //this.result = result;
+  }
+
+  public FilterResult() {
+    initThresholds();
+  }
+
+  /** Sets the threshold fields to the values shared by both constructors. */
+  private void initThresholds() {
+    SPAM_THRESHOLD=50;
+    ABSOLUTE_SPAM=100;
+    ABSOLUTE_HAM=0;
+    NO_RESULT=-1;
+  }
+
+  /**
+   * Takes a majority vote over a set of confidence values.
+   * Negative values vote "unknown", values in [0, SPAM_THRESHOLD) vote
+   * "ham", all others vote "spam". On a tie the earlier category in the
+   * order unknown, ham, spam wins.
+   *
+   * @param confidenceVals one confidence value per voter
+   * @return true only if "spam" wins the vote; false otherwise, including
+   *         when there are no votes at all
+   */
+  public boolean getResult(int[] confidenceVals) {
+    int[] res = new int[3]; //3 equals spam, ham and unknown
+    for(int i=0; i<confidenceVals.length; i++) {
+      int c = confidenceVals[i];
+      if(c < 0)
+        res[0]+=1; //unknown
+      else if(c < SPAM_THRESHOLD)
+        res[1]+=1; //ham
+      else
+        res[2]+=1; //spam
+    }
+    int maxVotes=0;
+    // Start at "unknown" so an empty vote never reads an unset index.
+    int max=0;
+    for(int i=0; i<3; i++) {
+      if(res[i] > maxVotes) {
+        maxVotes = res[i];
+        max = i;
+      }
+    }
+    return max==2;
+  }
+
+  /*
+     public void addProperty(String key, String value) {
+     properties.put(key,value);
+     }
+
+     public String getProperty(String key) {
+     return properties.get(key);
+     }
+
+     public HashMap<String,String> getProperties() {
+     return properties;
+     }
+   */
+}
--- /dev/null
+/**
+ * Tally of how many checked items were counted as spam, ham or unknown.
+ */
+public class FilterStatistic {
+  int unknown;
+  int spam;
+  int ham;
+
+  // -------------------------------------------------------
+
+  /** Starts all three counters at zero. */
+  public FilterStatistic() {
+    this.unknown = 0;
+    this.ham = 0;
+    this.spam = 0;
+  }
+
+  /** Starts the counters at the given values. */
+  public FilterStatistic(int spam, int ham, int unknown) {
+    this.unknown = unknown;
+    this.ham = ham;
+    this.spam = spam;
+  }
+
+  /** @return the sum of the spam, ham and unknown counters */
+  public int getChecked() {
+    int total = getSpam();
+    total += getHam();
+    total += getUnknown();
+    return total;
+  }
+
+  public int getHam() {
+    return this.ham;
+  }
+
+  public int getSpam() {
+    return this.spam;
+  }
+
+  public void setHam(int i) {
+    this.ham = i;
+  }
+
+  public void setSpam(int i) {
+    this.spam = i;
+  }
+
+  public int getUnknown() {
+    return this.unknown;
+  }
+
+  public void setUnknown(int u) {
+    this.unknown = u;
+  }
+
+  /** Adds one to the spam counter. */
+  public void increaseSpam() {
+    int next = getSpam() + 1;
+    setSpam(next);
+  }
+
+  /** Adds one to the ham counter. */
+  public void increaseHam() {
+    int next = getHam() + 1;
+    setHam(next);
+  }
+
+  /** Adds one to the unknown counter. */
+  public void increaseUnknown() {
+    int next = getUnknown() + 1;
+    setUnknown(next);
+  }
+
+  /** @return the three counters rendered as one underscore-separated string */
+  public String toString() {
+    return "Filterstats_spam_"+spam+"_ham_"+ham+"_unknown_"+unknown;
+  }
+}
--- /dev/null
+/**
+ * String-like character sequence described by a backing array, a start
+ * offset into it and a character count.
+ */
+public class GString {
+  public char value[];
+  public int count;
+  public int offset;
+
+  public GString() {
+  }
+
+  /** Creates a one-character string. */
+  public GString(char c) {
+    // Initialise the fields directly; invoking another constructor as a
+    // plain statement would leave this object empty.
+    value = new char[1];
+    value[0] = c;
+    count = 1;
+    offset = 0;
+  }
+
+  /** Copies the characters of str into a fresh backing array. */
+  public GString(String str) {
+    value = new char[str.count];
+    for(int i =0; i< str.count; i++) {
+      value[i] = str.value[i+str.offset];
+    }
+    count = str.count;
+    offset = 0;
+  }
+
+  /** Creates a view that shares gstr's backing array. */
+  public GString(GString gstr) {
+    this.value = gstr.value;
+    this.count = gstr.count;
+    this.offset = gstr.offset;
+  }
+
+  /*
+     public GString(StringBuffer gsb) {
+     value = new char[gsb.length()];
+     count = gsb.length();
+     offset = 0;
+     for (int i = 0; i < count; i++)
+     value[i] = gsb.value[i];
+     }
+   */
+
+  /** Copies str into a fresh backing array. */
+  public GString(char str[]) {
+    char charstr[]=new char[str.length];
+    for(int i=0; i<str.length; i++)
+      charstr[i]=str[i];
+    this.value=charstr;
+    this.count=str.length;
+    this.offset=0;
+  }
+
+  public int length() {
+    return count;
+  }
+
+  /**
+   * @return the first index >= fromIndex holding ch, or -1 if there is none
+   */
+  public int indexOf(int ch, int fromIndex) {
+    // A negative start would index before the beginning of the string.
+    if (fromIndex < 0)
+      fromIndex = 0;
+    for (int i = fromIndex; i < count; i++)
+      if (this.charAt(i) == ch)
+        return i;
+    return -1;
+  }
+
+  public int lastindexOf(int ch) {
+    return this.lastindexOf(ch, count - 1);
+  }
+
+  /**
+   * @return the last index <= fromIndex holding ch, or -1 if there is none
+   */
+  public int lastindexOf(int ch, int fromIndex) {
+    // Run down to and including index 0 so a match on the first
+    // character is found.
+    for (int i = fromIndex; i >= 0; i--)
+      if (this.charAt(i) == ch)
+        return i;
+    return -1;
+  }
+
+  public char charAt(int i) {
+    return value[i+offset];
+  }
+
+  public int indexOf(String str) {
+    return this.indexOf(str, 0);
+  }
+
+  /**
+   * @return the first index >= fromIndex at which str occurs, or -1
+   */
+  public int indexOf(String str, int fromIndex) {
+    if (fromIndex < 0)
+      fromIndex = 0;
+    for (int i = fromIndex; i <= (count-str.count); i++)
+      if (regionMatches(i, str, 0, str.count))
+        return i;
+    return -1;
+  }
+
+  /**
+   * Compares len characters of this string starting at toffset with len
+   * characters of other starting at ooffset.
+   *
+   * @return false if either range is out of bounds or any character differs
+   */
+  public boolean regionMatches(int toffset, String other, int ooffset, int len) {
+    if (toffset < 0 || ooffset < 0 || (toffset+len) > count || (ooffset+len) > other.count)
+      return false;
+
+    for (int i = 0; i < len; i++) {
+      if (other.value[i+other.offset+ooffset] != this.value[i+this.offset+toffset])
+        return false;
+    }
+    return true;
+  }
+
+  public String subString(int beginIndex, int endIndex) {
+    return substring(beginIndex, endIndex);
+  }
+
+  /**
+   * Returns a String for [beginIndex, endIndex) that shares this object's
+   * backing array. The indices are not validated.
+   */
+  public String substring(int beginIndex, int endIndex) {
+    String str;
+    str = new String();
+    str.value = this.value;
+    str.count = endIndex-beginIndex;
+    str.offset = this.offset + beginIndex;
+    return str;
+  }
+
+  public static String valueOf(Object o) {
+    if (o==null)
+      return "null";
+    else
+      return o.toString();
+  }
+
+  /** @return a String built from a fresh copy of this object's characters */
+  public String toLocalString() {
+    return new String(toLocalCharArray(this));
+  }
+
+  /** @return a newly allocated array holding str's characters */
+  public static char[] toLocalCharArray(GString str) {
+    char[] c;
+    int length;
+    length = str.length();
+    c = new char[length];
+    for (int i = 0; i < length; i++) {
+      c[i] = str.value[i+str.offset];
+    }
+    return c;
+  }
+
+  /** Hash of the equivalent String, so equal contents hash equally. */
+  public int hashCode() {
+    String s = this.toLocalString();
+    return s.hashCode();
+  }
+
+  /** Two GStrings are equal when their character contents are equal. */
+  public boolean equals(Object o) {
+    if(o == null)
+      return false;
+    if(!(o instanceof GString))
+      return false;
+    GString gs = (GString)o;
+    String s1 = gs.toLocalString();
+    String s2 = this.toLocalString();
+    if(s2.equals(s1))
+      return true;
+    return false;
+  }
+}
--- /dev/null
+/**
+ * Hash-table key made of an engine name and a signature, with attached
+ * per-user statistics. Only engine and signature take part in equality
+ * and hashing.
+ */
+public class HashEntry {
+  public GString engine;
+  public GString signature;
+  public HashStat stats;
+
+  public HashEntry() {
+
+  }
+
+  /**
+   * hashCode that combines two strings using xor.
+   * @return a hash code value on the entire object.
+   */
+  public int hashCode() {
+    int result=0;
+    // this will not work well if some of the strings are equal.
+    result = engine.hashCode();
+    result ^= signature.hashCode();
+    //result ^= stats.hashCode();
+    //System.out.println("HashEntry: hashCode= " + result);
+    return result;
+  }
+
+  public void setengine(GString engine) {
+    this.engine=engine;
+  }
+
+  public void setstats(HashStat stats) {
+    this.stats=stats;
+  }
+
+  public void setsig(GString signature) {
+    this.signature=signature;
+  }
+
+  public GString getEngine() {
+    return engine;
+  }
+
+  public GString getSignature() {
+    return signature;
+  }
+
+  public HashStat getStats() {
+    return stats;
+  }
+
+  /**
+   * @return true when o is a HashEntry with an equal engine and an equal
+   *         signature; false for null or any other type
+   */
+  public boolean equals(Object o) {
+    // Guard before the cast so null or a foreign object yields false
+    // instead of failing.
+    if(o == null)
+      return false;
+    if(!(o instanceof HashEntry))
+      return false;
+    HashEntry he = (HashEntry)o;
+    if(!(he.getEngine().equals(engine)))
+      return false;
+    if(!(he.getSignature().equals(signature)))
+      return false;
+    //if(!(he.getStats().equals(stats)))
+    //  return false;
+    return true;
+  }
+
+  /**
+   * Sums getChecked() over the statistics of every user that
+   * stats.getUsers() reports.
+   * NOTE(review): the total includes ham and unknown counts, not only
+   * spam; confirm this is the intended "spam confidence".
+   */
+  public int askForSpam() {
+    int[] users = stats.getUsers();
+    int spamConfidence=0;
+    for(int i=0; i<users.length; i++) {
+      int userid = users[i];
+      spamConfidence += stats.userstat[userid].getChecked();
+    }
+    return spamConfidence;
+  }
+}
--- /dev/null
+/**
+ * Per-user statistics for one hash entry. Slots are indexed by user id;
+ * userid[i] == 1 marks slot i as in use.
+ */
+public class HashStat {
+  int[] userid;
+  FilterStatistic[] userstat;
+  int[] listofusers;
+
+  /** Allocates eight user slots, each with zeroed statistics. */
+  public HashStat() {
+    userid = new int[8]; //max users for our system=8
+    userstat = new FilterStatistic[8];
+    int slot = 0;
+    while(slot < 8) {
+      userstat[slot] = new FilterStatistic();
+      slot++;
+    }
+  }
+
+  /** Marks user id as present and overwrites its three counters. */
+  public void setuser(int id, int spam, int ham, int unknown) {
+    userid[id] = 1;
+    FilterStatistic target = userstat[id];
+    target.setSpam(spam);
+    target.setHam(ham);
+    target.setUnknown(unknown);
+  }
+
+  /** Marks user id as present without touching its counters. */
+  public void setuserid(int id) {
+    userid[id] = 1;
+  }
+
+  /** @return the presence marker stored for id (1 when present) */
+  public int getuser(int id) {
+    return userid[id];
+  }
+
+  public int getspamcount(int userid) {
+    FilterStatistic fs = userstat[userid];
+    return fs.getSpam();
+  }
+
+  public int gethamcount(int userid) {
+    FilterStatistic fs = userstat[userid];
+    return fs.getHam();
+  }
+
+  public int getunknowncount(int userid) {
+    FilterStatistic fs = userstat[userid];
+    return fs.getUnknown();
+  }
+
+  public void incSpamCount(int userid) {
+    FilterStatistic fs = userstat[userid];
+    fs.increaseSpam();
+  }
+
+  public void incHamCount(int userid) {
+    FilterStatistic fs = userstat[userid];
+    fs.increaseHam();
+  }
+
+  /**
+   * @return the ids of all present users in ascending order; the array is
+   *         also kept in listofusers
+   */
+  public int[] getUsers() {
+    listofusers = new int[numUsers()];
+    int filled=0;
+    int slot=0;
+    while(slot < 8) {
+      if(userid[slot] == 1) {
+        listofusers[filled]=slot;
+        filled++;
+      }
+      slot++;
+    }
+    return listofusers;
+  }
+
+  /** @return how many of the eight slots are marked present */
+  public int numUsers() {
+    int present=0;
+    int slot=0;
+    while(slot < 8) {
+      if(userid[slot] == 1) {
+        present++;
+      }
+      slot++;
+    }
+    return present;
+  }
+}
--- /dev/null
+
+// This class computes MD5 hashes.
+// Manually translated by Jon Howell <jonh@cs.dartmouth.edu>
+// from some public domain C code (md5.c) included with the ssh-1.2.22 source.
+// Tue Jan 19 15:55:50 EST 1999
+// $Id: MD5.java,v 1.1 2010/03/04 00:17:44 adash Exp $
+//
+// To compute the message digest of a chunk of bytes, create an
+// MD5 object 'md5', call md5.update() as needed on buffers full
+// of bytes, and then call md5.md5final(), which
+// will fill a supplied 16-byte array with the digest.
+//
+// A main() method is included that hashes the data on System.in.
+//
+// It seems to run around 25-30 times slower (JDK1.1.6) than optimized C
+// (gcc -O4, version 2.7.2.3). Measured on a Sun Ultra 5 (SPARC 270MHz).
+//
+// Comments from md5.c from ssh-1.2.22, the basis for this code:
+//
+/* This code has been heavily hacked by Tatu Ylonen <ylo@cs.hut.fi> to
+ make it compile on machines like Cray that don't have a 32 bit integer
+ type. */
+/*
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest. This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ */
+
+/**
+ * Incremental MD5 digest. Feed bytes with update() and finish with
+ * md5final(), which writes the 16-byte digest and then zeroes the
+ * internal state, so an instance computes one digest.
+ */
+public class MD5 {
+  int buf[];     // These were originally unsigned ints.
+  // This Java code makes an effort to avoid sign traps.
+  // buf[] is where the hash accumulates.
+  long bits;     // This is the count of bits hashed so far.
+  byte in[];     // This is a buffer where we stash bytes until we have
+  // enough (64) to perform a transform operation.
+  int inint[];
+  // inint[] is scratch space for transform(); it is allocated once here
+  // and reused on every call.
+  // (In the C version this is allocated on the stack.)
+
+  public MD5() {
+    buf = new int[4];
+    // fill the hash accumulator with a seed value
+    buf[0] = 0x67452301;
+    buf[1] = 0xefcdab89;
+    buf[2] = 0x98badcfe;
+    buf[3] = 0x10325476;
+
+    // initially, we've hashed zero bits
+    bits = 0L;
+
+    in = new byte[64];
+    inint = new int[16];
+  }
+
+  /** Hashes all of newbuf. */
+  public void update(byte[] newbuf) {
+    update(newbuf, 0, newbuf.length);
+  }
+
+  /** Hashes the first length bytes of newbuf. */
+  public void update(byte[] newbuf, int length) {
+    update(newbuf, 0, length);
+  }
+
+  /**
+   * Hashes buflen bytes of newbuf starting at bufstart.
+   *
+   * @param newbuf   source bytes
+   * @param bufstart index of the first byte to hash
+   * @param buflen   number of bytes to hash
+   */
+  public void update(byte[] newbuf, int bufstart, int buflen) {
+    int t;
+    int len = buflen;
+
+    // stash old bits value for the "Bytes already in" computation
+    // just below.
+    t = (int) bits;     // (int) cast should just drop high bits, I hope
+
+    /* update bitcount */
+    /* the C code used two 32-bit ints separately, and carefully
+     * ensured that the carry carried.
+     * Java has a 64-bit long, which is just what the code really wants.
+     */
+    // Widen before shifting: shifting the int first would overflow for
+    // len >= 2^28.
+    bits += ((long) len) << 3;
+
+    t = (t >>> 3) & 0x3f;       /* Bytes already in this->in */
+
+    /* Handle any leading odd-sized chunks */
+    /* (that is, any left-over chunk left by last update() */
+
+    if (t!=0) {
+      int p = t;
+      t = 64 - t;
+      if (len < t) {
+        arraycopy(newbuf, bufstart, in, p, len);
+        return;
+      }
+      arraycopy(newbuf, bufstart, in, p, t);
+      transform();
+      bufstart += t;
+      len -= t;
+    }
+
+    /* Process data in 64-byte chunks */
+    while (len >= 64) {
+      arraycopy(newbuf, bufstart, in, 0, 64);
+      transform();
+      bufstart += 64;
+      len -= 64;
+    }
+
+    /* Handle any remaining bytes of data. */
+    /* that is, stash them for the next update(). */
+    arraycopy(newbuf, bufstart, in, 0, len);
+  }
+
+  /** Copies len bytes from src[srcPos..] to dest[destPos..]. */
+  public void arraycopy(byte[] src, int srcPos, byte[] dest, int destPos, int len) {
+    for (int i = 0; i < len; i++) {
+      dest[destPos+i] = src[srcPos+i];
+    }
+    return;
+  }
+
+  /*
+   * Final wrapup - pad to 64-byte boundary with the bit pattern
+   * 1 0* (64-bit count of bits processed, LSB-first)
+   *
+   * digest must have room for 16 bytes. The internal state is zeroed
+   * afterwards.
+   */
+  public void md5final(byte[] digest) {
+    /* "final" is a poor method name in Java. :v) */
+    int count;
+    int p;     // in original code, this is a pointer; in this java code
+    // it's an index into the array this->in.
+
+    /* Compute number of bytes mod 64 */
+    count = (int) ((bits >>> 3) & 0x3F);
+
+    /* Set the first char of padding to 0x80.  This is safe since there is
+       always at least one byte free */
+    p = count;
+    in[p++] = (byte) 0x80;
+
+    /* Bytes of padding needed to make 64 bytes */
+    count = 64 - 1 - count;
+
+    /* Pad out to 56 mod 64 */
+    if (count < 8) {
+      /* Two lots of padding:  Pad the first block to 64 bytes */
+      zeroByteArray(in, p, count);
+      transform();
+
+      /* Now fill the next block with 56 bytes */
+      zeroByteArray(in, 0, 56);
+    } else {
+      /* Pad block to 56 bytes */
+      zeroByteArray(in, p, count - 8);
+    }
+
+    /* Append length in bits and transform */
+    // Could use a PUT_64BIT... func here. This is a fairly
+    // direct translation from the C code, where bits was an array
+    // of two 32-bit ints.
+    int lowbits = (int) bits;
+    int highbits = (int) (bits >>> 32);
+    PUT_32BIT_LSB_FIRST(in, 56, lowbits);
+    PUT_32BIT_LSB_FIRST(in, 60, highbits);
+
+    transform();
+    PUT_32BIT_LSB_FIRST(digest, 0, buf[0]);
+    PUT_32BIT_LSB_FIRST(digest, 4, buf[1]);
+    PUT_32BIT_LSB_FIRST(digest, 8, buf[2]);
+    PUT_32BIT_LSB_FIRST(digest, 12, buf[3]);
+
+    /* zero sensitive data */
+    /* notice this misses any sneaking out on the stack. The C
+     * version uses registers in some spots, perhaps because
+     * they care about this.
+     */
+    zeroByteArray(in);
+    zeroIntArray(buf);
+    bits = 0;
+    zeroIntArray(inint);
+  }
+
+  /*
+     public static void main(String args[]) {
+  // This main() method was created to easily test
+  // this class. It hashes whatever's on System.in.
+
+  byte buf[] = new byte[397];
+  // arbitrary buffer length designed to irritate update()
+  int rc;
+  MD5 md = new MD5();
+  byte out[] = new byte[16];
+  int i;
+  int len = 0;
+
+  try {
+  while ((rc = System.in.read(buf, 0, 397)) > 0) {
+  md.update(buf, rc);
+  len += rc;
+  }
+  } catch (IOException ex) {
+  ex.printStackTrace();
+  return;
+  }
+  md.md5final(out);
+
+  System.out.println("file length: "+len);
+  System.out.println("hash: "+dumpBytes(out));
+  }
+   */
+
+
+  /////////////////////////////////////////////////////////////////////
+  // Below here ye will only finde private functions                 //
+  /////////////////////////////////////////////////////////////////////
+
+  // There must be a way to do these functions that's
+  // built into Java, and I just haven't noticed it yet.
+
+  private void zeroByteArray(byte[] a) {
+    zeroByteArray(a, 0, a.length);
+  }
+
+  private void zeroByteArray(byte[] a, int start, int length) {
+    setByteArray(a, (byte) 0, start, length);
+  }
+
+  /** Sets a[start .. start+length-1] to val. */
+  private void setByteArray(byte[] a, byte val, int start, int length) {
+    int i;
+    int end = start+length;
+    for (i=start; i<end; i++) {
+      a[i] = val;
+    }
+  }
+
+  private void zeroIntArray(int[] a) {
+    zeroIntArray(a, 0, a.length);
+  }
+
+  private void zeroIntArray(int[] a, int start, int length) {
+    setIntArray(a, (int) 0, start, length);
+  }
+
+  /** Sets a[start .. start+length-1] to val. */
+  private void setIntArray(int[] a, int val, int start, int length) {
+    int i;
+    int end = start+length;
+    for (i=start; i<end; i++) {
+      a[i] = val;
+    }
+  }
+
+  // The four step helpers share one shape: mix, rotate left by s, add x.
+  // They differ only in the boolean mixing function.
+
+  private int MD5STEP1(int w, int x, int y, int z, int data, int s) {
+    w += (z ^ (x & (y ^ z))) + data;
+    w = w<<s | w>>>(32-s);
+    w += x;
+    return w;
+  }
+
+  private int MD5STEP2(int w, int x, int y, int z, int data, int s) {
+    w += (y ^ (z & (x ^ y))) + data;
+    w = w<<s | w>>>(32-s);
+    w += x;
+    return w;
+  }
+
+  private int MD5STEP3(int w, int x, int y, int z, int data, int s) {
+    w += (x ^ y ^ z) + data;
+    w = w<<s | w>>>(32-s);
+    w += x;
+    return w;
+  }
+
+  private int MD5STEP4(int w, int x, int y, int z, int data, int s) {
+    w += (y ^ (x | ~z)) + data;
+    w = w<<s | w>>>(32-s);
+    w += x;
+    return w;
+  }
+
+  /** Folds the 64 bytes currently held in in[] into the accumulator buf[]. */
+  private void transform() {
+    /* load in[] byte array into the reusable int scratch array */
+    int i;
+
+    for (i=0; i<16; i++) {
+      inint[i] = GET_32BIT_LSB_FIRST(in, 4*i);
+    }
+
+    int a, b, c, d;
+    a = buf[0];
+    b = buf[1];
+    c = buf[2];
+    d = buf[3];
+
+    a = MD5STEP1(a, b, c, d, inint[0] + 0xd76aa478, 7);
+    d = MD5STEP1(d, a, b, c, inint[1] + 0xe8c7b756, 12);
+    c = MD5STEP1(c, d, a, b, inint[2] + 0x242070db, 17);
+    b = MD5STEP1(b, c, d, a, inint[3] + 0xc1bdceee, 22);
+    a = MD5STEP1(a, b, c, d, inint[4] + 0xf57c0faf, 7);
+    d = MD5STEP1(d, a, b, c, inint[5] + 0x4787c62a, 12);
+    c = MD5STEP1(c, d, a, b, inint[6] + 0xa8304613, 17);
+    b = MD5STEP1(b, c, d, a, inint[7] + 0xfd469501, 22);
+    a = MD5STEP1(a, b, c, d, inint[8] + 0x698098d8, 7);
+    d = MD5STEP1(d, a, b, c, inint[9] + 0x8b44f7af, 12);
+    c = MD5STEP1(c, d, a, b, inint[10] + 0xffff5bb1, 17);
+    b = MD5STEP1(b, c, d, a, inint[11] + 0x895cd7be, 22);
+    a = MD5STEP1(a, b, c, d, inint[12] + 0x6b901122, 7);
+    d = MD5STEP1(d, a, b, c, inint[13] + 0xfd987193, 12);
+    c = MD5STEP1(c, d, a, b, inint[14] + 0xa679438e, 17);
+    b = MD5STEP1(b, c, d, a, inint[15] + 0x49b40821, 22);
+
+    a = MD5STEP2(a, b, c, d, inint[1] + 0xf61e2562, 5);
+    d = MD5STEP2(d, a, b, c, inint[6] + 0xc040b340, 9);
+    c = MD5STEP2(c, d, a, b, inint[11] + 0x265e5a51, 14);
+    b = MD5STEP2(b, c, d, a, inint[0] + 0xe9b6c7aa, 20);
+    a = MD5STEP2(a, b, c, d, inint[5] + 0xd62f105d, 5);
+    d = MD5STEP2(d, a, b, c, inint[10] + 0x02441453, 9);
+    c = MD5STEP2(c, d, a, b, inint[15] + 0xd8a1e681, 14);
+    b = MD5STEP2(b, c, d, a, inint[4] + 0xe7d3fbc8, 20);
+    a = MD5STEP2(a, b, c, d, inint[9] + 0x21e1cde6, 5);
+    d = MD5STEP2(d, a, b, c, inint[14] + 0xc33707d6, 9);
+    c = MD5STEP2(c, d, a, b, inint[3] + 0xf4d50d87, 14);
+    b = MD5STEP2(b, c, d, a, inint[8] + 0x455a14ed, 20);
+    a = MD5STEP2(a, b, c, d, inint[13] + 0xa9e3e905, 5);
+    d = MD5STEP2(d, a, b, c, inint[2] + 0xfcefa3f8, 9);
+    c = MD5STEP2(c, d, a, b, inint[7] + 0x676f02d9, 14);
+    b = MD5STEP2(b, c, d, a, inint[12] + 0x8d2a4c8a, 20);
+
+    a = MD5STEP3(a, b, c, d, inint[5] + 0xfffa3942, 4);
+    d = MD5STEP3(d, a, b, c, inint[8] + 0x8771f681, 11);
+    c = MD5STEP3(c, d, a, b, inint[11] + 0x6d9d6122, 16);
+    b = MD5STEP3(b, c, d, a, inint[14] + 0xfde5380c, 23);
+    a = MD5STEP3(a, b, c, d, inint[1] + 0xa4beea44, 4);
+    d = MD5STEP3(d, a, b, c, inint[4] + 0x4bdecfa9, 11);
+    c = MD5STEP3(c, d, a, b, inint[7] + 0xf6bb4b60, 16);
+    b = MD5STEP3(b, c, d, a, inint[10] + 0xbebfbc70, 23);
+    a = MD5STEP3(a, b, c, d, inint[13] + 0x289b7ec6, 4);
+    d = MD5STEP3(d, a, b, c, inint[0] + 0xeaa127fa, 11);
+    c = MD5STEP3(c, d, a, b, inint[3] + 0xd4ef3085, 16);
+    b = MD5STEP3(b, c, d, a, inint[6] + 0x04881d05, 23);
+    a = MD5STEP3(a, b, c, d, inint[9] + 0xd9d4d039, 4);
+    d = MD5STEP3(d, a, b, c, inint[12] + 0xe6db99e5, 11);
+    c = MD5STEP3(c, d, a, b, inint[15] + 0x1fa27cf8, 16);
+    b = MD5STEP3(b, c, d, a, inint[2] + 0xc4ac5665, 23);
+
+    a = MD5STEP4(a, b, c, d, inint[0] + 0xf4292244, 6);
+    d = MD5STEP4(d, a, b, c, inint[7] + 0x432aff97, 10);
+    c = MD5STEP4(c, d, a, b, inint[14] + 0xab9423a7, 15);
+    b = MD5STEP4(b, c, d, a, inint[5] + 0xfc93a039, 21);
+    a = MD5STEP4(a, b, c, d, inint[12] + 0x655b59c3, 6);
+    d = MD5STEP4(d, a, b, c, inint[3] + 0x8f0ccc92, 10);
+    c = MD5STEP4(c, d, a, b, inint[10] + 0xffeff47d, 15);
+    b = MD5STEP4(b, c, d, a, inint[1] + 0x85845dd1, 21);
+    a = MD5STEP4(a, b, c, d, inint[8] + 0x6fa87e4f, 6);
+    d = MD5STEP4(d, a, b, c, inint[15] + 0xfe2ce6e0, 10);
+    c = MD5STEP4(c, d, a, b, inint[6] + 0xa3014314, 15);
+    b = MD5STEP4(b, c, d, a, inint[13] + 0x4e0811a1, 21);
+    a = MD5STEP4(a, b, c, d, inint[4] + 0xf7537e82, 6);
+    d = MD5STEP4(d, a, b, c, inint[11] + 0xbd3af235, 10);
+    c = MD5STEP4(c, d, a, b, inint[2] + 0x2ad7d2bb, 15);
+    b = MD5STEP4(b, c, d, a, inint[9] + 0xeb86d391, 21);
+
+    buf[0] += a;
+    buf[1] += b;
+    buf[2] += c;
+    buf[3] += d;
+  }
+
+  /** Reads four bytes at b[off..off+3] as a little-endian int. */
+  private int GET_32BIT_LSB_FIRST(byte[] b, int off) {
+    return
+      ((int)(b[off+0]&0xff)) |
+      ((int)(b[off+1]&0xff) << 8) |
+      ((int)(b[off+2]&0xff) << 16) |
+      ((int)(b[off+3]&0xff) << 24);
+  }
+
+  /** Writes value to b[off..off+3] in little-endian byte order. */
+  private void PUT_32BIT_LSB_FIRST(byte[] b, int off, int value) {
+    b[off+0] = (byte) (value & 0xff);
+    b[off+1] = (byte) ((value >> 8) & 0xff);
+    b[off+2] = (byte) ((value >> 16)& 0xff);
+    b[off+3] = (byte) ((value >> 24)& 0xff);
+  }
+
+  // These are debug routines I was using while trying to
+  // get this code to generate the same hashes as the C version.
+  // (IIRC, all the errors were due to the absence of unsigned
+  // ints in Java.)
+  /*
+     private void debugStatus(String m) {
+     System.out.println(m+":");
+     System.out.println("in: "+dumpBytes(in));
+     System.out.println("bits: "+bits);
+     System.out.println("buf: "
+     +Integer.toHexString(buf[0])+" "
+     +Integer.toHexString(buf[1])+" "
+     +Integer.toHexString(buf[2])+" "
+     +Integer.toHexString(buf[3]));
+     }
+
+     private static String dumpBytes(byte[] bytes) {
+     int i;
+     StringBuffer sb = new StringBuffer();
+     for (i=0; i<bytes.length; i++) {
+     if (i%32 == 0 && i!=0) {
+     sb.append("\n");
+     }
+     String s = Integer.toHexString(bytes[i]);
+     if (s.length() < 2) {
+     s = "0"+s;
+     }
+     if (s.length() > 2) {
+     s = s.substring(s.length()-2);
+     }
+     sb.append(s);
+     }
+     return sb.toString();
+     }
+   */
+}
--- /dev/null
+/**
+ * This class is a container for all data contained in an Email Message.
+ **/
+public class Mail {
+
+ String header; // the full header
+ //String sentOn; // time the message was sent
+ //String receivedOn; // time when the message arrived
+ String from; // the "from" field
+ String to; // the "to" field
+ String cc;
+ String subject;
+ String body;
+ String noURLBody;
+ String sourceCode;
+ String spam;
+ boolean hasAttachement;
+ String encoding; //rich text, plain, html
+
+ String messageID; // cached message ID for reuse (takes a lot of memory and is used all over the place)
+ //same as hashcode of a class
+ boolean isSpam;
+
+ /**
+ * this is a really simple implementation of a tokenizer
+ * used to build tokens from an email and divide email into parts
+ **/
+ int MAX_TOKEN_SIZE;
+
+ public Mail() {
+ messageID=null;
+ }
+
+  /**
+   * Reads a mail from a file.
+   * Leading lines are scanned for the "Spam:", "Header:", "To:", "From:",
+   * "Cc:" and "Subject:" markers; the second token of a matching line is
+   * stored in the corresponding field. Header parsing stops after the
+   * "Subject:" line and everything that follows is collected as the body.
+   *
+   * @param fileName path of the file to read
+   */
+  public Mail(String fileName)  // read a mail from file
+  {
+    //System.out.println("DEBUG: fileName= " + fileName);
+
+    BufferedReader fileinput = new BufferedReader(new FileInputStream(fileName));
+    String line;
+    boolean chk = false;
+
+    while((line = fileinput.readLine()) != null)
+    {
+      chk = true;
+
+      Vector splittedLine = line.split();
+      // Skip lines without tokens; tolerate a marker with no value
+      // instead of indexing past the end of the vector.
+      if(splittedLine.size() > 0)
+      {
+        String marker = (String)(splittedLine.elementAt(0));
+        String fieldValue = null;
+        if(splittedLine.size() > 1)
+          fieldValue = (String)(splittedLine.elementAt(1));
+
+        if(marker.equals("Spam:"))
+        {
+          spam = fieldValue;
+        }
+        else if(marker.equals("Header:"))  // message id
+        {
+          header = fieldValue;
+        }
+        else if(marker.equals("To:")) // receiver
+        {
+          to = fieldValue;
+        }
+        else if(marker.equals("From:")) // sender
+        {
+          from = fieldValue;
+        }
+        else if(marker.equals("Cc:")) // cc
+        {
+          cc = fieldValue;
+        }
+        else if(marker.equals("Subject:")) // Subject
+        {
+          subject = fieldValue;
+          break;
+        }
+      }
+    } // parsed messageID, To, from, cc, Title
+
+    /**
+     * error checking
+     **/
+    if(!chk)
+      System.out.println("no line read");
+
+
+    body = new String();
+    byte[] readBody = new byte[256];
+    int nread;
+
+    // Append only the bytes actually read; the last chunk is usually
+    // shorter than the scratch buffer.
+    while((nread = fileinput.read(readBody)) > 0)
+    {
+      body += new String(readBody, 0, nread);
+    }
+
+    fileinput.close();
+
+    MAX_TOKEN_SIZE = 1024;
+  }
+
+ // -------------------------------------------------------
+
+ public void setHeader(String header) {
+ this.header = header;
+ }
+
+ public String getHeader() {
+ return header;
+ }
+
+
+ /*
+ public void setSentOn(String sentOn) {
+ this.sentOn = sentOn;
+ }
+
+ public String getSentOn() {
+ return sentOn;
+ }
+
+ public Date getSentOnAsDate() {
+ String sentOn = getSentOn();
+ return parseDate(sentOn);
+ }
+
+ public void setReceivedOn(String receivedOn) {
+ this.receivedOn = receivedOn;
+ }
+
+ public String getReceivedOn() {
+ return receivedOn;
+ }
+
+ public Date getReceivedOnAsDate() {
+ String receivedOn = getReceivedOn();
+ return parseDate(receivedOn);
+ }
+ */
+
+
+ /**
+ * Parses a given Date-String in into a real Date-Object
+ *
+ * @param stringDate the string in format dd.mm.yyyy hh:mm
+ * @return a Date containing the info of the string or the actual date and time if something fails.
+ */
+ /*
+ public Date parseDate(String stringDate) {
+ // date is in this format: dd.mm.yyyy hh:mm
+ if (stringDate == null || "N/A".equals(stringDate)) {
+ return new Date();
+ }
+ try {
+ synchronized (MAIL_TIME_FORMAT) {
+ return MAIL_TIME_FORMAT.parse(stringDate);
+ }
+ } catch (Throwable e) {
+ return new Date();
+ }
+ }
+ */
+
+ /** Sets the sender string of this mail. */
+ public void setFrom(String from) {
+ this.from = from;
+ }
+
+ /** Returns the sender string of this mail. */
+ public String getFrom() {
+ return from;
+ }
+
+ /** Sets the receiver string of this mail. */
+ public void setTo(String to) {
+ this.to = to;
+ }
+
+ /** Returns the receiver string of this mail. */
+ public String getTo() {
+ return to;
+ }
+
+ /** Sets the cc string of this mail. */
+ public void setCc(String cc) {
+ this.cc = cc;
+ }
+
+ /** Returns the cc string of this mail. */
+ public String getCc() {
+ return cc;
+ }
+
+ /** Sets the subject string of this mail. */
+ public void setSubject(String subject) {
+ this.subject = subject;
+ }
+
+ /** Returns the subject string of this mail. */
+ public String getSubject() {
+ return subject;
+ }
+
+ /** Replaces the body text of this mail. */
+ public void setBody(String body) {
+ this.body = body;
+ }
+
+ /** Returns the body text of this mail. */
+ public String getBody() {
+ return body;
+ }
+
+ /** Stores the given string in the sourceCode field. */
+ public void setSourceCode(String sourceCode) {
+ this.sourceCode = sourceCode;
+ }
+
+ /** Returns the value of the sourceCode field. */
+ public String getSourceCode() {
+ return sourceCode;
+ }
+
+ /** Sets the attachment flag (the field name keeps the original spelling "Attachement"). */
+ public void setHasAttachement(boolean hasAttachement) {
+ this.hasAttachement = hasAttachement;
+ }
+
+ /** Returns the attachment flag. */
+ public boolean getHasAttachement() {
+ return hasAttachement;
+ }
+
+ /** Sets the encoding string; isTextEncoding() and isHTMLEncoding() search it for "plain" / "html". */
+ public void setEncoding(String encoding) {
+ this.encoding = encoding;
+ }
+
+ /** Returns the encoding string. */
+ public String getEncoding() {
+ return encoding;
+ }
+
+ public boolean isTextEncoding() {
+ return getEncoding().toLowerCase().indexOf("plain") >= 0;
+ }
+
+ public boolean isHTMLEncoding() {
+ return getEncoding().toLowerCase().indexOf("html") >= 0;
+ }
+
+ /*
+ public String toString() {
+ return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getReceivedOn() + "," + getSentOn() + "," + getSourceCode() + "," + getSubject() + "," + getTo();
+ }
+ */
+
+ public String toString() {
+ return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getSourceCode() + "," + getSubject() + "," + getTo();
+ }
+
+ /*
+ public String getID() {
+ if (messageID == null) { // no cached version
+ // Take the message-ID header as ID (if present)
+ String[] messageIDs = getHeaderField("Message-ID");
+ if ((messageIDs != null) && (messageIDs.length > 0)) {
+ messageID = messageIDs[0];
+ } else { // otherwise, hash header and body as ID
+ return String.valueOf(getHeader().hashCode() + getBody().hashCode());
+ }
+ }
+
+ return messageID;
+ }
+ */
+
+  /**
+   * Placeholder for header-field lookup; no lookup is implemented.
+   *
+   * @param fieldName name of the requested header field (currently ignored)
+   * @return always null
+   */
+  public String[] getHeaderField(String fieldName) {
+    // The body used to be empty, which is not valid for a method that
+    // declares a return type; return an explicit "nothing found" value.
+    return null;
+  }
+
+  /**
+   * Placeholder for e-mail address extraction; no extraction is implemented.
+   *
+   * @return always null
+   */
+  public String extractEMailAddress() {
+    // The body used to be empty, which is not valid for a method that
+    // declares a return type; return an explicit "nothing found" value.
+    return null;
+  }
+
+ /*
+ public boolean equals(Object o) {
+ if (o instanceof Mail) {
+ Mail mail = (Mail)o;
+ return this.getID().equals(mail.getID());
+ }
+
+ return false;
+ }
+ */
+
+  /**
+   * Builds the list of "common" mail parts.
+   *
+   * @return a new Vector holding, in this order, the header, from and subject
+   *         fields (each may be null if it was never set)
+   */
+  public Vector getCommonPart()
+  {
+    Vector common = new Vector();
+    common.addElement(header);   // element 0: header
+    common.addElement(from);     // element 1: sender
+    common.addElement(subject);  // element 2: subject
+    return common;
+  }
+
+ /** Returns the body field; same value as getBody(). */
+ public String getBodyString()
+ {
+ return body;
+ }
+
+  /**
+   * Splits this mail into the three groups of parts that the signature
+   * computation consumes.
+   *
+   * @return a Vector of three Vectors: [0] getCommonPart(), [1] getURLs(),
+   *         [2] getSplittedBody(MAX_TOKEN_SIZE)
+   */
+  public Vector returnEmail() {
+    // Evaluate the three groups in the same order the result lists them.
+    Vector common = getCommonPart();
+    Vector urls = getURLs();
+    Vector bodyChunks = getSplittedBody(MAX_TOKEN_SIZE);
+
+    Vector parts = new Vector();
+    parts.addElement(common);
+    parts.addElement(urls);
+    parts.addElement(bodyChunks);
+    return parts;
+  }
+
+ public Vector getURLs()
+ {
+ Vector returnStrings = new Vector();
+ Vector splittedBody = body.split();
+
+ // add URL and email in the body
+ for(int i=0; i<splittedBody.size(); i++)
+ {
+ String segment = (String)(splittedBody.elementAt(i));
+ if(segment.startsWith("http://")) // URL
+ {
+ returnStrings.addElement(segment);
+ }
+ else if(isEmailAccount(segment)) // email
+ {
+ returnStrings.addElement(segment);
+ }
+ }
+
+ return returnStrings;
+ }
+
+  /**
+   * Heuristic e-mail test: true when the string contains both an "@" and
+   * a "." anywhere (their relative order is not checked).
+   */
+  private boolean isEmailAccount(String str)
+  {
+    return str.contains("@") && str.contains(".");
+  }
+
+  /**
+   * Rebuilds noURLBody from body: every token of body.split() that neither
+   * starts with "http://" nor satisfies isEmailAccount() is appended, in
+   * order and without any separator between tokens.
+   */
+  public void setNoURLBody()
+  {
+    Vector tokens = body.split();
+    Vector kept = new Vector();
+    int capacity = 0;
+
+    // Pass 1: pick the tokens to keep and total their length
+    for (int k = 0; k < tokens.size(); k++) {
+      String token = (String)(tokens.elementAt(k));
+      if (token.startsWith("http://") || isEmailAccount(token))
+        continue;
+      kept.addElement(token);
+      capacity += token.length();
+    }
+
+    // Pass 2: concatenate into a buffer sized up front
+    StringBuffer text = new StringBuffer(capacity);
+    for (int k = 0; k < kept.size(); k++) {
+      text.append((String)(kept.elementAt(k)));
+    }
+    noURLBody = text.toString();
+  }
+
+ // setNoURLBody method has to be called before this method
+ // parameter : bytesize to split.
+ public Vector getSplittedBody(int size)
+ {
+ setNoURLBody();
+ Vector returnStrings = new Vector();
+ int end=noURLBody.length();
+
+ for(int i=1; i< end; i+=size)
+ {
+ if((i+size)>=end) {
+ String str=noURLBody.substring(i, end);
+ returnStrings.addElement(str);
+ }
+ else {
+ String str=noURLBody.substring(i, i+size);
+ returnStrings.addElement(str);
+ }
+ }
+ return returnStrings;
+ }
+
+
+ /**
+ * Assigns the isSpam field.
+ * NOTE(review): getIsSpam() reads the separate spam field, not isSpam, so within
+ * the visible code this setter does not change what getIsSpam() returns - confirm intent.
+ */
+ public void setIsSpam(boolean spam) {
+ isSpam = spam;
+ }
+
+  /**
+   * Reports whether this mail is labelled as spam.
+   *
+   * @return true exactly when the spam field equals "yes"
+   */
+  // NOTE(review): this reads the spam field, while setIsSpam() writes
+  // isSpam - confirm that the two are meant to be independent.
+  public boolean getIsSpam() {
+    return spam.equals("yes");
+  }
+
+  /**
+   * Computes the signatures of this mail for the spam filter.
+   *
+   * @param userid not used by this method
+   * @return the Vector produced by SignatureComputer.computeSigs() for the
+   *         parts returned by returnEmail()
+   */
+  public Vector checkMail(int userid) {
+    // Preprocess: break the mail into common parts, URLs and body chunks
+    Vector mailParts = returnEmail();
+
+    // Hash the parts with a freshly configured signature computer
+    SignatureComputer computer = new SignatureComputer();
+    return computer.computeSigs(mailParts);
+  }
+
+ /* For tests only */
+ /*
+ public static void main(String[] args)
+ {
+ Mail mail = new Mail("./emails/email1");
+
+ String[] a = mail.createMailStrings();
+
+ for(String b : a)
+ {
+ System.out.println(b);
+ }
+ }
+ */
+}
--- /dev/null
+public class SignatureComputer {
+ public EphemeralSignature sig4; // engine number 4 (Ephemeral), see createEnginesToCheck()
+ public WhiplashSignature sig8; // engine number 8 (Whiplash), see createEnginesToCheck()
+
+ // engine numbers consulted by computeSigs() and isSigSupported(); filled by createEnginesToCheck()
+ int[] enginesToUseForCheck;
+
+ /** Creates both engines with their no-argument constructors and registers engines 4 and 8. */
+ public SignatureComputer() {
+ sig4 = new EphemeralSignature(); // default values
+ sig8 = new WhiplashSignature();
+ createEnginesToCheck();
+ }
+
+ /**
+ * Constructor to be used when some parsing has already taken place with the
+ * server-provided value <code>randomNumberSeed</code>. Both arguments are
+ * forwarded to the EphemeralSignature constructor.
+ *
+ * @param randomNumberSeed
+ * a non-negative number used for seeding the random number generator
+ * before starting to hash values.
+ * @param separator
+ * how the mail text should be split into lines (i.e. which characters
+ * separate two lines)
+ */
+ public SignatureComputer(int randomNumberSeed, String separator) {
+ sig4 = new EphemeralSignature(randomNumberSeed,separator);
+ sig8 = new WhiplashSignature();
+ createEnginesToCheck();
+ }
+
+ /**
+ * The constructor to be used most of the time. You can hand over the
+ * seed string exactly as it is provided by the razor server; it is passed
+ * unchanged to the EphemeralSignature constructor.
+ *
+ * @param seedAndSeparator
+ * a string containing the seed value for the RNG and a separator list
+ * (separated by ' <b>- </b>'). The default value is
+ * <code>"7542-10"</code>, which means server seed 7542 and only one
+ * separator, 10 (which is ASCII '\n').
+ */
+ public SignatureComputer(String seedAndSeparator) {
+ sig4 = new EphemeralSignature(seedAndSeparator);
+ sig8 = new WhiplashSignature();
+ createEnginesToCheck();
+ }
+
+ /**
+ *
+ */
+ public void createEnginesToCheck() {
+ enginesToUseForCheck = new int[2];
+ enginesToUseForCheck[0] = 4; //Ephemeral engine
+ enginesToUseForCheck[1] = 8;//Whiplash engine
+ }
+
+  /**
+   * Tells whether the given engine number is one of the registered engines.
+   *
+   * @param sig engine number to look up
+   * @return true if enginesToUseForCheck contains sig
+   */
+  public boolean isSigSupported(int sig) {
+    for (int k = 0; k < enginesToUseForCheck.length; k++) {
+      if (enginesToUseForCheck[k] == sig)
+        return true;
+    }
+    return false;
+  }
+
+  /**
+   * String variant of isSigSupported(int).
+   *
+   * @param sig engine number as text; null yields false, any other value is
+   *            handed to Integer.parseInt()
+   * @return true if the parsed number is a registered engine
+   */
+  public boolean isSigSupported(String sig) {
+    if (sig == null)
+      return false;
+    return isSigSupported(Integer.parseInt(sig));
+  }
+
+ /** Returns the default engine number as text: always "4" (the number registered for the Ephemeral engine). */
+ public String getDefaultEngine() {
+ return "4";
+ }
+
+  /**
+   * Computes the printable signatures of a mail that has been split into
+   * three groups of parts.
+   *
+   * @param EmailParts Vector whose element 0 is the Vector of common parts,
+   *                   element 1 the Vector of URL / e-mail tokens and
+   *                   element 2 the Vector of URL-free body chunks
+   * @return null if EmailParts is null; otherwise a Vector of Strings of the
+   *         form "engineNo:signature", emitted in this order: common parts
+   *         (engine 4), body chunks (engine 4), URL tokens (engine 8)
+   */
+  public Vector computeSigs(Vector EmailParts) {
+    if (EmailParts == null) return null;
+
+    Vector printableSigs = new Vector(); // vector of strings
+
+    // Step I: signatures for the common parts
+    Vector commonParts = (Vector) (EmailParts.elementAt(0));
+    appendEphemeralSigs(commonParts,
+                        "Err: Common part Couldn't find the signature engine: ",
+                        printableSigs);
+
+    // Step II: signatures for the body parts without URLs
+    Vector bodyChunks = (Vector) (EmailParts.elementAt(2));
+    appendEphemeralSigs(bodyChunks,
+                        "Err: body parts without URL Couldn't find the signature engine: ",
+                        printableSigs);
+
+    // Step III: signatures for the body parts with URLs
+    Vector urlTokens = (Vector) (EmailParts.elementAt(1));
+    appendWhiplashSigs(urlTokens, printableSigs);
+
+    return printableSigs;
+  }//computeSigs
+
+  /**
+   * Appends an "engineNo:sig" entry for every non-null text in parts, using
+   * engine 4 only; engine 8 is skipped and any other engine number is
+   * reported with errPrefix followed by the number.
+   */
+  private void appendEphemeralSigs(Vector parts, String errPrefix, Vector out) {
+    for (int p = 0; p < parts.size(); p++) {
+      String text = (String) (parts.elementAt(p));
+      if (text == null) continue;
+
+      for (int e = 0; e < enginesToUseForCheck.length; e++) {
+        int engineNo = enginesToUseForCheck[e];
+
+        if (engineNo == 8) continue; // Whiplash is applied to URL tokens only
+
+        if (engineNo != 4) {
+          System.out.println(errPrefix + engineNo);
+          continue;
+        }
+
+        String sig = computeSignature(engineNo, text);
+        if (sig != null) {
+          out.addElement(engineNo + ":" + sig);
+        }
+      }
+    }
+  }
+
+  /**
+   * Appends one "engineNo:sig" entry per signature that engine 8 returns
+   * for each URL token; engine 4 is skipped and any other engine number is
+   * reported on standard output.
+   */
+  private void appendWhiplashSigs(Vector urlParts, Vector out) {
+    for (int p = 0; p < urlParts.size(); p++) {
+      String text = (String) (urlParts.elementAt(p));
+
+      for (int e = 0; e < enginesToUseForCheck.length; e++) {
+        int engineNo = enginesToUseForCheck[e];
+
+        if (engineNo == 4) continue; // Ephemeral is applied to text parts only
+
+        if (engineNo != 8) {
+          System.out.println("Err: body parts with URL Couldn't find the signature engine: " + engineNo);
+          continue;
+        }
+
+        String[] hosts = sig8.computeSignature(text);
+        if (hosts == null) continue; // no signature produced for this token
+
+        for (int h = 0; h < hosts.length; h++) {
+          out.addElement(engineNo + ":" + hosts[h]);
+        }
+      }
+    }
+  }
+
+  /**
+   * Computes a single signature with the requested engine.
+   *
+   * @param engineNo engine number; only 4 is handled here
+   * @param mail text to sign
+   * @return sig4.computeSignature(mail) when engineNo is 4, otherwise null
+   *         (engine 8 is invoked directly on sig8 by computeSigs)
+   */
+  private String computeSignature(int engineNo, String mail) {
+    if (engineNo != 4)
+      return null;
+    return this.sig4.computeSignature(mail);
+  }
+}
--- /dev/null
+public class SpamFilter extends Thread {
+ // map consulted and updated by check() and sendFeedBack()
+ DistributedHashMap mydhmap;
+
+ int id; //thread id; also seeds this worker's Random in run()
+
+ /**
+ * Total number of iterations
+ **/
+ int numiter;
+
+ /**
+ * Total number of emails
+ **/
+ int numemail;
+
+ /**
+ * Total number of threads
+ **/
+ int nthreads;
+
+ /** Creates an unconfigured instance; main() uses one as the target of parseCmdLine(). */
+ public SpamFilter() {
+
+ }
+
+ /**
+ * Creates a configured worker.
+ *
+ * @param numiter number of iterations run() performs
+ * @param numemail number of emails processed per iteration
+ * @param id thread id of this worker
+ * @param mydhmap map used by check() and sendFeedBack()
+ * @param nthreads total number of threads
+ */
+ public SpamFilter(int numiter, int numemail,int id, DistributedHashMap mydhmap, int nthreads) {
+ this.numiter=numiter;
+ this.numemail=numemail;
+ this.id = id;
+ this.mydhmap = mydhmap;
+ this.nthreads = nthreads;
+ }
+
+ public void run() {
+ int niter;
+ int nemails;
+ int thid;
+ int correct=0;
+ int wrong=0;
+
+ {
+ niter=numiter;
+ nemails=numemail;
+ thid = id;
+ }
+
+ Random rand = new Random(thid);
+ int i;
+
+ long st = System.currentTimeMillis();
+ long fi;
+
+ for(i=0; i<niter; i++) {
+ correct =0;
+ wrong = 0;
+ for(int j=0; j<nemails; j++) {
+ // long start = System.currentTimeMillis();
+ int pickemail = rand.nextInt(100);
+
+// System.out.println("pickemail= " + pickemail);
+
+ // randomly pick emails
+ pickemail+=1;
+ Mail email = new Mail("../emails/email"+pickemail);
+ Vector signatures = email.checkMail(thid);
+
+ //check with data structure
+ int[] confidenceVals=null;
+ // long startcheck = System.currentTimeMillis();
+ {
+ confidenceVals = check(signatures,thid);
+ }
+ // long stopcheckMail = System.currentTimeMillis();
+ // long diff = (stopcheckMail-startcheck);
+ // System.out.println("check takes= " + diff + "millisecs");
+
+ /* Only for debugging
+ for(int k=0; k<signatures.size();k++) {
+ System.out.println("confidenceVals["+k+"]= "+confidenceVals[k]);
+ }
+ */
+
+ //---- create and return results --------
+ FilterResult filterResult = new FilterResult();
+ //long startgetResult = System.currentTimeMillis();
+ boolean filterAnswer = filterResult.getResult(confidenceVals);
+ //long stopgetResult = System.currentTimeMillis();
+ //diff = (stopgetResult-startgetResult);
+ //System.out.println("getResult takes= " + diff + "millisecs");
+
+ //---- get user's take on email and send feedback ------
+ boolean userAnswer = email.getIsSpam();
+
+// System.out.println("userAnswer= " + userAnswer + " filterAnswer= " + filterAnswer);
+
+ if(filterAnswer != userAnswer) {
+ /* wrong answer from the spam filter */
+ wrong++;
+ //long startsendFeedBack = System.currentTimeMillis();
+ {
+ sendFeedBack(signatures, userAnswer, thid, rand);
+ }
+ //long stopsendFeedBack = System.currentTimeMillis();
+ //diff = (stopsendFeedBack-startsendFeedBack);
+ //System.out.println("sendFeedback takes= " + diff + "millisecs");
+ }
+ else {
+ /* Correct answer from the spam filter */
+ correct++;
+ }
+ //long stop = System.currentTimeMillis();
+ //diff = stop-start;
+// System.out.println("time to complete iteration" + j + " = " + diff + " millisecs");
+ } //end num emails
+// System.out.println((i+1)+"th iteration correct = " + correct + " Wrong = " + wrong + " percentage = " + ((float)correct/(float)nemails));
+ }//end num iter
+ // Sanity check
+ fi = System.currentTimeMillis();
+
+ System.out.println((i)+"th iteration correct = " + correct + " Wrong = " + wrong + " percentage = " + ((float)correct/(float)nemails));
+ System.out.println("\n\n\n I'm Done - Time Elapse : " + (double)((fi-st)/1000) +"\n\n\n");
+ }
+
+  /**
+   * Program entry point: parses the command line, creates the shared map and
+   * one SpamFilter per requested thread, then runs the workers.
+   *
+   * @param args see usage()
+   */
+  public static void main(String[] args) {
+    // Packed IPv4 addresses 128.195.136.162 .. 128.195.136.169
+    // (dc-1.calit2 .. dc-8.calit2). The array is filled but not read again
+    // in this method.
+    int[] mid = new int[8];
+    for(int k=0; k<8; k++) {
+      mid[k] = (128<<24)|(195<<16)|(136<<8)|(162+k);
+    }
+
+    //Read options from command prompt
+    SpamFilter sf = new SpamFilter();
+    SpamFilter.parseCmdLine(args, sf);
+    int nthreads = sf.nthreads;
+
+    //Create the shared data structure and the workers
+    DistributedHashMap dhmap = new DistributedHashMap(500, 0.75f);
+    SpamFilter[] spf = new SpamFilter[nthreads];
+    for(int i=0; i<nthreads; i++) {
+      spf[i] = new SpamFilter(sf.numiter, sf.numemail, i, dhmap, nthreads);
+    }
+
+    /* ---- Run workers ---- */
+    // NOTE(review): run() is invoked directly, so the workers execute one
+    // after another on the calling thread - confirm this is intended.
+    for(int i=0; i<nthreads; i++) {
+      SpamFilter worker = spf[i];
+      worker.run();
+    }
+
+    System.out.println("Finished");
+  }
+
+ public static void parseCmdLine(String args[], SpamFilter sf) {
+ int i = 1;
+
+ sf.nthreads = new Integer(args[0]).intValue();
+
+
+ String arg;
+ while (i < args.length && args[i].startsWith("-")) {
+ arg = args[i++];
+ //check options
+ if(arg.equals("-n")) { //num of iterations
+ if(i < args.length) {
+ sf.numiter = new Integer(args[i++]).intValue();
+ }
+ } else if(arg.equals("-e")) { //num of emails
+ if(i < args.length) {
+ sf.numemail = new Integer(args[i++]).intValue();
+ }
+ }
+
+ /*else if(arg.equals("-t")) { //num of threads
+ if(i < args.length) {
+ sf.nthreads = new Integer(args[i++]).intValue();
+ }
+ }
+ */
+ else if(arg.equals("-h")) {
+ sf.usage();
+ }
+ }
+ if(sf.nthreads == 0) {
+ sf.usage();
+ }
+ }
+
+ /**
+ * Prints the command-line synopsis and the meaning of the -n and -e
+ * options to standard output. It only prints; it does not stop the program.
+ **/
+ public void usage() {
+ System.out.println("usage: ./spamfilter <num thread> -n <num iterations> -e <num emails>\n");
+ System.out.println( " -n : num iterations");
+ System.out.println( " -e : number of emails");
+ }
+
+ /**
+ * Returns result to the Spam filter
+ **/
+ /*
+ public boolean checkMail(Mail mail, int userid) {
+ //Preprocess emails
+ //Vector partsOfMailStrings = mail.createMailStringsWithURL();
+ /*
+ Vector partsOfMailStrings = mail.getCommonPart();
+ partsOfMailStrings.addElement(mail.getBodyString());
+
+ //Compute signatures
+ SignatureComputer sigComp = new SignatureComputer();
+ Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
+
+ //check with data structure
+ int[] confidenceVals = check(signatures,userid);
+
+ //---- create and return results --------
+ FilterResult filterResult = new FilterResult();
+ boolean spam = filterResult.getResult(confidenceVals);
+
+ return spam;
+ }
+ */
+
+  /**
+   * Looks up every signature in the shared map and returns one confidence
+   * value per signature.
+   *
+   * Each signature has the form "a:b": character 0 is the engine ('4' or
+   * '8'), character 1 the separator, and the text from index 2 on is the
+   * signature itself. A signature that is not yet in the map is inserted
+   * with a fresh FilterStatistic(0,0,-1) and gets confidence 0; otherwise
+   * the stored statistic's getChecked() value is used.
+   *
+   * @param signatures Vector of "engine:signature" Strings
+   * @param userid id recorded in the HashStat of newly inserted entries
+   * @return confidence values, index-aligned with signatures
+   */
+  public int[] check(Vector signatures, int userid) {
+    int total = signatures.size();
+    int[] confidenceVals = new int[total];
+
+    for(int k=0; k<total; k++) {
+      String entry = (String)(signatures.elementAt(k));
+      char engineTag = entry.charAt(0);
+
+      // engine stays null for any tag other than '4' or '8'
+      GString engine = null;
+      if(engineTag == '4') {        //Ephemeral Signature calculator
+        engine = new GString(new String("4"));
+      } else if(engineTag == '8') { //Whiplash Signature calculator
+        engine = new GString(new String("8"));
+      }
+
+      GString signature = new GString(new String(entry.substring(2)));
+      HashEntry probe = new HashEntry();
+      probe.setengine(engine);
+      probe.setsig(signature);
+
+      if(mydhmap.containsKey(probe)) {
+        // known signature: read the statistic stored under the existing key
+        HashEntry stored = (HashEntry)(mydhmap.getKey(probe));
+        FilterStatistic fs = (FilterStatistic)(mydhmap.get(stored));
+        confidenceVals[k] = fs.getChecked();
+        continue;
+      }
+
+      // unknown signature: register it with empty statistics
+      HashStat stat = new HashStat();
+      stat.setuser(userid, 0, 0, -1);
+      probe.setstats(stat);
+      mydhmap.put(probe, new FilterStatistic(0,0,-1));
+      confidenceVals[k] = 0;
+    }
+
+    // The caller decides from these values whether the mail is spam.
+    return confidenceVals;
+  }
+
+  /**
+   * Sends the user's verdict on a mail to the shared spam database so that
+   * future checks can use it.
+   *
+   * For every signature that is already present in the map, the entry's
+   * per-user statistics and its FilterStatistic are incremented. A random
+   * draw in 0..99 decides the direction: below 95 the verdict is applied as
+   * given, otherwise the opposite counter is incremented (simulating wrong
+   * user feedback). Signatures that are not in the map are ignored.
+   *
+   * @param signatures Vector of "engine:signature" Strings (engine is the
+   *                   first character, the signature starts at index 2)
+   * @param isSpam the user's verdict
+   * @param id index of the reporting user/thread
+   * @param myrand random source for the simulated error rate
+   **/
+  public void sendFeedBack(Vector signatures, boolean isSpam, int id, Random myrand) {
+    for(int k=0; k<signatures.size(); k++) {
+      String entry = (String)(signatures.elementAt(k));
+      char engineTag = entry.charAt(0);
+
+      // engine stays null for any tag other than '4' or '8'
+      GString engine = null;
+      if(engineTag == '4') {
+        engine = new GString(new String("4"));
+      } else if(engineTag == '8') {
+        engine = new GString(new String("8"));
+      }
+
+      GString signature = new GString(new String(entry.substring(2)));
+
+      HashEntry probe = new HashEntry();
+      probe.setengine(engine);
+      probe.setsig(signature);
+
+      // ----- now connect to data structure and update stats -----
+      if(!mydhmap.containsKey(probe))
+        continue;
+
+      HashEntry stored = (HashEntry)(mydhmap.getKey(probe));
+      if(stored.stats.userid[id] != 1) {
+        stored.stats.setuserid(id);
+      }
+
+      //---- get value from distributed hash and update spam count
+      FilterStatistic fs = (FilterStatistic)(mydhmap.get(probe));
+
+      // 95 draws out of 100 report the verdict as given, 5 report it inverted
+      int roll = myrand.nextInt(100);
+      boolean reportSpam = isSpam;
+      if(roll >= 95) {
+        reportSpam = !isSpam;
+      }
+
+      if(reportSpam) {
+        stored.stats.incSpamCount(id);
+        fs.increaseSpam();
+      } else {
+        stored.stats.incHamCount(id);
+        fs.increaseHam();
+      }
+    }
+  }//end of sendFeedBack()
+}
+
+
--- /dev/null
+
+/*
+ Part of the Spamato project (www.spamato.net)
+ Copyright (C) 2005 ETHZ, DCG
+ contact by email: info@spamato.net
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+ $Id: WhiplashSignature.java,v 1.1 2010/03/04 00:17:44 adash Exp $
+ */
+public class WhiplashSignature {
+ char[] b64table;
+
+  /**
+   * Builds the 64-entry lookup table used by hexToBase64():
+   * 'A'-'Z', 'a'-'z', '0'-'9', then '-' and '_'.
+   */
+  public WhiplashSignature() {
+    String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                    + "abcdefghijklmnopqrstuvwxyz"
+                    + "0123456789";
+
+    b64table = new char[64];
+    for (int k = 0; k < 62; k++) {
+      b64table[k] = alphabet.charAt(k);
+    }
+    b64table[62] = '-';
+    b64table[63] = '_';
+  }
+
+  /**
+   * Computes the Whiplash signatures of the given text.
+   *
+   * Currently a direct delegation to whiplash(); converting the result to
+   * Base64 with hexToBase64() is not done here (left as a TODO in the
+   * original code).
+   *
+   * @param text text to scan for hosts
+   * @return the result of whiplash(text), which may be null
+   */
+  public String[] computeSignature(String text) {
+    return whiplash(text);
+  }
+
+ /**
+ * Converts a hex string into a base64-style string exactly as it is done in razor.
+ * The input is consumed in groups of up to 3 hex characters; each group is turned
+ * into a 12-bit value by convertHexToRazorEncoding() and emitted as two characters
+ * looked up in b64table.
+ * @param hex a hex-value; null yields null
+ * @return a base64-equivalent of <code>hex</code>.
+ */
+ public String hexToBase64(String hex){
+ if(hex == null)
+ return null;
+ // two output slots per (possibly partial) group of three hex characters
+ int[] b64s = new int[hex.length()*2/3 + ((hex.length()*2)%3)];
+ int i=0;
+ int b64count = 0;
+
+ while(i < hex.length()){
+ //process 3 hex char chunks at a time
+ int upperBorder = Math.imin(i+3,hex.length());
+ String hex3 = hex.substring(i,upperBorder);
+ i+=3;
+
+ int bv = convertHexToRazorEncoding(hex3);
+ //now the right endian encoding
+ b64s[b64count++] = ((0xfc0 & bv)>>>6); //higher 6 bits
+ b64s[b64count++] = (0x3f & bv) ; //lower 6 bits
+
+ }
+ // map every 6-bit value to its table character
+ String bs = "";
+ for (int j= 0; j < b64s.length; j++) {
+ bs += b64table[ b64s[j] ];
+ }
+ return bs;
+ }
+
+ /**
+ * razor does some special conversion using perl's <code>pack()</code> which
+ * we must do manually in java.
+ * Each of up to three hex digits is parsed, bit-mirrored with mirror4LSBits()
+ * and placed in bits 11-8, 7-4 and 3-0 of the result; missing second or third
+ * digits count as 0.
+ * @param hex3 one to three hex characters; null yields 0
+ * @return the packed 12-bit value
+ */
+ // NOTE(review): an empty hex3 would reach substring(0,1) below; hexToBase64() only
+ // passes non-empty chunks because its loop requires i < hex.length().
+ private int convertHexToRazorEncoding(String hex3) {
+ if((hex3 == null))
+ return 0; //error
+ int res = 0;
+ // first digit -> bits 11..8
+ int cur = Integer.parseInt(hex3.substring(0,1),16);
+ cur = mirror4LSBits(cur);
+ res |= ( (cur&0xf) << 8);
+ // second digit (0 if absent) -> bits 7..4
+ if(hex3.length() >=2) {
+ cur = Integer.parseInt(hex3.substring(1,2),16);
+ } else {
+ cur = 0;
+ }
+ //cur = ( hex3.length() >=2 ? Integer.parseInt(hex3.substring(1,2),16) : 0);
+ cur = mirror4LSBits(cur);
+ res |= ((cur & 0xf) << 4);
+ // third digit (0 if absent) -> bits 3..0
+ if(hex3.length() >= 3) {
+ cur = Integer.parseInt(hex3.substring(2,3),16);
+ } else {
+ cur = 0;
+ }
+ //cur = ( hex3.length() >= 3 ? Integer.parseInt(hex3.substring(2,3),16): 0);
+ cur = mirror4LSBits(cur);
+ res |= (cur & 0xf);
+
+ return res;
+ }
+
+  /**
+   * Mirrors the 4 least significant bits of an integer.
+   * @param cur an int whose 4 least significant bits are <code>abcd</code>
+   * @return the mirrored nibble <code>dcba</code> in the 4 least significant
+   *         bits; all bits of <code>cur</code> other than <code>a-d</code> are dropped.
+   */
+  public int mirror4LSBits(int cur) {
+    int fromBit3 = (cur & 0x8) >>> 3; // bit 3 -> bit 0
+    int fromBit2 = (cur & 0x4) >>> 1; // bit 2 -> bit 1
+    int fromBit1 = (cur & 0x2) << 1;  // bit 1 -> bit 2
+    int fromBit0 = (cur & 0x1) << 3;  // bit 0 -> bit 3
+    return fromBit3 | fromBit2 | fromBit1 | fromBit0;
+  }
+
+  /**
+   * Produces one signature per host found in the text.
+   *
+   * Every host returned by extractHosts() is fed to a fresh MD5 instance and
+   * the contents of the 16-byte buffer passed to md5final() are wrapped in a
+   * String.
+   *
+   * @param text text to scan; null yields null
+   * @return signatures index-aligned with the extracted hosts, or null when
+   *         no host was found
+   */
+  public String[] whiplash(String text) {
+    if (text == null) {
+      return null;
+    }
+
+    String[] hosts = extractHosts(text);
+    if (hosts == null || hosts.length < 1) {
+      return null;
+    }
+
+    String[] sigs = new String[hosts.length];
+    for (int k = 0; k < hosts.length; k++) {
+      String host = hosts[k];
+      byte[] digest = new byte[16];
+
+      MD5 hasher = new MD5();
+      hasher.update(host.getBytes(), host.length());
+      hasher.md5final(digest);
+
+      sigs[k] = new String(digest);
+    }
+    return sigs;
+  }
+
+  /**
+   * Extracts simplified host names from the text.
+   *
+   * Two scans are made over the full text: one for the marker "www." and
+   * one for "@". For every marker occurrence, the host is the text between
+   * the marker and the next "." - for example "www.example.com" and
+   * "user@example.com" both yield "example". Scanning resumes after that dot.
+   *
+   * @param text text to scan
+   * @return the hosts in discovery order ("www." matches first, then "@"
+   *         matches), or null when none was found
+   */
+  public String[] extractHosts(String text) {
+    Vector hosts = new Vector();
+
+    /* Extract hosts from www. links */
+    collectHosts(text, "www.", hosts);
+
+    /* Extract hosts from email addresses */
+    collectHosts(text, "@", hosts);
+
+    if (hosts.size() == 0) {
+      return null;
+    }
+
+    String[] retbuf = new String[hosts.size()];
+    for (int i = 0; i < hosts.size(); i++) {
+      retbuf[i] = (String) (hosts.elementAt(i));
+    }
+    return retbuf;
+  }
+
+  /**
+   * Appends to hosts the text between each occurrence of marker and the
+   * following "." in text.
+   */
+  private void collectHosts(String text, String marker, Vector hosts) {
+    String dot = new String(".");
+    String buf = new String(text);
+    int idx;
+
+    while ((idx = buf.indexOf(marker)) != -1) {
+      buf = buf.subString(idx + marker.length());
+
+      int endidx = buf.indexOf(dot);
+      if (endidx == -1) {
+        // No terminating dot after the marker. The previous code passed this
+        // -1 straight to subString(0, -1). No later marker can have a dot
+        // either, so stop scanning.
+        break;
+      }
+
+      // The previous code then tried to append a second label, but that
+      // step always appended the empty string; the host is this label only.
+      hosts.addElement(buf.subString(0, endidx));
+
+      buf = buf.subString(endidx + dot.length());
+    }
+  }
+
+// Testing the signature computation
+// public static void main(String[] args) {
+// /* String testVector = " Test Vectors: \n"+
+// "\n" +
+// "1. http:www.nodg.com@www.geocities.com/nxcisdsfdfdsy/off\n"+
+// "2. http:www.ksleybiuh.com@213.171.60.74/getoff/\n"+
+// "3. <http:links.verotel.com/cgi-bin/showsite.verotel?vercode=12372:9804000000374206>\n"+
+// "4. http:217.12.4.7/rmi/http:definethis.net/526/index.html\n"+
+// "5. http:magalygr8sex.free-host.com/h.html\n"+
+// "6. http:%3CVenkatrs%3E@218.80.74.102/thecard/4index.htm\n"+
+// "7. http:EBCDVKIGURGGCEOKXHINOCANVQOIDOXJWTWGPC@218.80.74.102/thecard/5in\n"+
+// "8. http:g.india2.bag.gs/remove_page.htm\n"+
+// "9. https:220.97.40.149\n"+
+// "10. http:mjaked.biz/unsubscribe.ddd?leaving\n"+
+// "11. http:g5j99m8@it.rd.yahoo.com/bassi/*http:www.lekobas.com/c/index.php\n"+
+// "12. <a href=\"http:Chettxuydyhv vwyyrcmgbxzj n as ecq kkurxtrvaug nfsygjjjwhfkpaklh t a qsc exinscfjtxr\n"+
+// " jobg @www.mmv9.org?affil=19\">look great / feel great</a>\n"+
+// "13. <A HREF=\"http:href=www.churchwomen.comhref=www.cairn.nethref=www.teeter.orghref=www.lefty.bizhref=wwwbehold.pitfall@www.mmstong5f.com/host/index.asp?ID=01910?href=www.corrode.comhref=www.ode.nethref=www.clergy.orghref=www.aberrate.biz\" >\n"+
+// "14. www.pillzthatwork.com # anything that starts with www. \n";
+// */
+// String testVector = "<html>\n"+
+// "<body>\n"+
+// "<p>Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com</p>\n"+
+// "<p>now a masked link <a href=\"http://www.hiddenlink1.com\">http://www.coveringlink1.com</a> and another link http:plaintextlink3.net and how about https:plaintextlink4.to</p>\n"+
+// "<p>another masked link <A Href=\"http://www.hiddenlink2.com\">https:coveringlink2.com</A> and another link https:plaintextlink5.com</p>\n"+
+// "</body>\n"+
+// "</html>\n";
+// String test1 = "Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com</p>\n";
+// WhiplashSignature whiplash = new WhiplashSignature();
+// String[] hosts = whiplash.computeSignature(testVector);
+// //String[] hosts = whiplash.computeSignature(test1);
+// for (int i = 0; i < hosts.length; i++) {
+// String string = hosts[i];
+// System.out.println("host " + i + ":\t" + string);
+// }
+// }
+
+}
--- /dev/null
+# Build description for the SpamFilter benchmark.
+MAINCLASS=SpamFilter
+SRC=${MAINCLASS}.java \
+	DistributedHashMap.java \
+	Mail.java \
+	FilterResult.java \
+	HashEntry.java \
+	HashStat.java \
+	SignatureComputer.java \
+	FilterStatistic.java \
+	EphemeralSignature.java \
+	GString.java \
+	WhiplashSignature.java
+
+FLAGS=-optimize -thread -mainclass ${MAINCLASS}
+
+# Neither target is a file name; declare them phony so a stray file called
+# "default" or "clean" cannot make the rule look up to date.
+.PHONY: default clean
+
+default:
+	../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC}
+
+clean:
+	rm -rf tmpbuilddirectory*
+	# -f: do not fail when no .bin file exists
+	rm -f *.bin
this.offset=0;
}
- public static char[] toLocalCharArray(GString str) {
- char[] c;
- int length;
-
- length = str.length();
-
- c = new char[length];
-
- for (int i = 0; i < length; i++) {
- c[i] = str.value[i+str.offset];
- }
- return c;
- }
-
- public String toLocalString() {
- return new String(toLocalCharArray(this));
- }
-
public int length() {
return count;
}
correct =0;
wrong = 0;
for(int j=0; j<nemails; j++) {
- // long start = System.currentTimeMillis();
+ //long start = System.currentTimeMillis();
int pickemail = rand.nextInt(100);
-// System.out.println("pickemail= " + pickemail);
+ //System.out.println("pickemail= " + pickemail);
// randomly pick emails
pickemail+=1;
//check with global data structure
int[] confidenceVals=null;
- // long startcheck = System.currentTimeMillis();
+ //long startcheck = System.currentTimeMillis();
atomic {
confidenceVals = check(signatures,thid);
}
- // long stopcheckMail = System.currentTimeMillis();
- // long diff = (stopcheckMail-startcheck);
- // System.out.println("check takes= " + diff + "millisecs");
+ //long stopcheckMail = System.currentTimeMillis();
+ //long diff = (stopcheckMail-startcheck);
+ //System.out.println("check takes= " + diff + "millisecs");
/* Only for debugging
for(int k=0; k<signatures.size();k++) {
-128.195.180.21
-128.195.180.26
-#128.195.136.162
-#128.195.136.163
-#128.195.136.164
-#128.195.136.165
-#128.195.136.166
-#128.195.136.167
-#128.195.136.168
-#128.195.136.169
-
-
+128.195.136.162
GString.java \
WhiplashSignature.java
-FLAGS= -dsm -recoverystats -recovery -nooptimize -mainclass ${MAINCLASS}
+FLAGS= -prefetch -dsmcaching -dsm -recoverystats -recovery -optimize -mainclass ${MAINCLASS}
default:
../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC}
SRC1=${MAINCLASS}.java
SRC2=Global${SUBCLASS}.java
SRC3=${SUBCLASS}Task.java
-FLAGS= -recoverystats -recovery -dsm -dsmtask -nooptimize -mainclass ${MAINCLASS}
+FLAGS= -recoverystats -recovery -dsmcaching -dsm -dsmtask -optimize -mainclass ${MAINCLASS}
default:
../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC1}