From 2f5a04f6b4d4dd70d32800e53e725410aecfe70a Mon Sep 17 00:00:00 2001 From: adash Date: Mon, 2 Nov 2009 20:55:47 +0000 Subject: [PATCH] add MD5 hash file and EphemeralSignature some more changes --- .../SpamFilter/DistributedHashMap.java | 93 ++-- .../SpamFilter/EphemeralSignature.java | 38 +- .../Distributed/SpamFilter/HashStat.java | 12 + .../Distributed/SpamFilter/MD5.java | 424 ++++++++++++++++++ .../Distributed/SpamFilter/Mail.java | 8 + .../Distributed/SpamFilter/SpamFilter.java | 69 ++- 6 files changed, 579 insertions(+), 65 deletions(-) create mode 100644 Robust/src/Benchmarks/Distributed/SpamFilter/MD5.java diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/DistributedHashMap.java b/Robust/src/Benchmarks/Distributed/SpamFilter/DistributedHashMap.java index f8601118..5b785723 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/DistributedHashMap.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/DistributedHashMap.java @@ -1,16 +1,14 @@ public class DistributedHashMap { DistributedHashEntry[] table; float loadFactor; - int secondcapacity; - public DistributedHashMap(int initialCapacity, int secondcapacity, float loadFactor) { - init(initialCapacity, secondcapacity, loadFactor); + public DistributedHashMap(int initialCapacity, float loadFactor) { + init(initialCapacity, loadFactor); } - private void init(int initialCapacity, int secondcapacity, float loadFactor) { + private void init(int initialCapacity, float loadFactor) { table=global new DistributedHashEntry[initialCapacity]; this.loadFactor=loadFactor; - this.secondcapacity=secondcapacity; } private static int hash1(int hashcode, int length) { @@ -21,30 +19,8 @@ public class DistributedHashMap { return value; } - private static int hash2(int hashcode, int length1, int length2) { - int value=(hashcode*31)%length2; - if (value<0) - return -value; - else - return value; - } - - void resize(int index) { - DHashEntry[] oldtable=table[index].array; - int newCapacity=oldtable.length*2+1; - DHashEntry [] newtable=global new DHashEntry[newCapacity]; - table[index].array=newtable; - - for(int i=0; i(loadFactor*dhe.array.length)) { - //Resize the table - resize(index1); - } return null; } } - class DistributedHashEntry { - public DistributedHashEntry(int capacity) { - array=global new DHashEntry[capacity]; + public DistributedHashEntry() { } int count; - DHashEntry[] array; + DHashEntry array; } + class DHashEntry { + public DHashEntry() { + } int hashval; Object key; Object value; DHashEntry next; - public DHashEntry() { - } } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/EphemeralSignature.java b/Robust/src/Benchmarks/Distributed/SpamFilter/EphemeralSignature.java index de47920f..b5a80a5a 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/EphemeralSignature.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/EphemeralSignature.java @@ -1,15 +1,51 @@ public class EphemeralSignature { + + private int serverSeed; + private String serverSeparator; + Random rand; + public EphemeralSignature() { + Random rand = new Random(0); } public EphemeralSignature(int randomNumberSeed, String separator) { + Random rand = new Random(randomNumberSeed); + serverSeparator = separator; } public EphemeralSignature(String seedAndSeparator) { + serverSeparator = seedAndSeparator; + } + + public String computeSignature(String body) { + MD5 md = new MD5(); + int len = body.length(); + byte buf[] = body.getBytes(); + byte sig[] = new byte[16]; + + md.update(buf, len); + md.md5final(sig); + String signature = new String(sig); + return signature; } - public computeSignature(String mail) { + private String computeHexDigest(String body) { + return + } + + /* + public long DEKHash(String str) + { + long hash = str.length(); + for(int i = 0; i < str.length(); i++) + { + hash = ((hash << 5) ^ (hash >> 27)) ^ str.charAt(i); + } + + return hash; } + */ + } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java b/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java index 92143eeb..d8d2b58b 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java @@ -17,6 +17,10 @@ public class HashStat { userstat[id].setUnknown(unknown); } + public void setuserid(int id) { + userid[id] = 1; + } + public int getuser(int id) { return userid[id]; } @@ -33,6 +37,14 @@ public class HashStat { return userstat[userid].getUnknown(); } + public void incSpamCount(int userid) { + userstat[userid].increaseSpam(); + } + + public void incHamCount(int userid) { + userstat[userid].increaseHam(); + } + public int[] getUsers() { int nusers = numUsers(); listofusers = new int[nusers]; diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/MD5.java b/Robust/src/Benchmarks/Distributed/SpamFilter/MD5.java new file mode 100644 index 00000000..a744bbea --- /dev/null +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/MD5.java @@ -0,0 +1,424 @@ + +// This class computes MD5 hashes. +// Manually translated by Jon Howell +// from some public domain C code (md5.c) included with the ssh-1.2.22 source. +// Tue Jan 19 15:55:50 EST 1999 +// $Id: MD5.java,v 1.1 2009/11/02 20:55:47 adash Exp $ +// +// To compute the message digest of a chunk of bytes, create an +// MD5 object 'md5', call md5.update() as needed on buffers full +// of bytes, and then call md5.md5final(), which +// will fill a supplied 16-byte array with the digest. +// +// A main() method is included that hashes the data on System.in. +// +// It seems to run around 25-30 times slower (JDK1.1.6) than optimized C +// (gcc -O4, version 2.7.2.3). Measured on a Sun Ultra 5 (SPARC 270MHz). +// +// Comments from md5.c from ssh-1.2.22, the basis for this code: +// +/* This code has been heavily hacked by Tatu Ylonen to + make it compile on machines like Cray that don't have a 32 bit integer + type. */ +/* + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + */ + +public class MD5 { + int buf[]; // These were originally unsigned ints. + // This Java code makes an effort to avoid sign traps. + // buf[] is where the hash accumulates. + long bits; // This is the count of bits hashed so far. + byte in[]; // This is a buffer where we stash bytes until we have + // enough (64) to perform a transform operation. + int inint[]; + // inint[] used and discarded inside transform(), + // but why allocate it over and over? + // (In the C version this is allocated on the stack.) + + public MD5() { + buf = new int[4]; + // fill the hash accumulator with a seed value + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + // initially, we've hashed zero bits + bits = 0L; + + in = new byte[64]; + inint = new int[16]; + } + + public void update(byte[] newbuf) { + update(newbuf, 0, newbuf.length); + } + + public void update(byte[] newbuf, int length) { + update(newbuf, 0, length); + } + + public void update(byte[] newbuf, int bufstart, int buflen) { + int t; + int len = buflen; + + // shash old bits value for the "Bytes already in" computation + // just below. + t = (int) bits; // (int) cast should just drop high bits, I hope + + /* update bitcount */ + /* the C code used two 32-bit ints separately, and carefully + * ensured that the carry carried. + * Java has a 64-bit long, which is just what the code really wants. + */ + bits += (long)(len<<3); + + t = (t >>> 3) & 0x3f; /* Bytes already in this->in */ + + /* Handle any leading odd-sized chunks */ + /* (that is, any left-over chunk left by last update() */ + + if (t!=0) { + int p = t; + t = 64 - t; + if (len < t) { + arraycopy(newbuf, bufstart, in, p, len); + return; + } + arraycopy(newbuf, bufstart, in, p, t); + transform(); + bufstart += t; + len -= t; + } + + /* Process data in 64-byte chunks */ + while (len >= 64) { + arraycopy(newbuf, bufstart, in, 0, 64); + transform(); + bufstart += 64; + len -= 64; + } + + /* Handle any remaining bytes of data. */ + /* that is, stash them for the next update(). */ + arraycopy(newbuf, bufstart, in, 0, len); + } + + public void arraycopy(byte[] src, int srcPos, byte[] dest, int destPos, int len) { + for (int i = 0; i < len; i++) { + dest[destPos+i] = src[srcPos+i]; + } + return; + } + + /* + * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) + */ + public void md5final(byte[] digest) { + /* "final" is a poor method name in Java. :v) */ + int count; + int p; // in original code, this is a pointer; in this java code + // it's an index into the array this->in. + + /* Compute number of bytes mod 64 */ + count = (int) ((bits >>> 3) & 0x3F); + + /* Set the first char of padding to 0x80. This is safe since there is + always at least one byte free */ + p = count; + in[p++] = (byte) 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + zeroByteArray(in, p, count); + transform(); + + /* Now fill the next block with 56 bytes */ + zeroByteArray(in, 0, 56); + } else { + /* Pad block to 56 bytes */ + zeroByteArray(in, p, count - 8); + } + + /* Append length in bits and transform */ + // Could use a PUT_64BIT... func here. This is a fairly + // direct translation from the C code, where bits was an array + // of two 32-bit ints. + int lowbits = (int) bits; + int highbits = (int) (bits >>> 32); + PUT_32BIT_LSB_FIRST(in, 56, lowbits); + PUT_32BIT_LSB_FIRST(in, 60, highbits); + + transform(); + PUT_32BIT_LSB_FIRST(digest, 0, buf[0]); + PUT_32BIT_LSB_FIRST(digest, 4, buf[1]); + PUT_32BIT_LSB_FIRST(digest, 8, buf[2]); + PUT_32BIT_LSB_FIRST(digest, 12, buf[3]); + + /* zero sensitive data */ + /* notice this misses any sneaking out on the stack. The C + * version uses registers in some spots, perhaps because + * they care about this. + */ + zeroByteArray(in); + zeroIntArray(buf); + bits = 0; + zeroIntArray(inint); + } + + /* + public static void main(String args[]) { + // This main() method was created to easily test + // this class. It hashes whatever's on System.in. + + byte buf[] = new byte[397]; + // arbitrary buffer length designed to irritate update() + int rc; + MD5 md = new MD5(); + byte out[] = new byte[16]; + int i; + int len = 0; + + try { + while ((rc = System.in.read(buf, 0, 397)) > 0) { + md.update(buf, rc); + len += rc; + } + } catch (IOException ex) { + ex.printStackTrace(); + return; + } + md.md5final(out); + + System.out.println("file length: "+len); + System.out.println("hash: "+dumpBytes(out)); + } + */ + + + ///////////////////////////////////////////////////////////////////// + // Below here ye will only finde private functions // + ///////////////////////////////////////////////////////////////////// + + // There must be a way to do these functions that's + // built into Java, and I just haven't noticed it yet. + + private void zeroByteArray(byte[] a) { + zeroByteArray(a, 0, a.length); + } + + private void zeroByteArray(byte[] a, int start, int length) { + setByteArray(a, (byte) 0, start, length); + } + + private void setByteArray(byte[] a, byte val, int start, int length) { + int i; + int end = start+length; + for (i=start; i>>(32-s); + w += x; + return w; + } + + private int MD5STEP2(int w, int x, int y, int z, int data, int s) { + w += (y ^ (z & (x ^ y))) + data; + w = w<>>(32-s); + w += x; + return w; + } + + private int MD5STEP3(int w, int x, int y, int z, int data, int s) { + w += (x ^ y ^ z) + data; + w = w<>>(32-s); + w += x; + return w; + } + + private int MD5STEP4(int w, int x, int y, int z, int data, int s) { + w += (y ^ (x | ~z)) + data; + w = w<>>(32-s); + w += x; + return w; + } + + private void transform() { + /* load in[] byte array into an internal int array */ + int i; + int[] inint = new int[16]; + + for (i=0; i<16; i++) { + inint[i] = GET_32BIT_LSB_FIRST(in, 4*i); + } + + int a, b, c, d; + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + a = MD5STEP1(a, b, c, d, inint[0] + 0xd76aa478, 7); + d = MD5STEP1(d, a, b, c, inint[1] + 0xe8c7b756, 12); + c = MD5STEP1(c, d, a, b, inint[2] + 0x242070db, 17); + b = MD5STEP1(b, c, d, a, inint[3] + 0xc1bdceee, 22); + a = MD5STEP1(a, b, c, d, inint[4] + 0xf57c0faf, 7); + d = MD5STEP1(d, a, b, c, inint[5] + 0x4787c62a, 12); + c = MD5STEP1(c, d, a, b, inint[6] + 0xa8304613, 17); + b = MD5STEP1(b, c, d, a, inint[7] + 0xfd469501, 22); + a = MD5STEP1(a, b, c, d, inint[8] + 0x698098d8, 7); + d = MD5STEP1(d, a, b, c, inint[9] + 0x8b44f7af, 12); + c = MD5STEP1(c, d, a, b, inint[10] + 0xffff5bb1, 17); + b = MD5STEP1(b, c, d, a, inint[11] + 0x895cd7be, 22); + a = MD5STEP1(a, b, c, d, inint[12] + 0x6b901122, 7); + d = MD5STEP1(d, a, b, c, inint[13] + 0xfd987193, 12); + c = MD5STEP1(c, d, a, b, inint[14] + 0xa679438e, 17); + b = MD5STEP1(b, c, d, a, inint[15] + 0x49b40821, 22); + + a = MD5STEP2(a, b, c, d, inint[1] + 0xf61e2562, 5); + d = MD5STEP2(d, a, b, c, inint[6] + 0xc040b340, 9); + c = MD5STEP2(c, d, a, b, inint[11] + 0x265e5a51, 14); + b = MD5STEP2(b, c, d, a, inint[0] + 0xe9b6c7aa, 20); + a = MD5STEP2(a, b, c, d, inint[5] + 0xd62f105d, 5); + d = MD5STEP2(d, a, b, c, inint[10] + 0x02441453, 9); + c = MD5STEP2(c, d, a, b, inint[15] + 0xd8a1e681, 14); + b = MD5STEP2(b, c, d, a, inint[4] + 0xe7d3fbc8, 20); + a = MD5STEP2(a, b, c, d, inint[9] + 0x21e1cde6, 5); + d = MD5STEP2(d, a, b, c, inint[14] + 0xc33707d6, 9); + c = MD5STEP2(c, d, a, b, inint[3] + 0xf4d50d87, 14); + b = MD5STEP2(b, c, d, a, inint[8] + 0x455a14ed, 20); + a = MD5STEP2(a, b, c, d, inint[13] + 0xa9e3e905, 5); + d = MD5STEP2(d, a, b, c, inint[2] + 0xfcefa3f8, 9); + c = MD5STEP2(c, d, a, b, inint[7] + 0x676f02d9, 14); + b = MD5STEP2(b, c, d, a, inint[12] + 0x8d2a4c8a, 20); + + a = MD5STEP3(a, b, c, d, inint[5] + 0xfffa3942, 4); + d = MD5STEP3(d, a, b, c, inint[8] + 0x8771f681, 11); + c = MD5STEP3(c, d, a, b, inint[11] + 0x6d9d6122, 16); + b = MD5STEP3(b, c, d, a, inint[14] + 0xfde5380c, 23); + a = MD5STEP3(a, b, c, d, inint[1] + 0xa4beea44, 4); + d = MD5STEP3(d, a, b, c, inint[4] + 0x4bdecfa9, 11); + c = MD5STEP3(c, d, a, b, inint[7] + 0xf6bb4b60, 16); + b = MD5STEP3(b, c, d, a, inint[10] + 0xbebfbc70, 23); + a = MD5STEP3(a, b, c, d, inint[13] + 0x289b7ec6, 4); + d = MD5STEP3(d, a, b, c, inint[0] + 0xeaa127fa, 11); + c = MD5STEP3(c, d, a, b, inint[3] + 0xd4ef3085, 16); + b = MD5STEP3(b, c, d, a, inint[6] + 0x04881d05, 23); + a = MD5STEP3(a, b, c, d, inint[9] + 0xd9d4d039, 4); + d = MD5STEP3(d, a, b, c, inint[12] + 0xe6db99e5, 11); + c = MD5STEP3(c, d, a, b, inint[15] + 0x1fa27cf8, 16); + b = MD5STEP3(b, c, d, a, inint[2] + 0xc4ac5665, 23); + + a = MD5STEP4(a, b, c, d, inint[0] + 0xf4292244, 6); + d = MD5STEP4(d, a, b, c, inint[7] + 0x432aff97, 10); + c = MD5STEP4(c, d, a, b, inint[14] + 0xab9423a7, 15); + b = MD5STEP4(b, c, d, a, inint[5] + 0xfc93a039, 21); + a = MD5STEP4(a, b, c, d, inint[12] + 0x655b59c3, 6); + d = MD5STEP4(d, a, b, c, inint[3] + 0x8f0ccc92, 10); + c = MD5STEP4(c, d, a, b, inint[10] + 0xffeff47d, 15); + b = MD5STEP4(b, c, d, a, inint[1] + 0x85845dd1, 21); + a = MD5STEP4(a, b, c, d, inint[8] + 0x6fa87e4f, 6); + d = MD5STEP4(d, a, b, c, inint[15] + 0xfe2ce6e0, 10); + c = MD5STEP4(c, d, a, b, inint[6] + 0xa3014314, 15); + b = MD5STEP4(b, c, d, a, inint[13] + 0x4e0811a1, 21); + a = MD5STEP4(a, b, c, d, inint[4] + 0xf7537e82, 6); + d = MD5STEP4(d, a, b, c, inint[11] + 0xbd3af235, 10); + c = MD5STEP4(c, d, a, b, inint[2] + 0x2ad7d2bb, 15); + b = MD5STEP4(b, c, d, a, inint[9] + 0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; + } + + private int GET_32BIT_LSB_FIRST(byte[] b, int off) { + return + ((int)(b[off+0]&0xff)) | + ((int)(b[off+1]&0xff) << 8) | + ((int)(b[off+2]&0xff) << 16) | + ((int)(b[off+3]&0xff) << 24); + } + + private void PUT_32BIT_LSB_FIRST(byte[] b, int off, int value) { + b[off+0] = (byte) (value & 0xff); + b[off+1] = (byte) ((value >> 8) & 0xff); + b[off+2] = (byte) ((value >> 16)& 0xff); + b[off+3] = (byte) ((value >> 24)& 0xff); + } + + // These are debug routines I was using while trying to + // get this code to generate the same hashes as the C version. + // (IIRC, all the errors were due to the absence of unsigned + // ints in Java.) + /* + private void debugStatus(String m) { + System.out.println(m+":"); + System.out.println("in: "+dumpBytes(in)); + System.out.println("bits: "+bits); + System.out.println("buf: " + +Integer.toHexString(buf[0])+" " + +Integer.toHexString(buf[1])+" " + +Integer.toHexString(buf[2])+" " + +Integer.toHexString(buf[3])); + } + + private static String dumpBytes(byte[] bytes) { + int i; + StringBuffer sb = new StringBuffer(); + for (i=0; i 2) { + s = s.substring(s.length()-2); + } + sb.append(s); + } + return sb.toString(); + } + */ +} diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java b/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java index 61fee691..e14633e7 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java @@ -74,6 +74,7 @@ public class Mail { } + /* public void setSentOn(String sentOn) { this.sentOn = sentOn; } @@ -99,6 +100,7 @@ public class Mail { String receivedOn = getReceivedOn(); return parseDate(receivedOn); } + */ /** @@ -196,9 +198,15 @@ public class Mail { return getEncoding().toLowerCase().indexOf("html") >= 0; } + /* public String toString() { return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getReceivedOn() + "," + getSentOn() + "," + getSourceCode() + "," + getSubject() + "," + getTo(); } + */ + + public String toString() { + return getBody() + "," + getCc() + "," + getEncoding() + "," + getFrom() + "," + getHasAttachement() + "," + getHeader() + "," + getSourceCode() + "," + getSubject() + "," + getTo(); + } /* public String getID() { diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java index 600dcea6..1002c811 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java @@ -33,19 +33,18 @@ public class SpamFilter extends Thread { for(int i=0; i the mail client is able to determine if it is spam or not return confidenceVals; } + + public void sendFeedBack(Mail mail, boolean isSpam, int id) { + Vector partsOfMailStrings = mail.getCommonPart(); + partsOfMailStrings.addElement(mail.getBodyString()); + //Compute signatures + SignatureComputer sigComp = new SignatureComputer(); + Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings + + for(int i=0;i