java - Difficulty with Caesar Cipher Chi-Squared -
in case unfamiliar cipher. idea able encode message shifting letters in alphabet. ex. d shift 3 -> a. able decode message using crack method. crack method makes array full of chi square values (compared natural alphabetical frequencies in table) every possible shift (its index) , checks see shift has smallest value. takes value , decodes using shift amount.
the problem, getting smallest chi square value in wrong address of array able decode accurately. i'm not looking give me answer, should in code make correction.
thanks in advance.
import java.util.arrays; public class cipher { //alphabet frequency static double[] table = {8.2, 1.5, 2.8, 4.3, 12.7, 2.2, 2.0, 6.1, 7.0, 0.2, 0.8, 4.0, 2.4, 6.7, 7.5, 1.9, 0.1, 6.0, 6.3, 9.1, 2.8, 1.0, 2.4, 0.2, 2.0, 0.1}; //convert letter number static int let2nat(char c) { return ((int) c) - 97; } //convert number letter static char nat2let(int code) { return (char) (code + 97); } //shift letter letter shftamt spaces away static char shift(int shftamt, char c) { if (let2nat(c) < 0 || let2nat(c) > 25) return c; return nat2let((let2nat(c) + shftamt) % 26); } //encodes string using given shift amount static string encode(int shftamt, string str) { char[] encodedstr = new char[str.length()]; for(int = 0; < str.length(); i++) encodedstr[i] = shift(shftamt, str.charat(i)); return new string(encodedstr); } //performs inverse method encode static string decode(int shftamt, string str) { char[] decodedstr = new char[str.length()]; for(int = 0; < str.length(); i++) decodedstr[i] = shift(0 - shftamt, str.charat(i)); return new string(decodedstr); } //determines how many lowercase letters in string str static int lowers(string str) { int count = 0; for(int = 0; < str.length(); i++) if(let2nat(str.charat(i)) >= 0 && let2nat(str.charat(i)) <= 25) count++; return count; } //calculates number of character in string static int count(char c, string str) { int counter = 0; for(int = 0; < str.length(); i++) if(c == str.charat(i)) counter++; return counter; } //calculates percent off num1 num2 static double percent(int num1, int num2) { return ((double) num1/num2 * 100); } //find ratio frequency of letters in string str static double[] freqs(string str) { double[] count = new double[26]; for(int = 0; < str.length(); i++) if(let2nat(str.charat(i)) >= 0 && let2nat(str.charat(i)) <= 25) count[let2nat(str.charat(i))]++; for(int = 0; < 26; i++) count[i] = percent((int)count[i], lowers(str)); return count; } //rotates list n places left static double[] rotate(int n, double[] list) { int j = 0; while(j<n){ double starter = list[0]; //shift 1 left for(int = 0; < list.length-1; i++) list[i] = list[i+1]; list[list.length-1] = starter; j++; } return list; } //calculates chi square value static double chisqr(double[] os) { double chitotal = 0; for(int = 0; < os.length; i++) chitotal += ((math.pow((os[i] - table[i]), 2)) / table[i]); return chitotal; } //returns first position @ whcih value occurs,if returns 999999 doesnt exist in array static int position(double a, double[] list) { for(int = 0; < list.length; i++) if(list[i] == a) return i; return 999999; } static string crack(string str) { double[] frequencies = freqs(str); double[] chisqrvalues = new double[26]; for(int = 0; < 26; i++) chisqrvalues[i] = chisqr(rotate(i, frequencies)); int smallestindex = 0; for(int = 1; < 26; i++) if(chisqrvalues[i] < chisqrvalues[smallestindex]) smallestindex = i; return decode(smallestindex, str); } public static void main(string[] args) { system.out.println(crack(encode(3, "haskellisfun"))); } }
as mentioned in comment, code shifting wrong when negative numbers used index parameter, , because of how java handles negative values when %
operator used. safe, should like:
// shift letter letter shftamt spaces away static char shift(int shftamt, char c) { if (let2nat(c) < 0 || let2nat(c) > z_to_a - 1) { return c; } else { // safe shift int result = (let2nat(c) + shftamt) % z_to_a; result += z_to_a; result %= z_to_a; return nat2let(result); } }
but mentioned in comments, isn't cause of bug, , find bug, should test each method, best unit testing, or if not, your own test methods, sure they're behaving expected, , if not, @ least allowing isolate bug put half way towards solving problem.
as aside, own take on problem not ignore upper case letters code does, since part of text letter frequency, should used when calculating our own letter frequency. work well:
/** * * @author hovercraft * {@link http://stackoverflow.com/q/31303332/522444} */ public class cipher2 { // ban "magic" numbers public static final int a_value = (int) 'a'; public static final int z_to_a = (int) ('z' - 'a') + 1; // alphabet frequency static double[] table = { 8.2, 1.5, 2.8, 4.3, 12.7, 2.2, 2.0, 6.1, 7.0, 0.2, 0.8, 4.0, 2.4, 6.7, 7.5, 1.9, 0.1, 6.0, 6.3, 9.1, 2.8, 1.0, 2.4, 0.2, 2.0, 0.1 }; // convert letter number static int let2nat(char c) { return ((int) c) - a_value; } // convert number letter static char nat2let(int code) { return (char) (code + a_value); } // shift letter letter shftamt spaces away static char shift(int shftamt, char c) { if (let2nat(c) < 0 || let2nat(c) > z_to_a - 1) { return c; } else { // safe shift int result = (let2nat(c) + shftamt) % z_to_a; result += z_to_a; result %= z_to_a; return nat2let(result); } } private static string crack(string encrypted) { int[] lettercount = new int[z_to_a]; (char c : encrypted.tochararray()) { lettercount[let2nat(c)]++; } double[] letterfrequency = new double[z_to_a]; (int = 0; < lettercount.length; i++) { letterfrequency[i] = (double) lettercount[i] * 100 / z_to_a; } int index = 0; double minchisqrsum = calcchisqrsum(index, letterfrequency); (int = 0; < z_to_a; i++) { double chisqrsum = calcchisqrsum(i, letterfrequency); if (chisqrsum < minchisqrsum) { minchisqrsum = chisqrsum; index = i; } } return encode(index, encrypted); } private static double calcchisqrsum(int i, double[] letterfrequency) { double sum = 0.0; (int j = 0; j < letterfrequency.length; j++) { double observed = letterfrequency[j]; int tableindex = (i + j + z_to_a) % z_to_a; double expected = table[tableindex]; double delta = observed - expected; double chisqr = delta * delta / expected; sum += chisqr; } return sum; } private static string encode(int shift, string text) { stringbuilder sb = new stringbuilder(); (char c : text.tochararray()) { // convert upper case lower case // judgment call on part c = character.tolowercase(c); if (let2nat(c) >= 0 && let2nat(c) < z_to_a) { sb.append(shift(shift, c)); } } return sb.tostring(); } public static void main(string[] args) { string test1 = "hello world. how going? it's going fine me"; string test2 = "when, in course of human events, becomes " + "necessary 1 portion of family of man assume " + "among people of earth position different " + "that have hitherto occupied, 1 " + "the laws of nature , of nature's god entitle them, " + "a decent respect opinions of mankind"; // let's throw complete loop: ... works! string test3 = "lorem ipsum dolor sit amet, consectetur adipiscing " + "elit, sed eiusmod tempor incididunt ut labore et dolore " + "magna aliqua. ut enim ad minim veniam, quis nostrud " + "exercitation ullamco laboris nisi ut aliquip ex ea commodo " + "consequat. duis aute irure dolor in reprehenderit in " + "voluptate velit esse cillum dolore eu fugiat nulla pariatur. " + "excepteur sint occaecat cupidatat non proident, sunt in " + "culpa qui officia deserunt mollit anim id est laborum."; // unfair test: string test4 = "abcdefghijklmnopqrstuvwxyz"; string test5 = "haskellisfun"; string[] tests = { test1, test2, test3, test4, test5 }; (string test : tests) { system.out.println(crack(encode(6, test))); } } }
Comments
Post a Comment