Vigenère cipher/Cryptanalysis: Difference between revisions
(Shorter D code) |
(Cleaned D code) |
||
Line 286: | Line 286: | ||
{{works with|DMD 2.054}} |
{{works with|DMD 2.054}} |
||
{{trans|C++}} |
{{trans|C++}} |
||
<lang d>import std.stdio, |
<lang d>import std.stdio,std.algorithm,std.typecons,std.string,std.array; |
||
string[2] vigenereDecrypt(in double[] targetFreqs, string input) { |
string[2] vigenereDecrypt(in double[] targetFreqs, string input) { |
||
enum |
enum nalpha = uppercase.length; |
||
double[] sortedTargets = targetFreqs.dup; |
double[] sortedTargets = targetFreqs.dup; |
||
sortedTargets.sort(); |
sortedTargets.sort(); |
||
Tuple!(char, double)[] frequency(string input) { |
Tuple!(char, double)[] frequency(string input) { |
||
auto |
auto freqs = new Tuple!(char, double)[nalpha]; |
||
foreach (i, char c; uppercase) |
foreach (i, char c; uppercase) |
||
freqs[i] = tuple(c, 0.0); |
|||
foreach (c; input) |
foreach (c; input) |
||
freqs[c - 'A'][1]++; |
|||
return |
return freqs; |
||
} |
} |
||
double correlation(string input) { |
double correlation(string input) { |
||
⚫ | |||
auto freq = frequency(input); |
auto freq = frequency(input); |
||
sort!q{a[1] < b[1]}(freq); // slow |
sort!q{ a[1] < b[1] }(freq); // slow |
||
⚫ | |||
foreach (i, f; freq) |
foreach (i, f; freq) |
||
corr += f[1] * sortedTargets[i]; |
|||
return |
return corr; |
||
} |
} |
||
const string cleaned = input.toupper().removechars("^A-Z"); |
const string cleaned = input.toupper().removechars("^A-Z"); |
||
size_t bestLength = 0; |
size_t bestLength = 0; |
||
double bestCorr = -100.0; |
double bestCorr = -100.0; |
||
// Assume that if there are less than 20 characters |
// Assume that if there are less than 20 characters |
||
// per column, the key's too long to guess |
// per column, the key's too long to guess |
||
Line 323: | Line 323: | ||
foreach (j, c; cleaned) |
foreach (j, c; cleaned) |
||
pieces[j % i].put(c); |
pieces[j % i].put(c); |
||
// The correlation seems to increase for smaller |
// The correlation seems to increase for smaller |
||
// pieces/longer keys, so weigh against them a little |
// pieces/longer keys, so weigh against them a little |
||
Line 329: | Line 329: | ||
foreach (p; pieces) |
foreach (p; pieces) |
||
corr += correlation(p.data); |
corr += correlation(p.data); |
||
if (corr > bestCorr) { |
if (corr > bestCorr) { |
||
bestLength = i; |
bestLength = i; |
||
Line 335: | Line 335: | ||
} |
} |
||
} |
} |
||
if (bestLength == 0) |
if (bestLength == 0) |
||
return ["Text is too short to analyze", ""]; |
return ["Text is too short to analyze", ""]; |
||
auto pieces = new string[bestLength]; |
auto pieces = new string[bestLength]; |
||
foreach (i, c; cleaned) |
foreach (i, c; cleaned) |
||
pieces[i % bestLength] ~= c; |
pieces[i % bestLength] ~= c; |
||
auto freqs = array(map!frequency(pieces)); |
auto freqs = array(map!frequency(pieces)); |
||
string key; |
string key; |
||
foreach (i; 0 .. bestLength) { |
foreach (i; 0 .. bestLength) { |
||
sort!q{a[1] > b[1]}(freqs[i]); |
sort!q{ a[1] > b[1] }(freqs[i]); |
||
size_t m = 0; |
size_t m = 0; |
||
double maxCorr = 0.0; |
double maxCorr = 0.0; |
||
foreach (j; 0 .. |
foreach (j; 0 .. nalpha) { |
||
double corr = 0.0; |
double corr = 0.0; |
||
char c |
foreach (k, char c; uppercase) { |
||
const d = (freqs[i][k][0] - c + nalpha) % nalpha; |
|||
int d = (freqs[i][k][0] - c + nchars) % nchars; |
|||
corr += freqs[i][k][1] * targetFreqs[d]; |
corr += freqs[i][k][1] * targetFreqs[d]; |
||
} |
} |
||
if (corr > maxCorr) { |
if (corr > maxCorr) { |
||
m = j; |
m = j; |
||
Line 363: | Line 362: | ||
} |
} |
||
} |
} |
||
key ~= cast(char)(m + 'A'); |
key ~= cast(char)(m + 'A'); |
||
} |
} |
||
string |
string decoded; |
||
foreach (i, c; cleaned) |
foreach (i, c; cleaned) |
||
decoded ~= cast(char)((c-key[i % $]+ nalpha) % nalpha + 'A'); |
|||
return [key, |
return [key, decoded]; |
||
} |
} |
||
void main() { |
void main() { |
||
const encoded = " |
const encoded = " |
||
Line 393: | Line 392: | ||
BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA |
BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA |
||
FWAML ZZRXJ EKAHV FASMU LVVUT TGK"; |
FWAML ZZRXJ EKAHV FASMU LVVUT TGK"; |
||
const englishFrequences = [ |
const englishFrequences = [ |
||
0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, |
0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, |
||
Line 399: | Line 398: | ||
0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, |
0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, |
||
0.00978, 0.02360, 0.00150, 0.01974, 0.00074]; |
0.00978, 0.02360, 0.00150, 0.01974, 0.00074]; |
||
const key_dec = vigenereDecrypt(englishFrequences, encoded); |
|||
writeln("Key: ", |
writeln("Key: ", key_dec[0]); |
||
writeln("\nText: ", |
writeln("\nText: ", key_dec[1]); |
||
}</lang> |
}</lang> |
||
Revision as of 13:36, 21 June 2011
Given some text you suspect has been encrypted with a Vigenère cipher, extract the key and plaintext. There are several methods for doing this. See the Wikipedia entry for more information. Use the following encrypted text:
MOMUD EKAPV TQEFM OEVHP AJMII CDCTI FGYAG JSPXY ALUYM NSMYH VUXJE LEPXJ FXGCM JHKDZ RYICU HYPUS PGIGM OIYHF WHTCQ KMLRD ITLXZ LJFVQ GHOLW CUHLO MDSOE KTALU VYLNZ RFGBX PHVGA LWQIS FGRPH JOOFW GUBYI LAPLA LCAFA AMKLG CETDW VOELJ IKGJB XPHVG ALWQC SNWBU BYHCU HKOCE XJEYK BQKVY KIIEH GRLGH XEOLW AWFOJ ILOVV RHPKD WIHKN ATUHN VRYAQ DIVHX FHRZV QWMWV LGSHN NLVZS JLAKI FHXUF XJLXM TBLQV RXXHR FZXGV LRAJI EXPRV OSMNP KEPDT LPRWM JAZPK LQUZA ALGZX GVLKL GJTUI ITDSU REZXJ ERXZS HMPST MTEOE PAPJH SMFNB YVQUZ AALGA YDNMP AQOWT UHDBV TSMUE UIMVH QGVRW AEFSP EMPVE PKXZY WLKJA GWALT VYYOB YIXOK IHPDS EVLEV RVSGB JOGYW FHKBL GLXYA MVKIS KIEHY IMAPX UOISK PVAGN MZHPW TTZPV XFCCD TUHJH WLAPF YULTB UXJLN SIJVV YOVDJ SOLXG TGRVO SFRII CTMKO JFCQF KTINQ BWVHG TENLH HOGCS PSFPV GJOKM SIFPR ZPAAS ATPTZ FTPPD PORRF TAXZP KALQA WMIUD BWNCT LEFKO ZQDLX BUXJL ASIMR PNMBF ZCYLV WAPVF QRHZV ZGZEF KBYIO OFXYE VOWGB BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA FWAML ZZRXJ EKAHV FASMU LVVUT TGK
Letter frequencies for English can be found in Wikipedia's "Letter frequency" article.
Specifics for this task:
- Take only the ciphertext as input. You can assume it's all capitalized and has no punctuation, but it might have whitespace.
- Assume the plaintext is written in English.
- Find and output the key.
- Use that key to decrypt and output the original plaintext. Maintaining the whitespace from the ciphertext is optional.
- The algorithm doesn't have to be perfect (which may not be possible) but it should work when given enough ciphertext. The example above is fairly long, and should be plenty for any algorithm.
C
This finds the right key (I think, I didn't try to decode it after getting the key). The program is not fully auto, but by its output, the result is pretty obvious. <lang C>#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
char *encoded = "MOMUD EKAPV TQEFM OEVHP AJMII CDCTI FGYAG JSPXY ALUYM NSMYH" "VUXJE LEPXJ FXGCM JHKDZ RYICU HYPUS PGIGM OIYHF WHTCQ KMLRD" "ITLXZ LJFVQ GHOLW CUHLO MDSOE KTALU VYLNZ RFGBX PHVGA LWQIS" "FGRPH JOOFW GUBYI LAPLA LCAFA AMKLG CETDW VOELJ IKGJB XPHVG" "ALWQC SNWBU BYHCU HKOCE XJEYK BQKVY KIIEH GRLGH XEOLW AWFOJ" "ILOVV RHPKD WIHKN ATUHN VRYAQ DIVHX FHRZV QWMWV LGSHN NLVZS" "JLAKI FHXUF XJLXM TBLQV RXXHR FZXGV LRAJI EXPRV OSMNP KEPDT" "LPRWM JAZPK LQUZA ALGZX GVLKL GJTUI ITDSU REZXJ ERXZS HMPST" "MTEOE PAPJH SMFNB YVQUZ AALGA YDNMP AQOWT UHDBV TSMUE UIMVH" "QGVRW AEFSP EMPVE PKXZY WLKJA GWALT VYYOB YIXOK IHPDS EVLEV" "RVSGB JOGYW FHKBL GLXYA MVKIS KIEHY IMAPX UOISK PVAGN MZHPW" "TTZPV XFCCD TUHJH WLAPF YULTB UXJLN SIJVV YOVDJ SOLXG TGRVO" "SFRII CTMKO JFCQF KTINQ BWVHG TENLH HOGCS PSFPV GJOKM SIFPR" "ZPAAS ATPTZ FTPPD PORRF TAXZP KALQA WMIUD BWNCT LEFKO ZQDLX" "BUXJL ASIMR PNMBF ZCYLV WAPVF QRHZV ZGZEF KBYIO OFXYE VOWGB" "BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA" "FWAML ZZRXJ EKAHV FASMU LVVUT TGK";
/* Relative frequencies of the letters A-Z in typical English text;
 * used as the reference distribution for the chi-squared fit below. */
double freq[] = {
 0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, 0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749, 0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, 0.00978, 0.02360, 0.00150, 0.01974, 0.00074
};
/* Find the rotation of the observed letter counts `a` (26 entries) that
 * best matches the reference distribution `b` (26 entries), using a
 * chi-squared style distance.  Returns the best shift, 0..25. */
int best_match(double *a, double *b) {
    double sum = 0, fit, d, best_fit = 1e100;
    int i, rotate, best_rotate = 0;  /* fix: was uninitialized; guarantee a
                                        defined result even if no rotation
                                        improves on best_fit */

    /* total of `a` so the counts can be normalized to frequencies */
    for (i = 0; i < 26; i++) sum += a[i];

    for (rotate = 0; rotate < 26; rotate++) {
        fit = 0;
        for (i = 0; i < 26; i++) {
            d = a[(i + rotate) % 26] / sum - b[i];
            fit += d * d / b[i];  /* chi-squared term for this letter */
        }
        if (fit < best_fit) {
            best_fit = fit;
            best_rotate = rotate;
        }
    }
    return best_rotate;
}
/* Split msg (len letters coded 0..25) into `interval` columns, find each
 * column's best Caesar shift against the English table `freq`, and write
 * the recovered key (NUL-terminated) into `key`.  Returns a chi-squared
 * goodness-of-fit score for the whole attempt; lower is better. */
double freq_every_nth(int *msg, int len, int interval, char *key) {
    double column[26], combined[26] = {0};
    double total, diff, score;
    int i, col, shift;

    for (col = 0; col < interval; col++) {
        /* count letter occurrences in this column */
        for (i = 0; i < 26; i++) column[i] = 0;
        for (i = col; i < len; i += interval) column[msg[i]]++;

        /* the best rotation of this column gives one key letter */
        shift = best_match(column, freq);
        key[col] = 'A' + shift;

        /* fold the un-rotated counts together to score the overall fit */
        for (i = 0; i < 26; i++) combined[i] += column[(i + shift) % 26];
    }
    key[interval] = '\0';

    for (total = 0, i = 0; i < 26; i++) total += combined[i];
    for (score = 0, i = 0; i < 26; i++) {
        diff = combined[i] / total - freq[i];
        score += diff * diff / freq[i];
    }
    return score;
}
/* Try key lengths 1..29, print the fit score and recovered key for each,
 * and flag the best-scoring candidate so far. */
int main() {
    int *txt = malloc(sizeof(int) * strlen(encoded));
    int len = 0, j;
    char key[100];
    double fit, best_fit = 1e100;

    if (txt == NULL) {  /* fix: original dereferenced an unchecked malloc */
        fprintf(stderr, "out of memory\n");
        return 1;
    }

    /* keep only the letters, mapped to 0..25 */
    for (j = 0; encoded[j] != '\0'; j++)
        if (isupper(encoded[j]))
            txt[len++] = encoded[j] - 'A';

    for (j = 1; j < 30; j++) {
        fit = freq_every_nth(txt, len, j, key);
        printf("%f, key length: %2d, %s", fit, j, key);
        if (fit < best_fit) {
            best_fit = fit;
            printf(" <--- best so far");
        }
        printf("\n");
    }

    free(txt);  /* fix: buffer was leaked in the original */
    return 0;
}</lang>
C++
Not guaranteed to give a 100% correct answer, but it works here. Requires C++0x.
<lang cpp>#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <algorithm>
using namespace std;
// Frequency-analysis attack on a Vigenere cipher: guesses the key length by
// correlating each candidate column split against a reference letter
// distribution, then recovers each key letter with a per-column Caesar fit.
struct VigenereAnalyser {
    std::vector<double> targets;        // reference frequencies, A..Z
    std::vector<double> sortedTargets;  // same values, sorted ascending

    VigenereAnalyser(std::vector<double> targetFreqs) {
        targets = targetFreqs;
        sortedTargets = targets;
        std::sort(sortedTargets.begin(), sortedTargets.end());
    }

    // Tally how often each letter A..Z occurs in input (assumed all caps).
    std::vector<std::pair<char, double>> frequency(std::string input) {
        std::vector<std::pair<char, double>> counts(26);
        for (char c = 'A'; c <= 'Z'; ++c)
            counts[c - 'A'] = std::make_pair(c, 0);
        for (size_t i = 0; i < input.size(); ++i)
            counts[input[i] - 'A'].second++;
        return counts;
    }

    // Higher when input's letter distribution has the same "shape" as the
    // target language's (both are compared in sorted order).
    double correlation(std::string input) {
        std::vector<std::pair<char, double>> counts = frequency(input);
        std::sort(counts.begin(), counts.end(),
                  [](std::pair<char, double> u,
                     std::pair<char, double> v) -> bool {
                      return u.second < v.second;
                  });
        double score = 0.0;
        for (size_t i = 0; i < 26; ++i)
            score += counts[i].second * sortedTargets[i];
        return score;
    }

    // Returns (decrypted text, key); on failure the first element carries
    // an error message and the second is empty.
    std::pair<std::string, std::string> analyze(std::string input) {
        // Keep letters only, uppercased.
        std::string cleaned;
        for (size_t i = 0; i < input.size(); ++i) {
            if (input[i] >= 'A' && input[i] <= 'Z')
                cleaned += input[i];
            else if (input[i] >= 'a' && input[i] <= 'z')
                cleaned += input[i] + 'A' - 'a';
        }

        // Guess the key length; assume that fewer than 20 characters per
        // column means the key is too long to recover.
        size_t bestLength = 0;
        double bestCorr = -100.0;
        for (size_t i = 2; i < cleaned.size() / 20; ++i) {
            std::vector<std::string> columns(i);
            for (size_t j = 0; j < cleaned.size(); ++j)
                columns[j % i] += cleaned[j];

            // Correlation tends to grow for shorter columns / longer keys,
            // so penalize longer keys a little.
            double corr = -0.5 * i;
            for (size_t j = 0; j < i; ++j)
                corr += correlation(columns[j]);

            if (corr > bestCorr) {
                bestLength = i;
                bestCorr = corr;
            }
        }
        if (bestLength == 0)
            return std::make_pair("Text is too short to analyze", "");

        // Re-split at the chosen length and tabulate each column.
        std::vector<std::string> columns(bestLength);
        for (size_t i = 0; i < cleaned.size(); ++i)
            columns[i % bestLength] += cleaned[i];
        std::vector<std::vector<std::pair<char, double>>> freqs;
        for (size_t i = 0; i < bestLength; ++i)
            freqs.push_back(frequency(columns[i]));

        // For each column pick the Caesar shift best correlated with the
        // target frequencies.
        std::string key = "";
        for (size_t i = 0; i < bestLength; ++i) {
            std::sort(freqs[i].begin(), freqs[i].end(),
                      [](std::pair<char, double> u,
                         std::pair<char, double> v) -> bool {
                          return u.second > v.second;
                      });
            size_t m = 0;
            double mCorr = 0.0;
            for (size_t j = 0; j < 26; ++j) {
                double corr = 0.0;
                char c = 'A' + j;  // candidate key letter
                for (size_t k = 0; k < 26; ++k) {
                    int d = (freqs[i][k].first - c + 26) % 26;
                    corr += freqs[i][k].second * targets[d];
                }
                if (corr > mCorr) {
                    m = j;
                    mCorr = corr;
                }
            }
            key += m + 'A';
        }

        // Decrypt with the recovered key.
        std::string result = "";
        for (size_t i = 0; i < cleaned.size(); ++i)
            result += (cleaned[i] - key[i % key.length()] + 26) % 26 + 'A';

        return std::make_pair(result, key);
    }
};
int main() {
    // Ciphertext from the task description; analyze() strips the spaces.
    std::string input =
        "MOMUD EKAPV TQEFM OEVHP AJMII CDCTI FGYAG JSPXY ALUYM NSMYH"
        "VUXJE LEPXJ FXGCM JHKDZ RYICU HYPUS PGIGM OIYHF WHTCQ KMLRD"
        "ITLXZ LJFVQ GHOLW CUHLO MDSOE KTALU VYLNZ RFGBX PHVGA LWQIS"
        "FGRPH JOOFW GUBYI LAPLA LCAFA AMKLG CETDW VOELJ IKGJB XPHVG"
        "ALWQC SNWBU BYHCU HKOCE XJEYK BQKVY KIIEH GRLGH XEOLW AWFOJ"
        "ILOVV RHPKD WIHKN ATUHN VRYAQ DIVHX FHRZV QWMWV LGSHN NLVZS"
        "JLAKI FHXUF XJLXM TBLQV RXXHR FZXGV LRAJI EXPRV OSMNP KEPDT"
        "LPRWM JAZPK LQUZA ALGZX GVLKL GJTUI ITDSU REZXJ ERXZS HMPST"
        "MTEOE PAPJH SMFNB YVQUZ AALGA YDNMP AQOWT UHDBV TSMUE UIMVH"
        "QGVRW AEFSP EMPVE PKXZY WLKJA GWALT VYYOB YIXOK IHPDS EVLEV"
        "RVSGB JOGYW FHKBL GLXYA MVKIS KIEHY IMAPX UOISK PVAGN MZHPW"
        "TTZPV XFCCD TUHJH WLAPF YULTB UXJLN SIJVV YOVDJ SOLXG TGRVO"
        "SFRII CTMKO JFCQF KTINQ BWVHG TENLH HOGCS PSFPV GJOKM SIFPR"
        "ZPAAS ATPTZ FTPPD PORRF TAXZP KALQA WMIUD BWNCT LEFKO ZQDLX"
        "BUXJL ASIMR PNMBF ZCYLV WAPVF QRHZV ZGZEF KBYIO OFXYE VOWGB"
        "BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA"
        "FWAML ZZRXJ EKAHV FASMU LVVUT TGK";

    // English letter frequencies, A..Z.
    double fs[] = {0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228,
                   0.02015, 0.06094, 0.06966, 0.00153, 0.00772, 0.04025,
                   0.02406, 0.06749, 0.07507, 0.01929, 0.00095, 0.05987,
                   0.06327, 0.09056, 0.02758, 0.00978, 0.02360, 0.00150,
                   0.01974, 0.00074};

    std::vector<double> english(&fs[0], &fs[26]);
    VigenereAnalyser va(english);

    // analyze() returns (decrypted text, key).
    std::pair<std::string, std::string> output = va.analyze(input);
    std::cout << "Key: " << output.second << std::endl << std::endl;
    std::cout << "Text: " << output.first << std::endl;
}</lang>
D
<lang d>import std.stdio,std.algorithm,std.typecons,std.string,std.array;
/// Crack a Vigenere cipher by frequency analysis.
///
/// targetFreqs: 26 relative letter frequencies (A..Z) of the plaintext
/// language.  input: ciphertext; non-letters are stripped.
/// Returns [key, decodedText], or an error message pair when the text is
/// too short to analyze.
string[2] vigenereDecrypt(in double[] targetFreqs, string input) {
    enum nalpha = uppercase.length;
    double[] sortedTargets = targetFreqs.dup;
    sortedTargets.sort();

    // Count occurrences of each letter A..Z in the given column.
    Tuple!(char, double)[] frequency(string input) {
        auto freqs = new Tuple!(char, double)[nalpha];
        foreach (i, char c; uppercase)
            freqs[i] = tuple(c, 0.0);
        foreach (c; input)
            freqs[c - 'A'][1]++;
        return freqs;
    }

    // Higher when the sorted shape of the distribution matches the target's.
    double correlation(string input) {
        auto freq = frequency(input);
        sort!q{ a[1] < b[1] }(freq); // slow
        double corr = 0.0;
        foreach (i, f; freq)
            corr += f[1] * sortedTargets[i];
        return corr;
    }

    const string cleaned = input.toupper().removechars("^A-Z");
    size_t bestLength = 0;
    double bestCorr = -100.0;

    // Assume that if there are less than 20 characters
    // per column, the key's too long to guess
    foreach (i; 2 .. cleaned.length / 20) {
        auto pieces = new Appender!string[i];
        foreach (j, c; cleaned)
            pieces[j % i].put(c);

        // The correlation seems to increase for smaller
        // pieces/longer keys, so weigh against them a little
        double corr = -0.5 * i;
        foreach (p; pieces)
            corr += correlation(p.data);

        if (corr > bestCorr) {
            bestLength = i;
            bestCorr = corr;
        }
    }

    if (bestLength == 0)
        return ["Text is too short to analyze", ""];

    // Split into one column per key letter and tabulate each column.
    auto pieces = new string[bestLength];
    foreach (i, c; cleaned)
        pieces[i % bestLength] ~= c;
    auto freqs = array(map!frequency(pieces));

    string key;
    foreach (i; 0 .. bestLength) {
        sort!q{ a[1] > b[1] }(freqs[i]);
        size_t m = 0;
        double maxCorr = 0.0;
        foreach (j; 0 .. nalpha) {
            double corr = 0.0;
            // Fix: the candidate key letter must depend on j.  The previous
            // revision took c from `foreach (k, char c; uppercase)`, which
            // made corr identical for every j, so each column always chose
            // shift 0 and the recovered key was all 'A'.
            immutable char c = cast(char)('A' + j);
            foreach (k; 0 .. nalpha) {
                const d = (freqs[i][k][0] - c + nalpha) % nalpha;
                corr += freqs[i][k][1] * targetFreqs[d];
            }
            if (corr > maxCorr) {
                m = j;
                maxCorr = corr;
            }
        }
        key ~= cast(char)(m + 'A');
    }

    string decoded;
    foreach (i, c; cleaned)
        decoded ~= cast(char)((c - key[i % $] + nalpha) % nalpha + 'A');
    return [key, decoded];
}
void main() {
    // Ciphertext from the task description; vigenereDecrypt strips the
    // whitespace before analysis.
    const encoded =
        " MOMUD EKAPV TQEFM OEVHP AJMII CDCTI FGYAG JSPXY ALUYM NSMYH" ~
        " VUXJE LEPXJ FXGCM JHKDZ RYICU HYPUS PGIGM OIYHF WHTCQ KMLRD" ~
        " ITLXZ LJFVQ GHOLW CUHLO MDSOE KTALU VYLNZ RFGBX PHVGA LWQIS" ~
        " FGRPH JOOFW GUBYI LAPLA LCAFA AMKLG CETDW VOELJ IKGJB XPHVG" ~
        " ALWQC SNWBU BYHCU HKOCE XJEYK BQKVY KIIEH GRLGH XEOLW AWFOJ" ~
        " ILOVV RHPKD WIHKN ATUHN VRYAQ DIVHX FHRZV QWMWV LGSHN NLVZS" ~
        " JLAKI FHXUF XJLXM TBLQV RXXHR FZXGV LRAJI EXPRV OSMNP KEPDT" ~
        " LPRWM JAZPK LQUZA ALGZX GVLKL GJTUI ITDSU REZXJ ERXZS HMPST" ~
        " MTEOE PAPJH SMFNB YVQUZ AALGA YDNMP AQOWT UHDBV TSMUE UIMVH" ~
        " QGVRW AEFSP EMPVE PKXZY WLKJA GWALT VYYOB YIXOK IHPDS EVLEV" ~
        " RVSGB JOGYW FHKBL GLXYA MVKIS KIEHY IMAPX UOISK PVAGN MZHPW" ~
        " TTZPV XFCCD TUHJH WLAPF YULTB UXJLN SIJVV YOVDJ SOLXG TGRVO" ~
        " SFRII CTMKO JFCQF KTINQ BWVHG TENLH HOGCS PSFPV GJOKM SIFPR" ~
        " ZPAAS ATPTZ FTPPD PORRF TAXZP KALQA WMIUD BWNCT LEFKO ZQDLX" ~
        " BUXJL ASIMR PNMBF ZCYLV WAPVF QRHZV ZGZEF KBYIO OFXYE VOWGB" ~
        " BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA" ~
        " FWAML ZZRXJ EKAHV FASMU LVVUT TGK";

    // Relative frequencies of A..Z in English text.
    const englishFrequences = [
        0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015,
        0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749,
        0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758,
        0.00978, 0.02360, 0.00150, 0.01974, 0.00074];

    const key_dec = vigenereDecrypt(englishFrequences, encoded);
    writeln("Key: ", key_dec[0]);
    writeln("\nText: ", key_dec[1]);
}</lang>
Python
<lang python>from string import uppercase from operator import itemgetter
def vigenere_decrypt(target_freqs, input):
    """Crack a Vigenere cipher by frequency analysis.

    target_freqs: sequence of 26 relative letter frequencies (A..Z) of the
        plaintext language.
    input: ciphertext string; everything that is not an ASCII letter is
        ignored, lowercase letters are uppercased.

    Returns a (key, decoded_text) tuple, or an error-message pair when the
    text is too short to analyze.
    """
    # Local imports keep the function self-contained and portable: the
    # module-level `string.uppercase` used by the original exists only in
    # Python 2; ascii_uppercase works on both 2 and 3 (as does `range`,
    # which replaces the Python-2-only `xrange` below).
    from string import ascii_uppercase as uppercase
    from operator import itemgetter

    nchars = len(uppercase)
    ordA = ord('A')
    sorted_targets = sorted(target_freqs)

    def frequency(input):
        # Count occurrences of each letter; `input` is a list of ordinals.
        result = [[c, 0.0] for c in uppercase]
        for c in input:
            result[c - ordA][1] += 1
        return result

    def correlation(input):
        # Compare the sorted "shape" of the distribution to the target's.
        result = 0.0
        freq = frequency(input)
        freq.sort(key=itemgetter(1))
        for i, f in enumerate(freq):
            result += f[1] * sorted_targets[i]
        return result

    cleaned = [ord(c) for c in input.upper() if c.isupper()]
    best_len = 0
    best_corr = -100.0

    # Assume that if there are less than 20 characters
    # per column, the key's too long to guess
    for i in range(2, len(cleaned) // 20):
        pieces = [[] for _ in range(i)]
        for j, c in enumerate(cleaned):
            pieces[j % i].append(c)

        # The correlation seems to increase for smaller
        # pieces/longer keys, so weigh against them a little
        corr = -0.5 * i + sum(correlation(p) for p in pieces)

        if corr > best_corr:
            best_len = i
            best_corr = corr

    if best_len == 0:
        return ("Text is too short to analyze", "")

    # Split into one column per key letter and tabulate each column.
    pieces = [[] for _ in range(best_len)]
    for i, c in enumerate(cleaned):
        pieces[i % best_len].append(c)
    freqs = [frequency(p) for p in pieces]

    # For each column, pick the Caesar shift best correlated to the targets.
    key = ""
    for i in range(best_len):
        freqs[i].sort(key=itemgetter(1), reverse=True)

        m = 0
        max_corr = 0.0
        for j in range(nchars):
            corr = 0.0
            c = ordA + j  # candidate key letter for this column
            for k in range(nchars):
                d = (ord(freqs[i][k][0]) - c + nchars) % nchars
                corr += freqs[i][k][1] * target_freqs[d]

            if corr > max_corr:
                m = j
                max_corr = corr

        key += chr(m + ordA)

    # Decrypt the cleaned text with the recovered key.
    r = (chr((c - ord(key[i % best_len]) + nchars) % nchars + ordA)
         for i, c in enumerate(cleaned))
    return (key, "".join(r))
def main():
    # Ciphertext from the task description; the analyzer strips everything
    # that is not a letter, so the literal's line layout is cosmetic.
    encoded = """
        MOMUD EKAPV TQEFM OEVHP AJMII CDCTI FGYAG JSPXY ALUYM NSMYH
        VUXJE LEPXJ FXGCM JHKDZ RYICU HYPUS PGIGM OIYHF WHTCQ KMLRD
        ITLXZ LJFVQ GHOLW CUHLO MDSOE KTALU VYLNZ RFGBX PHVGA LWQIS
        FGRPH JOOFW GUBYI LAPLA LCAFA AMKLG CETDW VOELJ IKGJB XPHVG
        ALWQC SNWBU BYHCU HKOCE XJEYK BQKVY KIIEH GRLGH XEOLW AWFOJ
        ILOVV RHPKD WIHKN ATUHN VRYAQ DIVHX FHRZV QWMWV LGSHN NLVZS
        JLAKI FHXUF XJLXM TBLQV RXXHR FZXGV LRAJI EXPRV OSMNP KEPDT
        LPRWM JAZPK LQUZA ALGZX GVLKL GJTUI ITDSU REZXJ ERXZS HMPST
        MTEOE PAPJH SMFNB YVQUZ AALGA YDNMP AQOWT UHDBV TSMUE UIMVH
        QGVRW AEFSP EMPVE PKXZY WLKJA GWALT VYYOB YIXOK IHPDS EVLEV
        RVSGB JOGYW FHKBL GLXYA MVKIS KIEHY IMAPX UOISK PVAGN MZHPW
        TTZPV XFCCD TUHJH WLAPF YULTB UXJLN SIJVV YOVDJ SOLXG TGRVO
        SFRII CTMKO JFCQF KTINQ BWVHG TENLH HOGCS PSFPV GJOKM SIFPR
        ZPAAS ATPTZ FTPPD PORRF TAXZP KALQA WMIUD BWNCT LEFKO ZQDLX
        BUXJL ASIMR PNMBF ZCYLV WAPVF QRHZV ZGZEF KBYIO OFXYE VOWGB
        BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA
        FWAML ZZRXJ EKAHV FASMU LVVUT TGK"""

    # Relative frequencies of A..Z in English text.
    english_frequences = [
        0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015,
        0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749,
        0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758,
        0.00978, 0.02360, 0.00150, 0.01974, 0.00074]

    (key, decoded) = vigenere_decrypt(english_frequences, encoded)
    # Fix: the original used Python-2-only print statements; %-formatting
    # with print() produces identical output on both Python 2 and 3.
    print("Key: %s" % key)
    print("\nText: %s" % decoded)
main()</lang>