Vigenère cipher/Cryptanalysis: Difference between revisions

From Rosetta Code
Content added Content deleted
No edit summary
(C++ algorithm added.)
Line 23: Line 23:


Letter frequencies for English can be found [[wp:Letter_frequency|here]].
Letter frequencies for English can be found [[wp:Letter_frequency|here]].

=={{header|C++}}==

Not guaranteed to give a 100% correct answer, but it works here.

<lang cpp>#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <algorithm>
using namespace std;

class VigenereAnalyser
{
public:
vector<double> targets;

VigenereAnalyser(vector<double> targetFreqs): targets(targetFreqs) {}

vector<pair<char, double>> frequency(string input)
{
vector<pair<char, double>> out;
for(char c = 'A'; c <= 'Z'; ++c)
out.push_back(make_pair(c, 0));

for(int i = 0; i < input.size(); ++i)
++out[input[i] - 'A'].second;

return out;
}

pair<string, string> analyze(string input)
{
string cleaned;
for(int i = 0; i < input.size(); ++i)
{
if(input[i] >= 'A' && input[i] <= 'Z')
cleaned += input[i];
else if(input[i] >= 'a' && input[i] <= 'z')
cleaned += input[i] + 'A' - 'a';
}

string out;
string key;

for(int i = 1; i < cleaned.size(); ++i)
{
vector<string> pieces(i);
for(int j = 0; j < cleaned.size(); ++j)
pieces[j % i] += cleaned[j];

vector<vector<pair<char, double>>> freqs;
for(int j = 0; j < i; ++j)
freqs.push_back(frequency(pieces[j]));

int pass = 0;
for(int j = 0; j < i; ++j)
{
double chiSquared = 0.0;
for(int k = 0; k < 26; ++k)
{
double e = targets[k]*pieces[j].size();
double d = -e;
if(k < freqs[j].size())
d += freqs[j][k].second;

chiSquared += d*d/e;
}

// Chi-test limit for 0.05 and 25 degrees
if(chiSquared < 38.885)
++pass;
}

// If at least some of the columns look like an English distribution
if(pass == 0)
continue;

key = "";
for(int j = 0; j < i; ++j)
{
sort(freqs[j].begin(), freqs[j].end(), [](pair<char, double> u, pair<char, double> v)->bool
{ return u.second > v.second; });

int m = 0;
double corr, mCorr = 0.0;
for(int k = 0; k < 26; ++k)
{
corr = 0.0;
char c = 'A' + k;
for(int l = 0; l < 26; ++l)
{
int d = (freqs[j][l].first - c + 26) % 26;
corr += freqs[j][l].second * targets[d];
}

if(corr > mCorr)
{
m = k;
mCorr = corr;
}
}

key += m + 'A';
}

break;
}

for(int i = 0; i < cleaned.size(); ++i)
out += (cleaned[i] - key[i % key.length()] + 26) % 26 + 'A';

return make_pair(out, key);
}
};

int main()
{
string input =
"MOMUD EKAPV TQEFM OEVHP AJMII CDCTI FGYAG JSPXY ALUYM NSMYH"
"VUXJE LEPXJ FXGCM JHKDZ RYICU HYPUS PGIGM OIYHF WHTCQ KMLRD"
"ITLXZ LJFVQ GHOLW CUHLO MDSOE KTALU VYLNZ RFGBX PHVGA LWQIS"
"FGRPH JOOFW GUBYI LAPLA LCAFA AMKLG CETDW VOELJ IKGJB XPHVG"
"ALWQC SNWBU BYHCU HKOCE XJEYK BQKVY KIIEH GRLGH XEOLW AWFOJ"
"ILOVV RHPKD WIHKN ATUHN VRYAQ DIVHX FHRZV QWMWV LGSHN NLVZS"
"JLAKI FHXUF XJLXM TBLQV RXXHR FZXGV LRAJI EXPRV OSMNP KEPDT"
"LPRWM JAZPK LQUZA ALGZX GVLKL GJTUI ITDSU REZXJ ERXZS HMPST"
"MTEOE PAPJH SMFNB YVQUZ AALGA YDNMP AQOWT UHDBV TSMUE UIMVH"
"QGVRW AEFSP EMPVE PKXZY WLKJA GWALT VYYOB YIXOK IHPDS EVLEV"
"RVSGB JOGYW FHKBL GLXYA MVKIS KIEHY IMAPX UOISK PVAGN MZHPW"
"TTZPV XFCCD TUHJH WLAPF YULTB UXJLN SIJVV YOVDJ SOLXG TGRVO"
"SFRII CTMKO JFCQF KTINQ BWVHG TENLH HOGCS PSFPV GJOKM SIFPR"
"ZPAAS ATPTZ FTPPD PORRF TAXZP KALQA WMIUD BWNCT LEFKO ZQDLX"
"BUXJL ASIMR PNMBF ZCYLV WAPVF QRHZV ZGZEF KBYIO OFXYE VOWGB"
"BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA"
"FWAML ZZRXJ EKAHV FASMU LVVUT TGK";

double fs[] = {0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, 0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406,
0.06749, 0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, 0.00978, 0.02360, 0.00150, 0.01974, 0.00074};

vector<double> english(&fs[0], &fs[26]);
VigenereAnalyser va(english);
pair<string, string> output = va.analyze(input);

cout << "Key: " << output.second << endl << endl;
cout << "Text: " << output.first << endl;
}</lang>

Revision as of 10:49, 31 May 2011

Vigenère cipher/Cryptanalysis is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Given some text you suspect has been encrypted with a Vigenère cipher, extract the key and plaintext. There are several methods for doing this. See the Wikipedia entry for more information. Use the following encrypted text:

MOMUD EKAPV TQEFM OEVHP AJMII CDCTI FGYAG JSPXY ALUYM NSMYH
VUXJE LEPXJ FXGCM JHKDZ RYICU HYPUS PGIGM OIYHF WHTCQ KMLRD
ITLXZ LJFVQ GHOLW CUHLO MDSOE KTALU VYLNZ RFGBX PHVGA LWQIS
FGRPH JOOFW GUBYI LAPLA LCAFA AMKLG CETDW VOELJ IKGJB XPHVG
ALWQC SNWBU BYHCU HKOCE XJEYK BQKVY KIIEH GRLGH XEOLW AWFOJ
ILOVV RHPKD WIHKN ATUHN VRYAQ DIVHX FHRZV QWMWV LGSHN NLVZS
JLAKI FHXUF XJLXM TBLQV RXXHR FZXGV LRAJI EXPRV OSMNP KEPDT
LPRWM JAZPK LQUZA ALGZX GVLKL GJTUI ITDSU REZXJ ERXZS HMPST
MTEOE PAPJH SMFNB YVQUZ AALGA YDNMP AQOWT UHDBV TSMUE UIMVH
QGVRW AEFSP EMPVE PKXZY WLKJA GWALT VYYOB YIXOK IHPDS EVLEV
RVSGB JOGYW FHKBL GLXYA MVKIS KIEHY IMAPX UOISK PVAGN MZHPW
TTZPV XFCCD TUHJH WLAPF YULTB UXJLN SIJVV YOVDJ SOLXG TGRVO
SFRII CTMKO JFCQF KTINQ BWVHG TENLH HOGCS PSFPV GJOKM SIFPR
ZPAAS ATPTZ FTPPD PORRF TAXZP KALQA WMIUD BWNCT LEFKO ZQDLX
BUXJL ASIMR PNMBF ZCYLV WAPVF QRHZV ZGZEF KBYIO OFXYE VOWGB
BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA
FWAML ZZRXJ EKAHV FASMU LVVUT TGK

Letter frequencies for English can be found here.

C++

Not guaranteed to give a 100% correct answer, but it works here.

<lang cpp>#include <iostream>

  1. include <string>
  2. include <vector>
  3. include <map>
  4. include <algorithm>

using namespace std;

class VigenereAnalyser { public:

 vector<double> targets;
 VigenereAnalyser(vector<double> targetFreqs): targets(targetFreqs) {}
 vector<pair<char, double>> frequency(string input)
 {
   vector<pair<char, double>> out;
   for(char c = 'A'; c <= 'Z'; ++c)
     out.push_back(make_pair(c, 0));
   for(int i = 0; i < input.size(); ++i)
     ++out[input[i] - 'A'].second;
   return out;
 }
 pair<string, string> analyze(string input)
 {
   string cleaned;
   for(int i = 0; i < input.size(); ++i)
   {
     if(input[i] >= 'A' && input[i] <= 'Z')
       cleaned += input[i];
     else if(input[i] >= 'a' && input[i] <= 'z')
       cleaned += input[i] + 'A' - 'a';
   }
   string out;
   string key;
   for(int i = 1; i < cleaned.size(); ++i)
   {
     vector<string> pieces(i);
     for(int j = 0; j < cleaned.size(); ++j)
       pieces[j % i] += cleaned[j];
     vector<vector<pair<char, double>>> freqs;
     for(int j = 0; j < i; ++j)
       freqs.push_back(frequency(pieces[j]));
     int pass = 0;
     for(int j = 0; j < i; ++j)
     {
       double chiSquared = 0.0;
       for(int k = 0; k < 26; ++k)
       {
         double e = targets[k]*pieces[j].size();
         double d = -e;
         if(k < freqs[j].size())
           d += freqs[j][k].second;
         chiSquared += d*d/e;
       }
       // Chi-test limit for 0.05 and 25 degrees
       if(chiSquared < 38.885) 
         ++pass;
     }
     // If at least some of the columns look like an English distribution
     if(pass == 0)
       continue;
     key = "";
     for(int j = 0; j < i; ++j)
     {
       sort(freqs[j].begin(), freqs[j].end(), [](pair<char, double> u, pair<char, double> v)->bool
         { return u.second > v.second; });
       int m = 0;
       double corr, mCorr = 0.0;
       for(int k = 0; k < 26; ++k)
       {
         corr = 0.0;
         char c = 'A' + k;
         for(int l = 0; l < 26; ++l)
         {
           int d = (freqs[j][l].first - c + 26) % 26;
           corr += freqs[j][l].second * targets[d];
         }
         if(corr > mCorr)
         {
           m = k;
           mCorr = corr;
         }
       }
       key += m + 'A';
     }
     break;
   }
   for(int i = 0; i < cleaned.size(); ++i)
     out += (cleaned[i] - key[i % key.length()] + 26) % 26 + 'A';
   return make_pair(out, key);
 }

};

int main() {

 string input = 
   "MOMUD EKAPV TQEFM OEVHP AJMII CDCTI FGYAG JSPXY ALUYM NSMYH"
   "VUXJE LEPXJ FXGCM JHKDZ RYICU HYPUS PGIGM OIYHF WHTCQ KMLRD"
   "ITLXZ LJFVQ GHOLW CUHLO MDSOE KTALU VYLNZ RFGBX PHVGA LWQIS"
   "FGRPH JOOFW GUBYI LAPLA LCAFA AMKLG CETDW VOELJ IKGJB XPHVG"
   "ALWQC SNWBU BYHCU HKOCE XJEYK BQKVY KIIEH GRLGH XEOLW AWFOJ"
   "ILOVV RHPKD WIHKN ATUHN VRYAQ DIVHX FHRZV QWMWV LGSHN NLVZS"
   "JLAKI FHXUF XJLXM TBLQV RXXHR FZXGV LRAJI EXPRV OSMNP KEPDT"
   "LPRWM JAZPK LQUZA ALGZX GVLKL GJTUI ITDSU REZXJ ERXZS HMPST"
   "MTEOE PAPJH SMFNB YVQUZ AALGA YDNMP AQOWT UHDBV TSMUE UIMVH"
   "QGVRW AEFSP EMPVE PKXZY WLKJA GWALT VYYOB YIXOK IHPDS EVLEV"
   "RVSGB JOGYW FHKBL GLXYA MVKIS KIEHY IMAPX UOISK PVAGN MZHPW"
   "TTZPV XFCCD TUHJH WLAPF YULTB UXJLN SIJVV YOVDJ SOLXG TGRVO"
   "SFRII CTMKO JFCQF KTINQ BWVHG TENLH HOGCS PSFPV GJOKM SIFPR"
   "ZPAAS ATPTZ FTPPD PORRF TAXZP KALQA WMIUD BWNCT LEFKO ZQDLX"
   "BUXJL ASIMR PNMBF ZCYLV WAPVF QRHZV ZGZEF KBYIO OFXYE VOWGB"
   "BXVCB XBAWG LQKCM ICRRX MACUO IKHQU AJEGL OIJHH XPVZW JEWBA"
   "FWAML ZZRXJ EKAHV FASMU LVVUT TGK";
 double fs[] = {0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, 0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 
   0.06749, 0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, 0.00978, 0.02360, 0.00150, 0.01974, 0.00074};
 vector<double> english(&fs[0], &fs[26]);
 VigenereAnalyser va(english);
 pair<string, string> output = va.analyze(input);
 cout << "Key: " << output.second << endl << endl;
 cout << "Text: " << output.first << endl;

}</lang>