-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenglish.cpp
More file actions
executable file
·135 lines (115 loc) · 9.82 KB
/
english.cpp
File metadata and controls
executable file
·135 lines (115 loc) · 9.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include <cmath>
#include <wx/wx.h>
#include "english.h"
// Overall "looks like English" score for txt: the digram (letter-pair) score
// from english_contact() multiplied by the single-letter score from
// english_frequency().
double english_score(const wxString& txt){
    double score = english_contact(txt);
    score *= english_frequency(txt);
    return score;
}
double english_contact(const wxString& txt){
double eng_freq[] = {
0.00030534,0.02458884,0.03974687,0.04591277,0.00131570,0.00815733,0.01845870,0.00157882,0.04013676,0.00025343,0.01186070,0.08382093,0.02808435,0.23106135,0.00058795,0.02102505,0.00018837,0.11898719,0.10062596,0.14194842,0.01055803,0.03301898,0.00956393,0.00102986,0.02517033,0.00201416,
0.05843819,0.00591397,0.00007969,0.00227949,0.34423669,0.00020724,0.00043041,0.00036663,0.04826810,0.00774713,0.00000000,0.11591985,0.00129122,0.00035071,0.09738099,0.00001592,0.00000000,0.07458592,0.02285875,0.01082366,0.12051076,0.00116367,0.00003184,0.00000000,0.08709927,0.00000000,
0.12487945,0.00002752,0.02187714,0.00053268,0.17423605,0.00000914,0.00000000,0.17279417,0.05742049,0.00000000,0.04609622,0.04964133,0.00000000,0.00003676,0.17863542,0.00000914,0.00174498,0.03826192,0.00106537,0.09207303,0.03638838,0.00000000,0.00000000,0.00000000,0.00426158,0.00000914,
0.07380493,0.00133133,0.00068054,0.02134708,0.32245524,0.00178994,0.01443849,0.00186391,0.20926960,0.00091721,0.00050310,0.02439463,0.00565124,0.00801799,0.10459057,0.00042912,0.00005923,0.04710279,0.08083199,0.00241124,0.04707304,0.00730796,0.00359488,0.00000000,0.02013395,0.00000000,
0.08431265,0.00197671,0.03464759,0.13027235,0.04897712,0.01322750,0.00816046,0.00274342,0.01964448,0.00040235,0.00169390,0.05251074,0.02876997,0.12592244,0.00499414,0.01564734,0.00330904,0.20195215,0.11009423,0.03730653,0.00220700,0.02429758,0.01094760,0.01623921,0.01946083,0.00028281,
0.10183455,0.00038018,0.00050696,0.00033266,0.14803078,0.07978200,0.00003173,0.00015835,0.13085771,0.00000000,0.00003173,0.04252091,0.00006331,0.00014257,0.24641167,0.00026935,0.00001579,0.13161807,0.00630531,0.05452936,0.05342040,0.00001579,0.00031687,0.00000000,0.00242395,0.00000000,
0.08758273,0.00056798,0.00076948,0.00296823,0.21913992,0.00040315,0.01502464,0.20994192,0.07407888,0.00003666,0.00005499,0.05841293,0.00441584,0.01890901,0.08474267,0.00082448,0.00000000,0.13078771,0.03860597,0.00923467,0.03789149,0.00005499,0.00051299,0.00000000,0.00483717,0.00020150,
0.17827382,0.00087285,0.00230819,0.00058495,0.51861436,0.00052199,0.00004050,0.00008100,0.14131959,0.00000000,0.00000451,0.00161077,0.00147133,0.00145780,0.07903332,0.00004953,0.00035999,0.01321943,0.00248825,0.03353459,0.01483483,0.00000000,0.00116087,0.00000000,0.00815754,0.00000000,
0.01769624,0.00732705,0.05158450,0.03851663,0.05093502,0.02876344,0.03262290,0.00008213,0.00051140,0.00000371,0.00774891,0.05582842,0.04375728,0.27752978,0.05275280,0.00664398,0.00045916,0.04637374,0.12270909,0.12699041,0.00074284,0.02498599,0.00002236,0.00199326,0.00000000,0.00341906,
0.21144609,0.00000000,0.00000000,0.00000000,0.19134994,0.00000000,0.00000000,0.00000000,0.00043692,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.26539970,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.33114895,0.00000000,0.00000000,0.00000000,0.00021841,0.00000000,
0.02413310,0.00166431,0.00072128,0.00244106,0.47367547,0.00593623,0.00066581,0.00110959,0.18552014,0.00000000,0.00044378,0.02651869,0.00138695,0.14396665,0.01619972,0.00171978,0.00000000,0.00061019,0.08748957,0.00327329,0.00427178,0.00000000,0.00588076,0.00000000,0.01237172,0.00000000,
0.11881855,0.00064446,0.00239074,0.07813372,0.19368137,0.01566143,0.00137901,0.00018019,0.12946279,0.00001391,0.00627841,0.15836008,0.00647250,0.00113654,0.09658086,0.00264724,0.00000689,0.00215516,0.02689462,0.02110817,0.01688105,0.00486469,0.00301452,0.00000000,0.11320541,0.00002769,
0.18285061,0.03396601,0.00019636,0.00301530,0.29650981,0.00211418,0.00002316,0.00035814,0.10423186,0.00002316,0.00000000,0.00187162,0.02635256,0.00351219,0.13049200,0.05762672,0.00000000,0.02018320,0.02969144,0.00100506,0.04578490,0.00003469,0.00013863,0.00000000,0.06001827,0.00000000,
0.03231631,0.00073110,0.05695540,0.23787020,0.11429035,0.00713818,0.15998787,0.00387609,0.03981062,0.00164511,0.00960349,0.01181106,0.00088588,0.01145009,0.08796403,0.00044994,0.00130769,0.00055775,0.05747089,0.12866982,0.01262655,0.00450881,0.00114362,0.00044994,0.01630574,0.00017335,
0.00951981,0.00834642,0.01454577,0.02324311,0.00336945,0.13000662,0.00694274,0.00212051,0.01005569,0.00022639,0.01223664,0.03074806,0.06643520,0.16708611,0.03784176,0.01587025,0.00026416,0.13453446,0.03693242,0.05638717,0.15304966,0.01886234,0.05589662,0.00061508,0.00388267,0.00098100,
0.13681579,0.00047153,0.00047153,0.00050199,0.23576925,0.00015210,0.00009128,0.02356327,0.05739453,0.00003046,0.00012174,0.10199577,0.00115608,0.00062374,0.12911857,0.06472662,0.00000000,0.14512150,0.02374573,0.04352121,0.02885693,0.00000000,0.00124736,0.00000000,0.00450270,0.00000000,
0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00022945,0.00022945,0.00000000,0.99954110,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,
0.09456578,0.00413588,0.01505642,0.03662225,0.30238531,0.00729892,0.01384406,0.00340761,0.10216019,0.00005608,0.01124129,0.01511753,0.02481036,0.02861517,0.11852553,0.00619370,0.00004074,0.02441819,0.05920166,0.05434757,0.02239608,0.00910213,0.00256202,0.00000516,0.03667821,0.00721242,
0.05844177,0.00107617,0.02636472,0.00254097,0.19713038,0.00176404,0.00078328,0.09520594,0.08455186,0.00008271,0.00898562,0.01913028,0.01094715,0.00375732,0.08208089,0.04809327,0.00138828,0.00031846,0.07750839,0.21205774,0.05516214,0.00013366,0.00899832,0.00000000,0.00349617,0.00000000,
0.05208840,0.00021058,0.00492614,0.00008927,0.11819488,0.00122084,0.00005351,0.44406525,0.08830612,0.00000715,0.00002146,0.01937255,0.00120654,0.00146361,0.12092568,0.00010714,0.00000358,0.04109394,0.03249812,0.02267086,0.02459854,0.00000358,0.01042337,0.00000000,0.01608126,0.00036766,
0.02776436,0.01930066,0.04880785,0.02370402,0.03279814,0.00623888,0.05367474,0.00007421,0.03165792,0.00000924,0.00016689,0.11320828,0.03277033,0.12272877,0.00214138,0.04761199,0.00018537,0.17272324,0.12817968,0.13269430,0.00003705,0.00099191,0.00021318,0.00093628,0.00031521,0.00106612,
0.10243309,0.00000000,0.00000000,0.00496603,0.68108163,0.00000000,0.00000000,0.00023758,0.14705599,0.00000000,0.00000000,0.00014259,0.00000000,0.01285461,0.04300720,0.00000000,0.00000000,0.00083160,0.00030893,0.00000000,0.00258998,0.00059401,0.00000000,0.00000000,0.00389675,0.00000000,
0.20795014,0.00037776,0.00033861,0.00214902,0.18220062,0.00148484,0.00003904,0.21485326,0.20300086,0.00000000,0.00057306,0.01014616,0.00031263,0.04230382,0.10596789,0.00009112,0.00001305,0.00931256,0.01435308,0.00205790,0.00132847,0.00000000,0.00036471,0.00000000,0.00078151,0.00000000,
0.06664546,0.00000000,0.17474488,0.00111604,0.06154342,0.00000000,0.00015947,0.01466837,0.12468116,0.00000000,0.00000000,0.00159444,0.00015947,0.00000000,0.00621808,0.23963654,0.00350771,0.00000000,0.00302931,0.26211732,0.02168367,0.00573980,0.00015947,0.01036355,0.00223220,0.00000000,
0.02662036,0.01450219,0.00442025,0.00764850,0.20754906,0.00561227,0.00114222,0.00263224,0.05214793,0.00004979,0.00009921,0.01177038,0.01440261,0.00829392,0.44459894,0.01028073,0.00000000,0.01802844,0.11984106,0.04042697,0.00218523,0.00024821,0.00591027,0.00049679,0.00034779,0.00074501,
0.44509129,0.00079382,0.00635091,0.00714474,0.32045511,0.00000000,0.00000000,0.00052929,0.07488755,0.00026464,0.00000000,0.02090504,0.00000000,0.00000000,0.07700448,0.00000000,0.00000000,0.00026464,0.00185229,0.00052929,0.00396933,0.00105847,0.00000000,0.00000000,0.01058479,0.02831442};
unsigned int intfreq[676];
double freq[676];
unsigned int total[26];
for(unsigned int p = 0; p < 26; p++){
total[p] = 0;
for(unsigned int l = 0; l < 26; l++){
intfreq[p * 26 + l] = 0;
}
}
if(txt[0] == 0 || txt[1] == 0){
return 0.0f;
}
for(unsigned int i = 0; txt[i+1] != 0; i++){
char a = 0, b = 0;
if(txt[i] >= 'a' && txt[i] <= 'z'){
a = txt[i] - 'a';
}else if(txt[i] >= 'A' && txt[i] <= 'Z'){
a = txt[i] - 'A';
}
if(txt[i+1] >= 'a' && txt[i+1] <= 'z'){
b = txt[i+1] - 'a';
}else if(txt[i+1] >= 'A' && txt[i+1] <= 'Z'){
b = txt[i+1] - 'A';
}
intfreq[a * 26 + b]++;
total[(std::size_t) a]++;
}
for(unsigned int j = 0; j < 26; j++){
for(unsigned int m = 0; m < 26; m++){
if(total[j] == 0){
freq[j * 26 + m] = 0.03846;
}else{
freq[j * 26 + m] = double(intfreq[j * 26 + m]) / double(total[j]);
}
}
}
double perfectAccuracy = 0.0;
double actualAccuracy = 0.0;
for(std::size_t i = 0; i < 676; i++){
perfectAccuracy += eng_freq[i] * eng_freq[i];
actualAccuracy += eng_freq[i] * freq[i];
}
return actualAccuracy / perfectAccuracy;
}
double english_frequency(const wxString& txt){
unsigned int intfreq[26];
double f[26];
unsigned int total = 0;
double eng_freq[] = {0.08167, 0.01492, 0.02782, 0.04253,
0.12702, 0.02228, 0.02015, 0.06094,
0.06966, 0.00153, 0.00772, 0.04025,
0.02406, 0.06749, 0.07507, 0.01929,
0.00095, 0.05987, 0.06327, 0.09056,
0.02758, 0.00978, 0.02360, 0.00150,
0.01974, 0.00074};
double accuracy = 0;
for(unsigned int p = 0; p < 26; p++){
intfreq[p] = 0;
}
for(unsigned int i = 0; txt[i] != 0; i++){
if(txt[i] >= 'a' && txt[i] <= 'z'){
intfreq[txt[i] - 'a']++;
total++;
}else if(txt[i] >= 'A' && txt[i] <= 'Z'){
intfreq[txt[i] - 'A']++;
total++;
}
}
for(unsigned int j = 0; j < 26; j++){
f[j] = double(intfreq[j]) / double(total);
}
double perfectAccuracy = 0.0;
double actualAccuracy = 0.0;
for(unsigned int i = 0; i < 26; i++){
perfectAccuracy += eng_freq[i] * eng_freq[i];
actualAccuracy += eng_freq[i] * f[i];
}
return actualAccuracy / perfectAccuracy;
}