seg/trabalho1/afreq.py

78 lines
1.7 KiB
Python

import sys
import cesar
# The script needs the path of the file to analyse as its first argument;
# without it, show the usage line and stop with exit status 1.
if not sys.argv[1:]:
    print("Usage: python3 afreq.py [arquivo]")
    raise SystemExit(1)
# Relative frequency (percent) of each uppercase letter.
# NOTE(review): the values look like a Portuguese letter-frequency table --
# confirm the source of the numbers.
_percentual_por_letra = {
    'A': 14.63, 'B': 1.04, 'C': 3.88, 'D': 4.99, 'E': 12.57, 'F': 1.02,
    'G': 1.30, 'H': 1.28, 'I': 6.18, 'J': 0.40, 'K': 0.02, 'L': 2.78,
    'M': 4.74, 'N': 5.05, 'O': 10.73, 'P': 2.52, 'Q': 1.20, 'R': 6.53,
    'S': 7.81, 'T': 4.34, 'U': 4.63, 'V': 1.67, 'W': 0.01, 'X': 0.21,
    'Y': 0.01, 'Z': 0.47,
}

# Letters ordered from most to least frequent.  The list is built with an
# ascending (stable) sort followed by a reversal, so letters with the same
# frequency end up in reverse table order (Y before W).
frequencias = sorted(_percentual_por_letra, key=_percentual_por_letra.get)[::-1]

# Alphabet used to turn a character into a numeric index:
# uppercase letters, then lowercase letters, then digits.
caracteres = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
def afreq(string):
    """Guess Caesar-cipher keys for ``string`` by frequency analysis.

    The most frequent character of ``string`` is paired, in turn, with every
    letter of ``frequencias`` that also occurs in ``string``; each pairing
    yields one candidate key.  Every pairing is printed as
    ``<most frequent char> <letter> : <key>``.

    Only characters present in ``caracteres`` are counted.  Other
    alphanumeric characters (for example accented letters) cannot be mapped
    to an index and used to raise ``ValueError`` when one of them happened to
    be the most frequent character.

    Args:
        string: Text to analyse.

    Returns:
        A list of candidate keys, each in ``range(26)``, in the iteration
        order of ``frequencias``.  The list is empty when ``string`` contains
        no character from ``caracteres``.
    """
    contagem = {}
    for ch in string:
        if ch in caracteres:
            contagem[ch] = contagem.get(ch, 0) + 1

    # Nothing to analyse: avoid indexing into an empty ranking.
    if not contagem:
        return []

    # Ascending stable sort followed by a reversal keeps the established
    # tie-breaking: among equally frequent characters, the one whose first
    # occurrence comes latest in the text is ranked first.
    freq_sorted = sorted(contagem, key=contagem.get)
    freq_sorted.reverse()
    most_freq = freq_sorted[0]
    index_freq = caracteres.index(most_freq)  # loop-invariant, computed once

    possiveis_chaves = []
    for c in frequencias:
        # Only letters that actually occur in the text are considered.
        if c not in contagem:
            continue
        # (index of the most frequent char in the text - index of the
        # reference letter) modulo 26, which folds lowercase letters and
        # digits back onto the 0-25 range.
        chave = (index_freq - caracteres.index(c)) % 26
        possiveis_chaves.append(chave)
        print(most_freq, c + ' : ' + str(chave))
    return possiveis_chaves
# Read the whole input file.  The context manager closes the handle, which
# the previous bare ``open(...).read()`` left open.
with open(sys.argv[1], 'r') as arquivo:
    file_str = arquivo.read()

# Compute and show the candidate keys.
possiveis_chaves = afreq(file_str)
print(possiveis_chaves)

# Print the text transformed with the negated value of each candidate key.
# NOTE(review): ``cesar.cesar`` is project-local; presumably it applies a
# Caesar shift, so a negative key would undo the encryption -- confirm.
for key in possiveis_chaves:
    print(str(key) + ' : ' + cesar.cesar(file_str, -key))