seg/trabalho1/afreq.py

77 lines
1.7 KiB
Python

import sys
# The input file path must be given as the first command-line argument;
# without it, show the usage line and stop with exit status 1.
if not sys.argv[1:]:
    print("Usage: python3 afreq.py [arquivo]")
    raise SystemExit(1)
# Relative frequency (in percent) of each letter in Portuguese text.
frequencias = {
    'A': 14.63, 'B': 1.04, 'C': 3.88, 'D': 4.99, 'E': 12.57, 'F': 1.02,
    'G': 1.30, 'H': 1.28, 'I': 6.18, 'J': 0.40, 'K': 0.02, 'L': 2.78,
    'M': 4.74, 'N': 5.05, 'O': 10.73, 'P': 2.52, 'Q': 1.20, 'R': 6.53,
    'S': 7.81, 'T': 4.34, 'U': 4.63, 'V': 1.67, 'W': 0.01, 'X': 0.21,
    'Y': 0.01, 'Z': 0.47,
}
# Replace the table by its letters ranked from most to least frequent.
# The sort is ascending (and stable) and the result is reversed afterwards,
# so letters with equal frequency come out in reverse table order
# (Y before W).
frequencias = [
    letra
    for letra, _ in sorted(frequencias.items(), key=lambda par: par[1])
][::-1]
# Symbols whose position is looked up when computing a shift:
# upper-case letters, then lower-case letters, then digits.
caracteres = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
def afreq(string):
    """Guess candidate shift keys for ``string`` by frequency analysis.

    The symbols of ``string`` are counted and ranked from most to least
    frequent. That ranking is compared, rank by rank, with the module-level
    ``frequencias`` list (Portuguese letters ordered from most to least
    frequent). For every rank whose reference letter also occurs in the
    text, the distance between the two symbols' positions in the
    module-level ``caracteres`` string is recorded as a possible key.

    Only symbols present in ``caracteres`` are counted. Other characters,
    including accented letters and other Unicode alphanumerics, are ignored
    because they have no position from which a shift could be computed.

    Args:
        string: Text to analyse.

    Returns:
        A list of integers in ``range(26)``, one per compared rank, ordered
        by rank. The list is empty when no reference letter occurs in the
        text (for example, for empty input).
    """
    # Count occurrences, keyed by symbol in order of first appearance.
    contagem = {}
    for simbolo in string:
        if simbolo in caracteres:
            contagem[simbolo] = contagem.get(simbolo, 0) + 1

    # Most frequent first. The stable ascending sort is reversed afterwards,
    # so among equally frequent symbols the one first seen later ranks higher.
    ranking = sorted(contagem, key=contagem.get)
    ranking.reverse()

    possiveis_chaves = []
    # zip() stops at the shorter sequence, so a rank that has no counterpart
    # in the text is never indexed.
    for letra_pt, simbolo in zip(frequencias, ranking):
        if letra_pt not in contagem:
            continue
        deslocamento = caracteres.index(simbolo) - caracteres.index(letra_pt)
        # Reducing modulo 26 makes a lower-case symbol yield the same key as
        # its upper-case form; digit positions are reduced the same way.
        possiveis_chaves.append(deslocamento % 26)
    return possiveis_chaves
# Read the whole input file named on the command line; the with-block
# closes the file handle as soon as the contents have been read.
with open(sys.argv[1], 'r') as arquivo:
    file_str = arquivo.read()
# Compute the candidate keys for the file contents and print the list.
possiveis_chaves = afreq(file_str)
print(possiveis_chaves)