我有一些输入inputs = ["and", "dick", "jane", "puff", "spot", "yertle"]
然后我有一个由输入encrypted_sentence ="bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn"
的单词组成的句子,但是字母以某种模式改变了。例如"a"='x';"n"= ' s ';"d"= ' b '……字母表中的每个字母都被字母表中的一些不同的字母所取代。我需要创建字典,其中新的"字母"将是key
,旧的字母将是value
。比如decrypt_dict={ "x":'a', "s":'n', "b":'d', "q":'a', "t":'t'}
等等…如果输入中的唯一字母的数量与句子中的唯一字母的数量不同,print "ERROR"
因为不可能从输入创建句子如果有些字母不使用,没关系,直接跳过它们。没有必要为字母表中的每个字符创建字典,我只需要在编码
中使用的字符。这是我到目前为止的代码(我使用了另一个测试字符串):
import itertools
def deduplicate(lst):
elems = set()
rest = []
for el in lst:
if el not in elems:
elems.add(el)
rest.append(el)
return rest
def deduplicate_lists(lst_of_lists, differenet_lists=False):
lst_of_lists.sort()
if differenet_lists:
return [lst for lst, _ in itertools.groupby(lst_of_lists)]
else:
return [*[lst for lst, _ in itertools.groupby(lst_of_lists)]]
def try_it_faster(lists_by_len_of_words, decrypt_dict):
for key, val in lists_by_len_of_words.items():
# print(ke, val)
if len(val) == 1:
for index, char in enumerate(val[0]):
# print(char, index_for_word)
decrypt_dict.update({char: key[index]})
return decrypt_dict
def decrypt(string_to_decrypt, decrypt_dict):
returner = None
try:
returner = "".join(decrypt_dict[i] for i in string_to_decrypt)
except KeyError:
pass
return returner
def c_just_multiple_val_dict(dict, help_dict=None):
copy_dict = dict.copy()
for keys, values in copy_dict.items():
# print(keys, values)
if len(values) == 1:
dict.pop(keys)
if help_dict is not None:
copy_dict = dict.copy()
for word in copy_dict.keys():
if all([char in help_dict.values() for char in word]):
dict.pop(word)
return dict
def move_items(lst):
return lst[0:] + lst[0]
# timed
number = 6
inputs = ['all', 'and', 'apple', 'ass', 'gibble', 'horror', 'hound', 'love',
'tie'] # input words default: "and", "dick", "jane", "puff", "spot", "yertle"
decrypt_dict = {' ': ' '}
encryption_as_string = "cvvli euynm dxggli euzzuz cnm bxi lupi cll cww" # default: bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn
encryption_key = encryption_as_string.split() # create list from encrypted string
lists_by_len_of_words = {inp: deduplicate([a for a in encryption_key if len(a) == len(inp)]) for inp in
inputs} # dict key=input word, deduplicated value=words from encryption_key with same length as input word
# timed
# print start values
print(f"1. {inputs}")
print(f"2. {encryption_key}")
print(f"3. {lists_by_len_of_words}")
print(f"4. {decrypt_dict}")
decrypt_dict = try_it_faster(lists_by_len_of_words,
decrypt_dict) # if len of input words is unique, this will make it faster
secondary_decrypt_dict = decrypt_dict.copy()
print(f"4. After try_it_faster(): {decrypt_dict}")
# same_len_word_list = [word for word in lists_by_len_of_words.keys()]
inputs_sorted = sorted(inputs, key=len) # input sorted
inputs_as_string = " ".join(inputs_sorted) # sorted input to string
encryption_key_deduplicated_sorted = sorted(deduplicate(encryption_key),
key=len) # encryption_key sorted and deduplicated
encryption_key_as_string = " ".join(encryption_key_deduplicated_sorted) # sorted encryption_key to string
# print sorted and deduplicated start values
print(f"1. Sorted input words as string: {inputs_as_string}")
print(f"2. Sorted and deduplicated encrypted words as list: {encryption_key_deduplicated_sorted}")
print(f"2. Sorted and deduplicated encrypted words as string: {encryption_key_as_string}")
mover_check = len(encryption_key_deduplicated_sorted)#*len(encryption_key_deduplicated_sorted)
mover = 0
# while decrypt(encryption_key_as_string, secondary_decrypt_dict) != inputs_as_string:
while decrypt(encryption_key_as_string, secondary_decrypt_dict) != inputs_as_string and mover < mover_check:
print(f"Before clearing:{encryption_key_deduplicated_sorted}")
for index_of_word, input_word in enumerate(inputs_sorted):
print(index_of_word, input_word,
decrypt(encryption_key_deduplicated_sorted[index_of_word], secondary_decrypt_dict))
if decrypt(encryption_key_deduplicated_sorted[index_of_word], secondary_decrypt_dict) == input_word:
for index_of_char, char in enumerate(encryption_key_deduplicated_sorted[index_of_word]):
# print(index_of_char, char)
decrypt_dict[char] = input_word[index_of_char]
print(f"pop out {inputs_sorted[index_of_word]} a {encryption_key_deduplicated_sorted[index_of_word]}")
inputs_sorted.pop(index_of_word)
encryption_key_deduplicated_sorted.pop(index_of_word)
print(f"After clearing:{encryption_key_deduplicated_sorted}")
print(f"Work wi :{inputs_sorted}")
secondary_decrypt_dict.clear()
secondary_decrypt_dict.update(decrypt_dict)
# secondary_decrypt_dict.update(decrypt_dict)
# print(encryption_key_deduplicated_sorted)
for index_for_word, encryption_word in enumerate(encryption_key_deduplicated_sorted):
print(index_for_word, encryption_word)
for index_for_char, letter in enumerate(encryption_word):
# print(letter)
if secondary_decrypt_dict.get(letter) is None:
print(f"secondary_dict nor contain:{letter}")
if decrypt_dict.
try:
print("".join(char for char in inputs_sorted[index_for_word])[index_for_char])
secondary_decrypt_dict.update(
{letter: "".join(char for char in inputs_sorted[index_for_word])[index_for_char]})
setter = False
# print(secondary_decrypt_dict)
except IndexError:
setter = True
else:
# pass
setter = False
print(f"Contains:{letter}")
encryption_key_deduplicated_sorted = encryption_key_deduplicated_sorted[1:] + encryption_key_deduplicated_sorted[:1]
if setter:
sorted(encryption_key_deduplicated_sorted, key=len)
print(decrypt(encryption_key_as_string, secondary_decrypt_dict))
mover += 1
print(secondary_decrypt_dict)
print(decrypt_dict)
final_dict = {**secondary_decrypt_dict, **decrypt_dict}
print(final_dict)
str = "cvvli euynm acqyg dxggli euzzuz cnm bxi lupi cll cww"
if decrypt(str, final_dict) is None:
ret_str = ""
for char in encryption_key_as_string:
if char != " ":
ret_str.__add__(char)
else:
ret_str.__add__(" ")
print(ret_str)
else:
print(decrypt(str, final_dict))
可以使用回溯方法来查找字母映射的组合,从而在逐长度的基础上生成匹配的单词。
为了优化这种"试错"组合遍历,每个加密字母被分配到它可以映射到的潜在的清晰字母,基于加密单词与相同长度的清晰单词的配对。
knownWords = ["and", "dick", "jane", "puff", "spot", "yertle"]
encrypted = "bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn"
letterMap = dict()
for word in encrypted.split():
for kw in knownWords: # matching words
if len(kw)!=len(word): continue # of same length
for k,e in zip(kw,word):
letterMap.setdefault(e,set()).add(k) # letter to letter options
def decrypt(w,mapping): return "".join(mapping.get(c,c) for c in w)
from collections import deque
finalMap = None
mapQ = deque([[dict(),set(letterMap),set()]]) # mapping,remaining,usedClear
while mapQ and not finalMap:
mapping,remaining,used = mapQ.pop() # DFS mapping to complete
letter = remaining.pop() # pick a letter to map
for clearLetter in letterMap[letter]-used: # select clear letters
newMap = {**mapping,letter:clearLetter} # enhance mapping
if remaining: # more letters to map
mapQ.append([newMap, remaining.copy(), used | {clearLetter}])
continue
if all(decrypt(w,newMap) in knownWords for w in encrypted.split()):
finalMap = newMap # found mapping that works
break
最终结果:
print(decrypt(encrypted, finalMap))
'dick and jane and puff and spot and yertle'
print(finalMap)
{'y': 'u', 'p': 'y', 'q': 'p', 't': 't', 'j': 'i', 'm': 'f', 'e': 'r',
'f': 'l', 'g': 'k', 'h': 'j', 'a': 'o', 'v': 'c', 'n': 'e', 'x': 'a',
'r': 's', 'b': 'd', 's': 'n'}
其他测试用例:
knownWords = ['all', 'and', 'apple', 'ass', 'gibble', 'horror', 'hound', 'love',
'tie']
encrypted = "cvvli euynm dxggli euzzuz cnm bxi lupi cll cww"
...
decrypt(encrypted, finalMap)
'apple hound gibble horror and tie love all ass'
decrypt("cvvli euynm acqyg dxggli euzzuz cnm bxi lupi cll cww",finalMap)
'apple hound aaqub gibble horror and tie love all ass'
请注意,我使用了一个严格的条件来退出搜索循环,其中所有加密的单词都在已知单词列表中找到。这可以通过追踪"最佳匹配"来放松。并让循环耗尽mapQ队列。定义什么是"最佳匹配"方法取决于你。它可以用单词数、不同单词数、字符覆盖率等来衡量。
[编辑]更有效的方法…
我尝试了上面的"最佳匹配"。接近并发现处理时间会变得非常长。所以我做了一个新的(递归的)解决方案,基于整个单词的映射,而不是一个字母一个字母的搜索。这样可以得到更好的结果,并且对于部分匹配的管理也不那么复杂:
def decrypt(w,mapping): return "".join(mapping.get(c,c) for c in w)
def findMapping(knownWords,encrypted):
if not encrypted: return {} # no more encrypted words, stop recursion
if isinstance(encrypted,str): encrypted = list(set(encrypted.split()))
bestMap = findMapping(knownWords,encrypted[1:]) # track best
bestMatch = sum(decrypt(w,bestMap) in knownWords for w in encrypted)
eWord = encrypted[0] # map first encrypted word
for kWord in knownWords:
if len(eWord) != len(kWord): continue # with word of same length
mapping = {e:k for e,k in zip(eWord,kWord)} # letter to letter
if decrypt(eWord,mapping) != kWord: continue # one to one only
unused = str.maketrans('','',kWord) # removed used letters
subWords = [*filter(None,(kw.translate(unused) for kw in knownWords))]
unmapped = str.maketrans('','',eWord) # remove mapped letters
subCrypt = [*filter(None,(ew.translate(unmapped) for ew in encrypted))]
mapping.update(findMapping(subWords,subCrypt)) # Recurse subwords
match = sum(decrypt(w,mapping) in knownWords for w in encrypted)
if match>bestMatch: bestMap,bestMatch = mapping,match # best match
return bestMap # return mapping with best match (number of mapped words)
- 此解决方案选择一个加密的单词并尝试将其映射到相同长度的已知单词。
- 建立一个字母映射(必须是一对一的)。
- 在进行下一层递归之前,从加密单词和已知单词中删除该映射中的字母。
- 下一层递归只执行有限的映射,并且不会与第一个单词的字母映射重叠。
- 对相同长度的每个已知单词执行此操作,并通过递归处理所有加密单词将生成从字母到字母的角度兼容的单词到单词映射。
- 跟踪匹配字数最多的映射,并返回给主叫。
- 为了支持不完全匹配,解决方案从递归开始,跳过第一个加密单词。这为选择最佳匹配建立了基线。 输出:
knownWords = ["and", "dick", "jane", "puff", "spot", "yertle"]
encrypted = "bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn"
mapping = findMapping(knownWords,encrypted)
print(decrypt(encrypted,mapping))
'dick and jane and puff and spot and yertle'
print(mapping)
{'r': 's', 'q': 'p', 'a': 'o', 't': 't', 'y': 'u', 'm': 'f', 'b': 'd',
'j': 'i', 'v': 'c', 'g': 'k', 'p': 'y', 'n': 'e', 'e': 'r', 'f': 'l',
'h': 'j', 'x': 'a', 's': 'n'}
knownWords = "apple hound gibble horror and tie love all ass".split()
encrypted = "cvvli euynm acqyg dxggli euzzuz cnm bxi lupi upilucnm"
mapping = findMapping(knownWords,encrypted)
print(decrypt(encrypted,mapping))
'apple hound aaqub gibble horror and tie love oveloand'
print(mapping)
{'b': 't', 'x': 'i', 'i': 'e', 'l': 'l', 'u': 'o', 'p': 'v', 'c': 'a',
'v': 'p', 'd': 'g', 'g': 'b', 'e': 'h', 'y': 'u', 'n': 'n', 'm': 'd',
'z': 'r'}