如何有效地用单个下划线替换特殊字符和空格?



如何用下划线_轻松替换特殊字符和空格,以及用单个_替换多个下划线?

我的方法如下,但它并不漂亮。

import string
s = 'Hi th@re gu^ys!@#()tes   t;"[]ing'
# First pass: turn every punctuation or whitespace character into '_'.
# The loop variable is named `ch` so the builtin `chr` is not shadowed.
for ch in string.punctuation + string.whitespace:
    s = s.replace(ch, '_')
# Second pass: keep halving runs of underscores until none are left.
while '__' in s:
    s = s.replace('__', '_')
print(s)  # Hi_th_re_gu_ys_tes_t_ing

version 2:

# Characters that must be replaced: all ASCII punctuation (which includes
# '_' itself) plus all whitespace characters.
dont_want_these = frozenset(string.punctuation + string.whitespace)


def repl_it(s):
    """Return *s* with each run of unwanted characters replaced by one '_'.

    A character is unwanted when it is in ``dont_want_these``. The first
    character of a run becomes '_' and the rest of the run is dropped, so
    the result never contains two consecutive underscores.
    """
    chrs = list(s)
    prev_chr_was_fail = False
    # `ch` rather than `chr`, so the builtin is not shadowed.
    for i, ch in enumerate(chrs):
        if ch in dont_want_these:
            if prev_chr_was_fail:
                # Still inside a run that already produced its underscore.
                chrs[i] = ''
            else:
                prev_chr_was_fail = True
                chrs[i] = '_'
        else:
            prev_chr_was_fail = False
    return ''.join(chrs)


assert repl_it('Hi th@re gu^ys!@#()tes   t;"[]ing') == 'Hi_th_re_gu_ys_tes_t_ing'

感谢
import re
# [\W_]+ matches a run of non-word characters and/or underscores; each run
# is replaced by one '_'. The backslash is essential: without it the class
# only matches a literal 'W' or '_'.
# s1 and s2 are the sample input strings given in this post.
new_s1 = re.sub(r'[\W_]+', '_', s1)
new_s2 = re.sub(r'[\W_]+', '_', s2)

输入:

# Sample input: words separated by punctuation and runs of spaces.
s1 = 'Hi th@re gu^ys!@#()tes   t;"[]ing'
# Same text, with an existing run of underscores added after the "t".
s2 = 'Hi th@re gu^ys!@#()tes   t___;"[]ing'

输出:

>>> print(new_s1)
Hi_th_re_gu_ys_tes_t_ing
>>> print(new_s2)
Hi_th_re_gu_ys_tes_t_ing
import re
s = 'Hi th@re gu^ys!@#()tes   t;"[]ing'
# \W matches any non-word character (punctuation, whitespace). '_' counts as
# a word character, so it is listed explicitly so that runs of underscores
# are collapsed too. Each matched run is replaced by one '_'.
new_s = re.sub(r'[\W_]+', '_', s)
print(new_s)  # Hi_th_re_gu_ys_tes_t_ing

最新更新