在 Python 中使用正则表达式按特定掩码格式化数字



我需要用特定的掩码格式化一个数字:9.9.9.9.99.999,具体取决于数字字符串的长度。
例如:

- 123456789 => 1.2.3.4.56.789
- 123456    => 1.2.3.4.56
- 1234      => 1.2.3.4
- 123       => 1.2.3
- 12        => 1.2

输入中不会出现包含 7 或 8 位数字的数字字符串。

如何使用正则表达式实现这一点,最好是在 python 中?

提前谢谢。

您可以使用此模式:

(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)

并以 . 作为替换字符串。

例:

# Insert '.' after the 1st, 2nd, 3rd, 4th and 6th digit counted from the
# start of the string, but only where another digit follows.
re.sub(r'(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)', '.', yourstr)

它必须是正则表达式吗?

num = "123456789"


def fmt(num):
    """Format a digit string with the mask ``9.9.9.9.99.999``.

    The first four characters each form their own group, characters 5-6
    form one group, and everything from the 7th character onwards forms the
    last group.  Groups the string is too short to reach are omitted.

    Args:
        num: The digits to format, as a string.

    Returns:
        The groups joined with ``'.'``.
    """
    groups = list(num[:4])
    if len(num) > 4:
        groups.append(num[4:6])
    if len(num) > 6:
        groups.append(num[6:])
    return ".".join(groups)


# Parenthesised so the line is valid on both Python 2 and Python 3.
print(fmt(num))

我相信两年后我仍然能看懂这段代码;换成正则表达式的话,我就不太确定了。

一个不借助正则表达式、更通用的方案:

from itertools import islice
def formatn(n, pat='9.9.9.9.99.999', sep='.'):
    """Format the characters of ``str(n)`` according to the mask ``pat``.

    ``pat`` is split on ``sep``; the length of each piece is the number of
    characters taken, left to right, for the corresponding group.  Groups
    that come out empty are dropped and the rest are joined with ``sep``.
    Characters beyond the total width of the mask are not emitted.
    """
    pieces = pat.split(sep)
    text = str(n)
    groups = []
    start = 0
    for piece in pieces:
        stop = start + len(piece)
        chunk = text[start:stop]
        if chunk:
            groups.append(chunk)
        start = stop
    return sep.join(groups)

演示:

>>> formatn(1234)
'1.2.3.4'
>>> formatn(123456789)
'1.2.3.4.56.789'

一种非正则表达式的方式:(这提醒我应该尽快学习正则表达式)

def get_formated_number(num, split_at):
    """Return ``str(num)`` with a ``'.'`` inserted at configured positions.

    ``split_at`` is indexed by the number of characters in ``str(num)`` and
    yields the indexes at which a dot is inserted for that length.
    """
    text = str(num)
    # Go from the highest index down so that an insertion never shifts a
    # position that still has to be processed.
    for pos in sorted(split_at[len(text)], reverse=True):
        text = text[:pos] + '.' + text[pos:]
    return text
nums = [12, 123, 1234, 123456, 123456789]
# Dot positions, keyed by the number of digits in the input.
split_at = {
    2: [1],
    3: [1, 2],
    4: [1, 2, 3],
    6: [1, 2, 3, 4],
    9: [1, 2, 3, 4, 6],
}
for num in nums:
    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print(get_formated_number(num, split_at))

输出

1.2
1.2.3
1.2.3.4
1.2.3.4.56
1.2.3.4.56.789

编辑 2

我找到了一个比我的正则表达式解决方案快 2 倍的解决方案,它是最快的解决方案。
而且它不需要正则表达式:

def fmt3(num):
    """Format a digit string with the mask ``9.9.9.9.99.999`` using slices.

    All six groups are always sliced out; groups past the end of ``num``
    are empty, and the dots they leave at the end are stripped.
    """
    bounds = ((0, 1), (1, 2), (2, 3), (3, 4), (4, 6), (6, None))
    pieces = [num[lo:hi] for lo, hi in bounds]
    joined = '.'.join(pieces)
    return joined.rstrip('.')

我认为这是因为对字符串元素的访问非常快。

.

它可以像 alko 的方案那样推广为通用形式,同时执行时间仍然可以接受,与其他方案相近;而 alko 的方案比其他所有方案慢约 10 倍。

def fmt4(num, mask='9.9.9.9.99.999'):
    """Format ``num`` according to an arbitrary dot-separated mask.

    Every character of ``mask`` other than ``'.'`` stands for one character
    of ``num``; each ``'.'`` closes the current group.  Groups are joined
    with ``'.'`` and dots at either end of the result are stripped, so a
    mask longer than ``num`` or one with a leading/trailing dot leaves no
    stray separators at the ends.

    Args:
        num: The digits to format, as a string.
        mask: The layout, e.g. ``'9.9.9.9.99.999'``.

    Returns:
        The formatted string.
    """
    def groups():
        start = end = 0
        for c in mask:
            if c == '.':
                yield num[start:end]
                start = end
            else:
                end += 1
        # The group after the last dot (or the only group if there is none).
        yield num[start:end]

    return '.'.join(groups()).strip('.')
# Single-argument print calls are parenthesised so they are valid on both
# Python 2 and Python 3.
# Default mask with inputs of decreasing length.
print(fmt4('123456789'))
print(fmt4('123456'))
print(fmt4('1234'))
print(fmt4('123'))
print(fmt4('12'))
# Blank separator line; print('') rather than print(), which would output
# "()" on Python 2.
print('')
# Custom masks, including leading, trailing and repeated dots.
print(fmt4('123456789', mask='.9.99.9.99.99.9'))
print(fmt4('123456789', mask='9.99.9.99.99.9'))
print(fmt4('123456789', mask='9...99.9.99.99.9'))
print(fmt4('123456789', mask='9.99.9.99.99.9.'))
print(fmt4('123456789', mask='9.99.99999.9'))

结果

1.2.3.4.56.789
1.2.3.4.56
1.2.3.4
1.2.3
1.2
1.23.4.56.78.9
1.23.4.56.78.9
1...23.4.56.78.9
1.23.4.56.78.9
1.23.45678.9

我最初的答案

我下面的解决方案,
# One capture group per mask group; optional groups that did not match are
# filtered out before joining.
pat1 = r'(\d)(\d)?(\d)?(\d)?(\d\d)?(\d\d\d)?'
'.'.join(filter(None,r1.match(thestring).groups('')))
似乎是最快的;

import re
from time import clock
from itertools import islice
def formatn(n, pat='9.9.9.9.99.999', sep='.'):
    """Format the characters of ``str(n)`` according to the mask ``pat``.

    ``pat`` is split on ``sep``; the length of each piece is the number of
    characters taken, left to right, for the corresponding group.  Groups
    that come out empty are dropped and the rest are joined with ``sep``.
    Characters beyond the total width of the mask are not emitted.
    """
    pieces = pat.split(sep)
    text = str(n)
    groups = []
    start = 0
    for piece in pieces:
        stop = start + len(piece)
        chunk = text[start:stop]
        if chunk:
            groups.append(chunk)
        start = stop
    return sep.join(groups)
def fmt(num):
    """Format a digit string with the mask ``9.9.9.9.99.999``.

    The first four characters each form their own group, characters 5-6
    form one group, and everything from the 7th character onwards forms the
    last group.  Groups the string is too short to reach are omitted.
    """
    groups = list(num[:4])
    if len(num) > 4:
        groups.append(num[4:6])
    if len(num) > 6:
        groups.append(num[6:])
    return ".".join(groups)
# One capture group per mask group: four single digits (the last three
# optional), then an optional pair and an optional triple.
pat1 = r'(\d)(\d)?(\d)?(\d)?(\d\d)?(\d\d\d)?'
r1 = re.compile(pat1)
# Zero-width match after the 1st, 2nd, 3rd, 4th or 6th digit counted from
# the start of the string, provided another digit follows.
pat2 = r'(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)'
r2 = re.compile(pat2)
iterat = 20000
te = clock()
for i in xrange(iterat):
    '.'.join(filter(None,r1.match('123456789').groups('')))
print clock()-te
print '  ','.'.join(filter(None,r1.match('123456789').groups('')))
te = clock()
for i in xrange(iterat):
    r2.sub('.','123456789')
print clock()-te
print '  ',r2.sub('.','123456789')
te = clock()
for i in xrange(iterat):
    fmt('123456789')
print clock()-te
print '  ',fmt('123456789')
te = clock()
for i in xrange(iterat):
    formatn('123456789')
print clock()-te
print '  ',formatn('123456789')
print '-----------------------------'
te = clock()
for i in xrange(iterat):
    '.'.join(filter(None,r1.match('123456').groups()))
print clock()-te
print '  ','.'.join(filter(None,r1.match('123456').groups()))
te = clock()
for i in xrange(iterat):
    r2.sub('.','123456')
print clock()-te
print "  ",r2.sub('.','123456')
te = clock()
for i in xrange(iterat):
    fmt('123456')
print clock()-te
print '  ',fmt('123456')
te = clock()
for i in xrange(iterat):
    formatn('123456789')
print clock()-te
print '  ',formatn('123456789')
print '-----------------------------'
te = clock()
for i in xrange(iterat):
    '.'.join(filter(None,r1.match('1234').groups()))
print clock()-te
print '  ','.'.join(filter(None,r1.match('1234').groups()))
te = clock()
for i in xrange(iterat):
    r2.sub('.','1234')
print clock()-te
print '  ',r2.sub('.','1234')
te = clock()
for i in xrange(iterat):
    fmt('1234')
print clock()-te
print '  ',fmt('1234')
te = clock()
for i in xrange(iterat):
    formatn('1234')
print clock()-te
print '  ',formatn('1234')

结果

0.186308036357
   1.2.3.4.56.789
0.397971250536
   1.2.3.4.56.789
0.258452959804
   1.2.3.4.56.789
1.9979410791
   1.2.3.4.56.789
-----------------------------
0.208518959812
   1.2.3.4.56
0.319339748488
   1.2.3.4.56
0.247042291688
   1.2.3.4.56
1.97725548918
   1.2.3.4.56.789
-----------------------------
0.179872581571
   1.2.3.4
0.273376644238
   1.2.3.4
0.207427200943
   1.2.3.4
1.9792909434
   1.2.3.4

编辑

受到卢卡斯·格拉夫(Lukas Graf)回答的启发:

def fmt2(num):
    """Format a digit string with the mask ``9.9.9.9.99.999``.

    The first four characters are dot-separated individually; characters
    5-6 and the remainder from the 7th character onwards are appended as
    further groups only when they are non-empty.

    Args:
        num: The digits to format, as a string.

    Returns:
        The formatted string.
    """
    a = '.'.join(num[:4])
    b = num[4:6]
    c = num[6:]
    if c:
        return '%s.%s.%s' % (a, b, c)
    if b:
        return a + '.' + b
    return a

最新更新