从 tzdata 中提取历史闰秒

有没有办法从分布在大多数Linux发行版上的时区数据库中提取历史闰秒的时刻？我正在寻找 python 的解决方案，但任何在命令行上工作的东西也可以。

我的用例是在 GPS 时间（基本上是自 1980 年第一颗 GPS 卫星启用以来的秒数）和 UTC 或本地时间之间进行转换。UTC 会不时地用闰秒进行调整，而 GPS 时间线性增加。这相当于在 UTC 和 TAI 之间进行转换。TAI 也忽略闰秒，因此 TAI 和 GPS 时间之间应始终保持相同的偏移量。在工作中，我们使用 GPS 时间作为同步世界各地天文观测的时间标准。

我有在 GPS 时间和 UTC 之间转换的可用函数，但我不得不硬编码一个闰秒表，这个表是我从这里得到的（文件 tzdata2013xx.tar.gz 中包含一个名为 leapseconds 的文件）。每隔几年宣布新的闰秒时，我都必须手动更新此文件。我更愿意从标准 tzdata 中获取此信息，因为标准 tzdata 每年会通过系统更新自动更新几次。

我很确定这些信息隐藏在 /usr/share/zoneinfo/ 下某处的某些二进制文件中。我已经能够使用 struct.unpack 提取其中的一部分（man tzfile 提供了有关格式的一些信息），但我从未让它完全工作。是否有任何标准软件包可以访问此信息？我知道 pytz，它似乎从同一个数据库中获取标准的 DST 信息，但它无法访问闰秒。我也找到了 tai64n，但查看它的源代码，它只包含一个硬编码表。

编辑

受到 steveha 的回答和 pytz/tzfile.py 中的一些代码的启发，我终于得到了一个有效的解决方案（在 py2.5 和 py2.7 上测试过）：

from struct import unpack, calcsize
from datetime import datetime
def print_leap(tzfile = '/usr/share/zoneinfo/right/UTC'):
    """Print the UTC instant that follows each leap second stored in a tzfile.

    The file is parsed according to tzfile(5): a 44-byte header, then a data
    block whose leap-second records sit after the transition times, the
    transition type indices, the ttinfo records and the abbreviation
    characters.  The version-1 block (32-bit times) is used when it holds
    leap-second records; if it holds none and the header announces a later
    format version, the records are read from the second block (64-bit
    times) instead.

    One line with the number of records is printed first, then one line per
    record showing ``utcfromtimestamp(tleap - nleap + 1)``.

    Args:
        tzfile: path of a compiled zoneinfo file that carries leap-second
            records (the default points into the ``right/`` tree).

    Raises:
        AssertionError: if a header does not start with the ``TZif`` magic.
        struct.error: if the file ends before the expected data.
    """
    tzif_magic = 'TZif'.encode('US-ASCII')
    version_one = '\x00'.encode('US-ASCII')  # version byte of a v1-only file
    header_fmt = '>4s c 15x 6l'
    header_size = calcsize(header_fmt)
    with open(tzfile, 'rb') as f:
        # read header
        (magic, version, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt,
            typecnt, charcnt) = unpack(header_fmt, f.read(header_size))
        assert magic == tzif_magic, 'Not a timezone file'
        leap_fmt, time_size = '>2l', 4
        if leapcnt == 0 and version != version_one:
            # The v1 block has no leap records: skip all of it (including the
            # trailing std/wall and UT/local indicator bytes) and try the
            # second header, whose data block stores times as 64-bit values.
            f.seek(timecnt * 4 + timecnt + typecnt * 6 + charcnt
                   + ttisstdcnt + ttisgmtcnt, 1)
            raw = f.read(header_size)
            if len(raw) == header_size:
                (magic, version, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt,
                    typecnt, charcnt) = unpack(header_fmt, raw)
                assert magic == tzif_magic, 'Not a timezone file'
                leap_fmt, time_size = '>ql', 8
        # A single parenthesised argument prints identically on Python 2 and 3.
        print('Found %i leapseconds:' % leapcnt)
        # skip over some uninteresting data
        f.seek(timecnt * time_size + timecnt + typecnt * 6 + charcnt, 1)
        # read leap-seconds
        leap_size = calcsize(leap_fmt)
        for _ in range(leapcnt):
            tleap, nleap = unpack(leap_fmt, f.read(leap_size))
            # tleap counts the nleap leap seconds inserted so far; removing
            # them and adding one gives the first UTC second after the leap.
            print(datetime.utcfromtimestamp(tleap - nleap + 1))

运行结果如下：

In [2]: print_leap()
Found 25 leapseconds:
1972-07-01 00:00:00
1973-01-01 00:00:00
1974-01-01 00:00:00
...
2006-01-01 00:00:00
2009-01-01 00:00:00
2012-07-01 00:00:00

虽然这确实解决了我的问题，但我可能不会选择这个解决方案。相反，我将在我的代码中包含 leap-seconds.list，正如 Matt Johnson 所建议的那样。这似乎是用作 tzdata 来源的权威列表，并且可能由 NIST 每年更新两次。这意味着我将不得不手动进行更新，但此文件易于解析，并且包含到期日期（tzdata 中似乎没有）。

我只是做了man 5 tzfile并计算了一个偏移量，该偏移量可以找到闰秒信息，然后读取闰秒信息。

您可以取消注释"DEBUG："打印语句以查看它在文件中找到的更多内容。

编辑：程序已更新，现在是正确的。它现在使用文件 /usr/share/zoneinfo/right/UTC，并且能够找到要打印的闰秒。

原始程序没有跳过时区（timezone）缩写字符，这些字符记录在手册页中，但有点隐蔽（“……tt_abbrind 用作文件中 ttinfo 结构后面的时区缩写字符数组的索引。”）。

import datetime
import struct
TZFILE_MAGIC = 'TZif'.encode('US-ASCII')
# tzfile(5) header: magic, version byte, 15 reserved bytes, six 32-bit counts.
_TZFILE_HEADER_FMT = ">4s c 15x 6l"
_TZFILE_HEADER_SIZE = struct.calcsize(_TZFILE_HEADER_FMT)


def _parse_tzfile_header(raw):
    """Unpack one tzfile header and check its magic.

    Returns (tzfile_format, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt,
    typecnt, charcnt).
    """
    (tzfile_magic, tzfile_format, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt,
        typecnt, charcnt) = struct.unpack(_TZFILE_HEADER_FMT, raw)
    #print("DEBUG: tzfile_magic: {} tzfile_format: {} ttisgmtcnt: {} ttisstdcnt: {} leapcnt: {} timecnt: {} typecnt: {} charcnt: {}".format(tzfile_magic, tzfile_format, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt, typecnt, charcnt))
    # Make sure it is a tzfile(5) file
    assert tzfile_magic == TZFILE_MAGIC, (
            "Not a tzfile; file magic was: '{}'".format(tzfile_magic))
    return (tzfile_format, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt,
            typecnt, charcnt)


def _bytes_before_leap_records(timecnt, typecnt, charcnt, time_size):
    """Return how many bytes of a data block precede its leap-second records."""
    return (timecnt*time_size  # transition times
        + timecnt*1  # indices tying transition time to ttinfo values, each "B"
        + typecnt*6  # ttinfo structs, each stored as "lBB"
        + charcnt*1)  # timezone abbreviation chars, each "c"


def leap_seconds(f):
    """
    Return the leap-second records stored in a tzfile(5) file.

    The version-1 data block (32-bit times) is used when it contains
    leap-second records.  When it contains none and the header announces a
    later format version, the records are taken from the second data block
    (64-bit times), so files whose first block is left empty still work.

    Args:
        f: binary file object positioned at the start of the tzfile; it must
            support read() and relative seek().

    Returns:
        A list of tuples of this format: (timestamp, number_of_seconds)
            timestamp: seconds since the UNIX epoch at which the leap second
                occurs, as stored in the file
            number_of_seconds: total leap-second correction in effect after
                timestamp (it grows by one from each record to the next)

    Raises:
        AssertionError: if a header lacks the "TZif" magic, or the records
            are not strictly increasing in time with corrections that grow
            by exactly one.
        struct.error: if the file ends before the expected data.
    """
    (tzfile_format, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt, typecnt,
        charcnt) = _parse_tzfile_header(f.read(_TZFILE_HEADER_SIZE))
    time_code, time_size = "l", 4
    if leapcnt == 0 and tzfile_format != b"\x00":
        # No leap records in the 32-bit block: skip the whole block, i.e. the
        # usual leading data plus the std/wall and UT/local indicator bytes
        # that follow the (empty) leap table, then read the second header.
        f.seek(_bytes_before_leap_records(timecnt, typecnt, charcnt, 4)
               + ttisstdcnt + ttisgmtcnt, 1)
        raw = f.read(_TZFILE_HEADER_SIZE)
        if len(raw) < _TZFILE_HEADER_SIZE:
            return []  # nothing follows the first block
        (tzfile_format, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt, typecnt,
            charcnt) = _parse_tzfile_header(raw)
        time_code, time_size = "q", 8
    # seek offset bytes from current position
    f.seek(_bytes_before_leap_records(timecnt, typecnt, charcnt, time_size), 1)
    # Each record is an occurrence time followed by a 32-bit correction.
    fmt = ">" + (time_code + "l") * leapcnt
    #print("DEBUG: leapcnt: {}  fmt: '{}'".format(leapcnt, fmt))
    size = struct.calcsize(fmt)
    data = struct.unpack(fmt, f.read(size))
    lst = list(zip(data[0::2], data[1::2]))
    # Sanity checks: times strictly increase, corrections grow by one.
    assert all(a[0] < b[0] for a, b in zip(lst, lst[1:]))
    assert all(a[1] == b[1]-1 for a, b in zip(lst, lst[1:]))
    return lst
def print_leaps(leap_lst):
    """Print one UTC datetime per leap-second record.

    Args:
        leap_lst: iterable of (timestamp, num_leap_seconds) tuples.

    Each record is shown as ``utcfromtimestamp(timestamp - num_leap_seconds + 1)``.
    """
    for stamp, total in leap_lst:
        moment = datetime.datetime.utcfromtimestamp(stamp - total + 1)
        print(moment)
if __name__ == '__main__':
    # Script entry point: read the leap-second table from the system's
    # right/UTC zoneinfo file and print one line per record.
    zoneinfo_fname = '/usr/share/zoneinfo/right/UTC'
    with open(zoneinfo_fname, 'rb') as f:
        leap_lst = leap_seconds(f)
    print_leaps(leap_lst)

PyEphem 有一个 delta_t 函数，它返回地球时（TT）和世界时（UT）之间的差值（以秒为单位）。您可以从中减去 32.184 以获得闰秒（ref）。

import ephem, datetime
# NOTE(review): 32.184 s is presumably the fixed TT - TAI offset, so this
# expression would be TAI - UT rather than an exact leap-second count; the
# result shown below is fractional and would need rounding - confirm against
# the PyEphem documentation.
ephem.delta_t(datetime.datetime.now()) - 32.184
Out[2]: 35.01972996360122

相关内容

最新更新

热门标签：