我有一个程序,它读取一个10M字节的文件,并在以4K块读取数据时处理数据。测试通常需要1分钟-2分钟。但也有一些情况下,程序需要超过10分钟,在这一点上,它终止了测试并生成了核心。以下是读取文件的代码:
string filename("data.out");
ifstream ifs;
vector<char> buf(4096);
ifs.open(filename, ios::in | ios::binary);
if (!ifs.is_open()) {
cout << "ERROR : " << filename << "can't be opened." << endl;
VERIFY(ifs.is_open());
}
while (!ifs.eof()) {
ifs.read(buf.data(), buf.size()); <======== Line 1
process_data (buf.data(), ifs.gcount()); <======== Line 2
}
ifs.close();
我有两个核心,显示程序卡在第1行和第2行。
1号线岩心1的bt顶部:
#0 0x00007f942a462175 in std::istream::read (this=0x7fff4ce69de0,
__s=0x9120000 "324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324324"..., __n=4096) at /home/packages/gcc/4.7/w/gcc-4.7-4.7.2/build/x86_64-linux-gnu/libstdc++-v3/include/bits/istream.tcc:651
2号线岩心2的bt顶部:
#0 0x00000000004375f3 in std::__addressof<char> (__r=@0x7fa3176391a6: -128 '200') at /usr/include/c++/4.7/bits/move.h:47
#1 0x0000000000436cd4 in std::vector<char, std::allocator<char> >::data (this=0x7fff346ad770)
at /usr/include/c++/4.7/bits/stl_vector.h:859
最初,在core1中,我认为问题在于ifs.read()花费了很长时间。但在第二个核心之后,我认为这个问题可能与vector::data()有关。
有没有一种方法可以通过检查ifstream中存储的某些字段(例如文件偏移量)来判断文件的任何部分是否已被读取。
我不喜欢发布大结构的垃圾堆,但如果有人能告诉我如何从这个垃圾堆中计算出10MB的阅读量。
(gdb) p ifs
$3 = warning: can't find linker symbol for virtual table for `std::basic_ifstream<char, std::char_traits<char> >' value
{
<std::basic_istream<char, std::char_traits<char> >> = {
<std::basic_ios<char, std::char_traits<char> >> = {
<std::ios_base> = {
_vptr.ios_base = 0xfbfcc0,
static boolalpha = std::_S_boolalpha,
static dec = std::_S_dec,
static fixed = std::_S_fixed,
static hex = std::_S_hex,
static internal = std::_S_internal,
static left = std::_S_left,
static oct = std::_S_oct,
static right = std::_S_right,
static scientific = std::_S_scientific,
static showbase = std::_S_showbase,
static showpoint = std::_S_showpoint,
static showpos = std::_S_showpos,
static skipws = std::_S_skipws,
static unitbuf = std::_S_unitbuf,
static uppercase = std::_S_uppercase,
static adjustfield = std::_S_adjustfield,
static basefield = std::_S_basefield,
static floatfield = std::_S_floatfield,
static badbit = std::_S_badbit,
static eofbit = std::_S_eofbit,
static failbit = std::_S_failbit,
static goodbit = std::_S_goodbit,
static app = std::_S_app,
static ate = std::_S_ate,
static binary = std::_S_bin,
static in = std::_S_in,
static out = std::_S_out,
static trunc = std::_S_trunc,
static beg = std::_S_beg,
static cur = std::_S_cur,
static end = std::_S_end,
_M_precision = 6,
_M_width = 0,
_M_flags = 4098,
_M_exception = std::_S_goodbit,
_M_streambuf_state = 5,
_M_callbacks = 0x0,
_M_word_zero = {
_M_pword = 0x0,
_M_iword = 0
},
_M_local_word = {{
_M_pword = 0x0,
_M_iword = 0
}, {
_M_pword = 0x0,
_M_iword = 0
}, {
_M_pword = 0x0,
_M_iword = 0
}, {
_M_pword = 0x0,
_M_iword = 0
}, {
_M_pword = 0x0,
_M_iword = 0
}, {
_M_pword = 0x0,
_M_iword = 0
}, {
_M_pword = 0x0,
_M_iword = 0
}, {
_M_pword = 0x0,
_M_iword = 0
}},
_M_word_size = 8,
_M_word = 0x7fff4ce69f20,
_M_ios_locale = {
static none = 0,
static ctype = 1,
static numeric = 2,
static collate = 4,
static time = 8,
static monetary = 16,
static messages = 32,
static all = 63,
_M_impl = 0x7f942a6e3aa0,
static _S_classic = 0x7f942a6e3aa0,
static _S_global = 0x7f942a6e3aa0,
static _S_categories = 0x7f942a6c86a0,
static _S_once = 2
}
},
members of std::basic_ios<char, std::char_traits<char> >:
_M_tie = 0x0,
_M_fill = 0 ' 00',
_M_fill_init = false,
_M_streambuf = 0x7fff4ce69df0,
_M_ctype = 0x7f942a6e3d20,
_M_num_put = 0x7f942a6e4040,
_M_num_get = 0x7f942a6e4030
},
members of std::basic_istream<char, std::char_traits<char> >:
_vptr.basic_istream = 0xfbfc98,
_M_gcount = 0
},
members of std::basic_ifstream<char, std::char_traits<char> >:
_M_filebuf = warning: can't find linker symbol for virtual table for `std::basic_filebuf<char, std::char_traits<char> >' value
{
<std::basic_streambuf<char, std::char_traits<char> >> = {
_vptr.basic_streambuf = 0xfc0a70,
_M_in_beg = 0x6306000 "317317317......320320320320"...,
_M_in_cur = 0x6307fff "",
_M_in_end = 0x6307fff "",
_M_out_beg = 0x0,
_M_out_cur = 0x0,
_M_out_end = 0x0,
_M_buf_locale = {
static none = 0,
static ctype = 1,
static numeric = 2,
static collate = 4,
static time = 8,
static monetary = 16,
static messages = 32,
static all = 63,
_M_impl = 0x7f942a6e3aa0,
static _S_classic = 0x7f942a6e3aa0,
static _S_global = 0x7f942a6e3aa0,
static _S_categories = 0x7f942a6c86a0,
static _S_once = 2
}
},
members of std::basic_filebuf<char, std::char_traits<char> >:
_M_lock = {
__data = {
__lock = 0,
__count = 0,
__owner = 0,
__nusers = 0,
__kind = 0,
__spins = 0,
__list = {
__prev = 0x0,
__next = 0x0
}
},
__size = ' 00' <repeats 39 times>,
__align = 0
},
_M_file = {
_M_cfile = 0x70186c0,
_M_cfile_created = true
},
_M_mode = 12,
_M_state_beg = {
__count = 0,
__value = {
__wch = 0,
__wchb = " 00 00 00"
}
},
_M_state_cur = {
__count = 0,
__value = {
__wch = 0,
__wchb = " 00 00 00"
}
},
_M_state_last = {
__count = 0,
__value = {
__wch = 0,
__wchb = " 00 00 00"
}
},
_M_buf = 0x6306000 "317317317317317......320320320320320"...,
_M_buf_size = 8192,
_M_buf_allocated = true,
_M_reading = true,
_M_writing = false,
_M_pback = 0 ' 00',
_M_pback_cur_save = 0x0,
_M_pback_end_save = 0x0,
_M_pback_init = false,
_M_codecvt = 0x7f942a6e3f60,
_M_ext_buf = 0x0,
_M_ext_buf_size = 0,
_M_ext_next = 0x0,
_M_ext_end = 0x0
}
}
(gdb)
谢谢,艾哈迈德。
不要在eof
上循环。
while (ifs.read(buf.data(), buf.size())) {
size_t read = ifs.gcount();
if(read==0) break; // don't trust passing `0` to `process_data`:
process_data(buf.data(), read);
if (read<buf.size()) break; // if we finished, end.
}
最好通过尝试io并注意到出现了问题来找到输入的末尾。在这种情况下,我们读取,计算我们读取的字节数,当我们读取0个字节或读取的字节比预期读取的字节少时,我们就决定没有更多的数据了。
如果IO操作在ifs
上设置了任何故障位,我们也会结束。