C - Linux:列出稀疏文件并打印 0 填充的磁盘块



我已经花了好几个小时尝试实现下面的任务,但它仍然不能正常工作,我真的不知道接下来该怎么做。

任务:
编写一个 C 程序:如果在命令行上传入参数 -s,程序列出当前目录中的所有稀疏文件;如果使用 -c 而不是 -s,程序应打印文件中表示间隙(空洞)的磁盘块数,以及被 0 填充但仍占用磁盘空间的磁盘块数。

代码:

/* Return nonzero if every byte of buf[0..len) is zero. */
static int block_is_zero(const unsigned char *buf, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (buf[i] != 0) {
            return 0;
        }
    }
    return 1;
}

/* Read the file `name` in chunks of `blksize` bytes and count the chunks
 * that contain only zero bytes (a trailing partial chunk counts as one).
 * Holes read back as zeros, so the count covers both holes and explicitly
 * stored zero blocks.
 * Stores the count in *zero_blocks and returns 0 on success; on failure
 * reports the error with perror() and returns -1. */
static int count_zero_blocks(const char *name, size_t blksize, long long *zero_blocks)
{
    int fd = open(name, O_RDONLY);
    if (fd == -1) {
        perror("open()");
        return -1;
    }

    unsigned char *buf = malloc(blksize);
    if (buf == NULL) {
        perror("malloc()");
        close(fd);
        return -1;
    }

    long long zeros = 0;
    int rc = 0;
    for (;;) {
        size_t have = 0;
        ssize_t got = 0;

        /* read() may return short counts; keep going until the chunk is
         * full, end of file is reached (got == 0) or an error occurs. */
        while (have < blksize && (got = read(fd, buf + have, blksize - have)) > 0) {
            have += (size_t)got;
        }
        if (got < 0) {
            perror("read()");
            rc = -1;
            break;
        }
        if (have == 0) {
            break;              /* end of file on a chunk boundary */
        }
        if (block_is_zero(buf, have)) {
            zeros++;
        }
        if (got == 0) {
            break;              /* end of file inside the last chunk */
        }
    }

    free(buf);
    close(fd);
    if (rc == 0) {
        *zero_blocks = zeros;
    }
    return rc;
}

/* sparse-files: scan the regular files in the current directory.
 *   -s  print the name of every sparse file
 *   -c  for every sparse file print
 *         <hole blocks> <stored all-zero blocks> <name>
 *       where a "block" is st_blksize bytes.
 * A file is treated as sparse when it occupies fewer bytes on disk
 * (st_blocks * 512) than its length (st_size).
 * NOTE(review): the option string "sc:" makes -c require an argument, and
 * the usage text calls it a file name, but the original code never used it;
 * that behaviour is kept here - confirm what the argument is meant for. */
int main(int argc, char* argv[]) {
    static const char usage[] = "usage: sparse-files [-s|-c <file name>]\n";

    if (argc > 3) {
        fprintf(stderr, "%s", usage);
        exit(EXIT_FAILURE);
    }

    int option, sflag = 0, cflag = 0;
    while ((option = getopt(argc, argv, "sc:")) != -1) {
        switch (option) {
        case 'c':
            cflag = 1;
            break;
        case 's':
            sflag = 1;
            break;
        default:
            /* getopt() has already printed a diagnostic. */
            fprintf(stderr, "%s", usage);
            exit(EXIT_FAILURE);
        }
    }

    DIR *dirp = opendir(".");
    if (dirp == NULL) {
        perror("Could not open '.'");
        exit(EXIT_FAILURE);
    }

    struct dirent *dp;
    while ((dp = readdir(dirp)) != NULL) {
        const char *file = dp->d_name;
        struct stat st;

        /* Some filesystems do not fill in d_type; fall back to stat(). */
        if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) {
            continue;
        }
        if (stat(file, &st) == -1) {
            perror("stat()");
            continue;           /* st is not valid, skip this entry */
        }
        if (!S_ISREG(st.st_mode) || st.st_blksize <= 0) {
            continue;
        }

        /* st_blocks is counted in 512-byte units, not in st_blksize units. */
        const long long stored_bytes = (long long)st.st_blocks * 512;
        if (stored_bytes >= (long long)st.st_size) {
            continue;           /* fully allocated: not sparse */
        }

        if (sflag) {
            printf("%s\n", file);
        } else if (cflag) {
            const long long blksize = (long long)st.st_blksize;
            /* Both counts are rounded up to whole blocks. */
            const long long total_blocks = ((long long)st.st_size + blksize - 1) / blksize;
            const long long stored_blocks = (stored_bytes + blksize - 1) / blksize;
            const long long hole_blocks =
                total_blocks > stored_blocks ? total_blocks - stored_blocks : 0;

            long long zero_blocks = 0;
            if (count_zero_blocks(file, (size_t)st.st_blksize, &zero_blocks) == -1) {
                continue;
            }
            /* All-zero blocks that are not holes are zeros stored on disk. */
            const long long zeroed_blocks =
                zero_blocks > hole_blocks ? zero_blocks - hole_blocks : 0;

            printf("%lld %lld %s\n", hole_blocks, zeroed_blocks, file);
        }
    }

    closedir(dirp);
    return 0;
}

我真的不知道如何处理它。我真的希望有人能够提供帮助。

您可能知道,如果调用 stat() 得到 struct stat info,那么 info.st_size 是文件的总大小(以字节为单位),而 info.st_blocks*512 是实际存储在磁盘上的字节数。

在 Linux 中,文件系统将数据存储在对齐的、大小为 info.st_blksize 字节的块中。(这也意味着 info.st_blocks*512 可以大于 info.st_size(最多大 info.st_blksize-1 字节);如果文件是稀疏的,它也可以更小。)

未存储的数据(稀疏文件中的漏洞)和显式清零的存储数据都读取为零。

如果你想知道文件中有多少个零填充块,你需要读取整个文件。使用大小为 info.st_blksize 字节整数倍的缓冲区,并对每个对齐的 st_blksize 字节块检查其内容是否全部为零。设 total_blocks 为块总数(包括最后一个可能不完整的块),zero_blocks 为内容全部为零的块数。

struct stat  info;   /* assumed to have been filled in by stat() beforehand */
/* Number of filesystem blocks for the file, counting a partial last block.
 * The conditional needs its own parentheses: `+` binds tighter than `?:`,
 * so without them the whole sum would be used as the condition and the
 * result would always be 0 or 1. */
total_blocks = info.st_size / info.st_blksize
             + ((info.st_size % info.st_blksize) ? 1 : 0);
/* Number of bytes stored for the file (st_blocks is in 512-byte units) */
stored_bytes = 512 * info.st_blocks;
/* Number of filesystem blocks used for file data, rounded up */
stored_blocks = stored_bytes / info.st_blksize
              + ((stored_bytes % info.st_blksize) ? 1 : 0);
/* Number of sparse blocks */
sparse_blocks = total_blocks - stored_blocks;
/* TODO: count zero_blocks,
 *       by reading file in info.st_blksize chunks,
 *       and saving the number of all-zero chunks
 *       in zero_blocks. */
/* Number of stored zero blocks: all-zero blocks that are not holes */
zeroed_blocks = zero_blocks - sparse_blocks;

转换为字节,你有

  • info.st_size是以字节为单位的文件大小
  • stored_blocks*info.st_blksize是磁盘上使用的字节数
  • sparse_blocks*info.st_blksize是磁盘上稀疏孔中的字节数
  • zeroed_blocks*info.st_blksize是磁盘上不必要的存储零字节的数量;这些字节可以存储为稀疏的洞

请注意,您可以使用 cp --sparse=always --preserve=all SOURCEFILE TARGETFILE 来创建文件的相同副本,同时"优化"其稀疏性,使足够长的连续零字节被存储为空洞;这可能有助于您测试程序。有关详细信息,请参阅 man 1 cp。您还可以使用 dd if=/dev/zero of=TARGETFILE bs=BLOCKSIZE count=BLOCKS 创建由零组成的长序列;有关详细信息,请参阅 man 1 dd 和 man 4 null。


编辑添加:

这是一个示例函数,examine(),它打开指定的文件,获取统计信息,并在必要时(即请求不必要的存储零的数量),读取整个文件。

我只是稍微测试了一下,但它应该实现上面的逻辑。

它非常粗糙;我最关注的是正确的错误检查以及动态内存分配/释放的正确性。(它应该检查并返回所有错误条件,甚至包括一些本不应该发生的错误条件,并且永远不会泄漏内存。当然,前提是我的代码中没有错误或疏忽——如果有,非常欢迎指正。)

最好将其拆分为更小、更易于管理的功能。

#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdint.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
/* Check whether a memory region contains only zero bytes.
 * Returns 1 when all `len` bytes starting at `ptr` are zero (also when
 * `len` is 0), and 0 as soon as a nonzero byte is found.
 */
static inline int is_zero(const void *const ptr, const size_t len)
{
    const char *cursor = (const char *)ptr;
    size_t      remaining;

    for (remaining = len; remaining > 0; remaining--, cursor++) {
        if (*cursor != 0) {
            return 0;
        }
    }
    return 1;
}
/* Examine the storage layout of a regular file.
 * Returns 0 if success, errno error code otherwise (errno is set to the
 * same value on the error paths that return `cause`).
 *   (*sizeptr):       File size in bytes
 *   (*blocksizeptr):  File block size in bytes
 *   (*storedptr):     Bytes stored on disk (512 * st_blocks)
 *   (*sparseptr):     Bytes in sparse holes
 *   (*zeroedptr):     Unnecessarily stored zero bytes
 * Any of the output pointers may be NULL.
 * If zeroedptr is NULL, the file is only opened and
 * statistics obtained via fstat(). Otherwise, the entire
 * file will be read.
 * Special errors:
 *   EINVAL:  NULL or empty file name
 *   EISDIR:  Name refers to a directory
 *   EISNAM:  Name refers to something that is neither a directory
 *            nor a regular file (pipe, device, ...)
 *   ENOTSUP: fstat() reported a block size smaller than 1
 *   ENOMEM:  Could not allocate the read buffer
 *   EIO:     read() returned an unexpected negative value
 *   EBUSY:   File was modified during read
 */
int examine(const char *const filename,
            uint64_t *const sizeptr,
            uint64_t *const blocksizeptr,
            uint64_t *const storedptr,
            uint64_t *const sparseptr,
            uint64_t *const zeroedptr)
{
    struct stat  info;
    int          fd, result;
    size_t       size, have;
    uint64_t     total, nonzero, stored;
    int          cause = 0;
    char        *data = NULL;

    /* Check for NULL or empty filename. */
    if (!filename || !*filename)
        return errno = EINVAL;

    /* Open the specified file. */
    do {
        fd = open(filename, O_RDONLY | O_NOCTTY);
    } while (fd == -1 && errno == EINTR);
    if (fd == -1)
        return errno;

    /* Single-pass block: `break` jumps to the common cleanup below. */
    do {

        /* Obtain file statistics. */
        if (fstat(fd, &info) == -1) {
            cause = errno;
            break;
        }

        /* Verify we have a valid block size. This must happen before the
         * block size is used as a divisor below; a zero block size would
         * otherwise cause a division by zero. */
        if (info.st_blksize < (blksize_t)1) {
            cause = ENOTSUP;
            break;
        }

        /* Count total, rounding up to next multiple of block size. */
        total = (uint64_t)info.st_size;
        if (total % (uint64_t)info.st_blksize)
            total += (uint64_t)info.st_blksize - ((uint64_t)total % (uint64_t)info.st_blksize);

        /* Count total stored bytes. */
        stored = (uint64_t)512 * (uint64_t)info.st_blocks;

        /* Fill in immediately known fields. */
        if (sizeptr)
            *sizeptr = (uint64_t)info.st_size;
        if (blocksizeptr)
            *blocksizeptr = (uint64_t)info.st_blksize;
        if (storedptr)
            *storedptr = stored;
        if (sparseptr) {
            if (total > stored)
                *sparseptr = total - stored;
            else
                *sparseptr = 0;
        }
        if (zeroedptr)
            *zeroedptr = 0;

        /* Verify we have a regular file. */
        if (S_ISDIR(info.st_mode)) {
            cause = EISDIR;
            break;
        } else
        if (!S_ISREG(info.st_mode)) {
            cause = EISNAM;
            break;
        }

        /* If zeroedptr is NULL, we do not need to read the file. */
        if (!zeroedptr) {
            /* Close descriptor and return success. */
            do {
                result = close(fd);
            } while (result == -1 && errno == EINTR);
            if (result == -1)
                return errno;
            return 0;
        }

        /* Use large enough chunks for I/O: the largest multiple of the
         * block size that fits in 128 KiB, or one block if it is larger. */
        if (info.st_blksize < (blksize_t)131072) {
            const size_t chunks = (size_t)131072 / (size_t)info.st_blksize;
            size = chunks * (size_t)info.st_blksize;
        } else
            size = (size_t)info.st_blksize;

        /* Allocate buffer. */
        data = malloc(size);
        if (!data) {
            cause = ENOMEM;
            break;
        }

        /* Clear counters. From here on `total` counts bytes actually read. */
        total = 0;
        nonzero = 0;
        have = 0;

        /* Read loop. */
        while (1) {
            size_t  i;
            ssize_t bytes;
            int     ended = 0;

            /* Read until the buffer holds at least one full block,
             * or end of input is reached. */
            while (have < (size_t)info.st_blksize) {
                bytes = read(fd, data + have, size - have);
                if (bytes > (ssize_t)0) {
                    have += bytes;
                    total += (uint64_t)bytes;
                } else
                if (bytes == (ssize_t)0) {
                    /* Clear the end of the buffer; just to be sure */
                    memset(data + have, 0, size - have);
                    ended = 1;
                    break;
                } else
                if (bytes != (ssize_t)-1) {
                    cause = EIO;
                    break;
                } else
                if (errno != EINTR) {
                    cause = errno;
                    break;
                }
                /* bytes == -1 with EINTR: retry the read. */
            }
            if (cause)
                break;

            /* Count number of zero/nonzero chunks in buffer, but add up as bytes. */
            i = have / (size_t)info.st_blksize;
            while (i-->0)
                if (!is_zero(data + i * (size_t)info.st_blksize, (size_t)info.st_blksize))
                    nonzero += (uint64_t)info.st_blksize;

            /* Followed by a partial chunk? Move it to the start of the buffer. */
            {   const size_t overlap = have % (size_t)info.st_blksize;
                if (overlap) {
                    if (have > overlap)
                        memcpy(data, data + have - overlap, overlap);
                    have = overlap;
                } else
                    have = 0;
            }

            /* Next round of the loop, unless end of input. */
            if (!ended)
                continue;

            /* Entire file has been processed. */

            /* Partial chunk in buffer? It is accounted as one full block. */
            if (have) {
                if (!is_zero(data, have))
                    nonzero += (uint64_t)info.st_blksize;
            }

            /* If file size changed, update statistics. */
            if (total != (uint64_t)info.st_size) {
                if (fstat(fd, &info) == -1) {
                    cause = errno;
                    break;
                }
                /* File changed from under us? */
                if (total != (uint64_t)info.st_size) {
                    cause = EBUSY;
                    break;
                }
            }

            /* Align total size to (next) multiple of block size. */
            if (total % (uint64_t)info.st_blksize)
                total += (uint64_t)info.st_blksize - (total % (uint64_t)info.st_blksize);

            /* Bytes stored on disk. */
            stored = (uint64_t)512 * (uint64_t)info.st_blocks;

            /* Sanity check. (File changed while we read it?) */
            if (stored > total || nonzero > stored) {
                cause = EBUSY;
                break;
            }

            /* Update fields. */
            if (sizeptr)
                *sizeptr = (uint64_t)info.st_size;
            if (storedptr)
                *storedptr = (uint64_t)512 * (uint64_t)info.st_blocks;
            if (sparseptr)
                *sparseptr = total - stored;
            if (zeroedptr)
                *zeroedptr = (total - nonzero) - (total - stored);

            /* Discard buffer. */
            free(data);

            /* Close file and return. */
            do {
                result = close(fd);
            } while (result == -1 && errno == EINTR);
            if (result == -1)
                return errno;

            return 0;
        }

    } while (0);

    /* Free buffer, if allocated. free(NULL) is safe. */
    free(data);

    /* Close file, and return with cause. */
    do {
        result = close(fd);
    } while (result == -1 && errno == EINTR);

    return errno = cause;
}

为了便于移植,所有返回参数都是 64 位无符号整数,并以字节为单位给出相应的大小。请注意,(*storedptr)+(*sparseptr) 等于向上舍入到 (*blocksizeptr) 的下一个倍数的字节总数。(*zeroedptr) 仅包括显式存储的零,不包括稀疏空洞;也就是说,(*zeroedptr) 可以看作不必要地存储的零字节数。

我使用 rm -f test ; dd if=/dev/zero of=test bs=10000 seek=3 count=1 生成了一个 test 文件,其中包含一个 30,000 字节的空洞,后跟 10,000 个零字节。examine() 返回 size=40000、blocksize=4096、stored=12288、sparse=28672、zeroed=12288,这在我看来是正确的。

问题?

最新更新