c-计算文件中的字符、单词和行数



我尝试计算文件中的字符、单词和行数。txt文件为:

The snail moves like a
Hovercraft, held up by a
Rubber cushion of itself,
Sharing its secret

这是代码,

/*
 * Counts characters, words and lines in the file named by `filename` and
 * stores the totals in `properties`.
 *
 * fileptr    - value passed in is ignored; the parameter only serves as the
 *              handle for the stream this function opens and closes itself.
 * filename   - path of the file to read (opened in binary mode).
 * properties - receives char_count, line_count and word_count.
 *
 * Counting rules (kept exactly as in the question):
 *   - every byte except ' ' is a character (so '\n' is counted too),
 *   - every '\n' is a line,
 *   - every ' ', '\t', '\n' or '\0' is counted as a word.
 * If the file cannot be opened, all three counts are stored as 0.
 */
void count_elements(FILE* fileptr, char* filename, struct fileProps* properties)
{
    int chars = 0, words = 0, lines = 0;
    int ch; /* int, not char: fgetc() returns int so EOF stays distinguishable */

    fileptr = fopen(filename, "rb");
    if (fileptr != NULL) {
        while ((ch = fgetc(fileptr)) != EOF) {
            if (ch != ' ')
                chars++;
            if (ch == '\n') /* check lines */
                lines++;
            if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\0') /* check words */
                words++;
        }
        fclose(fileptr);
    }

    properties->char_count = chars;
    properties->line_count = lines;
    properties->word_count = words;
}

但当我打印字符、单词和行的数量时,输出分别为 81、18 和 5。我错过了什么?(读取模式不会改变任何东西,我也尝试了 "r"。)

我提出的解决方案给我的结果与gedit文档统计数据相同:

#include <stdio.h>
/*
 * Reads the file named by `filename` and prints three totals to stdout:
 *   Chars - bytes in the printable, non-space ASCII range 33..126
 *   Lines - number of '\n' bytes
 *   Words - runs of printable bytes ended by ' ', '\t', '\n', '\0' or EOF
 *
 * Any other byte (for example the '\r' of a Windows CRLF pair) is ignored
 * and does not start or end a word.
 * If the file cannot be opened, an error is reported with perror() and
 * nothing is counted.
 */
void count_elements(const char *filename)
{
    /* The stream is only used here, so it is a local rather than a parameter. */
    FILE *fileptr = fopen(filename, "rb");
    if (fileptr == NULL) {
        perror(filename);
        return;
    }

    int chars = 0, words = 0, lines = 0;
    int in_word = 0; /* non-zero while printable bytes of a word have been seen */
    int read;        /* int because fgetc() returns an int (unsigned char value or EOF) */

    while ((read = fgetc(fileptr)) != EOF) {
        /* Safe: EOF was already excluded, so the value fits an unsigned char. */
        unsigned char ch = (unsigned char)read;

        if (ch >= 33 && ch <= 126) {
            /* printable character that is not a space */
            ++chars;
            in_word = 1;
        } else if (ch == '\n' || ch == '\t' || ch == '\0' || ch == ' ') {
            /* A separator only closes a word if one was actually in progress,
             * so repeated separators are not counted as extra words. */
            if (in_word) {
                ++words;
                in_word = 0;
            }
            if (ch == '\n') {
                ++lines;
            }
        }
        /* Remaining bytes fall through untouched and leave in_word as it is. */
    }

    /* A final word that is not followed by a separator still counts. */
    if (in_word) {
        ++words;
    }
    fclose(fileptr);

    printf("Chars: %d\n", chars);
    printf("Lines: %d\n", lines);
    printf("Words: %d\n", words);
}
/* Entry point: prints the statistics of the file named "test". */
int main(void)
{
    count_elements("test");
    return 0;
}

有关说明,请参阅代码中的注释。该代码还会忽略其他控制字符(例如 Windows CRLF 中的 CR),只把 LF 计为换行。

您的函数同时将 FILE* 和 filename 作为参数,其中一个应该删除。我删除了 filename,以便该函数可以与任何 FILE*(如 stdin)一起使用。

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
/* Totals collected for one input stream; typedef'd so callers can write
 * `fileProps` without the `struct` keyword. */
typedef struct {
    uintmax_t char_count; /* characters counted */
    uintmax_t word_count; /* words counted */
    uintmax_t line_count; /* lines counted */
} fileProps;
/*
 * Helper that prints the content of a fileProps.
 *
 * Writes the three counters of *p to fp, one per line, in the order
 * chars, words, lines. Returns fp unchanged.
 */
FILE* fileProps_print(FILE *fp, const fileProps *p) {
    /* %ju is the conversion for uintmax_t; each line ends with a newline. */
    fprintf(fp,
            "chars %ju\n"
            "words %ju\n"
            "lines %ju\n",
            p->char_count, p->word_count, p->line_count);
    return fp;
}
/*
 * Counts characters, words and lines read from fileptr until EOF and stores
 * the totals in *properties.
 *
 *   char_count - every byte read, whitespace included
 *   word_count - runs of non-whitespace bytes (as classified by isspace())
 *   line_count - number of '\n' bytes
 *
 * If fileptr is NULL the function returns without touching *properties.
 * The stream is neither opened nor closed here, so any FILE* such as stdin
 * can be passed.
 */
void count_elements(FILE *fileptr, fileProps *properties) {
    if(!fileptr) return;

    properties->char_count = 0;
    properties->line_count = 0;
    properties->word_count = 0;

    int ch;          /* int, not char: fgetc() returns int so EOF is distinguishable */
    int in_word = 0; /* non-zero while inside a run of non-whitespace bytes */

    while((ch = fgetc(fileptr)) != EOF) {
        ++properties->char_count; /* count all characters */

        /* ch holds an unsigned char value here, so it is valid for isspace() */
        if(isspace(ch)) {
            /* whitespace ends a word only if one was in progress */
            if(in_word) {
                ++properties->word_count;
                in_word = 0;
            }
            if(ch == '\n') ++properties->line_count;
        } else {
            in_word = 1;
        }
    }

    /* a last word not followed by whitespace still counts */
    if(in_word) ++properties->word_count;
}
/* Entry point: counts the contents of "the_file.txt" and prints the totals
 * to stdout. Does nothing if the file cannot be opened. */
int main(void) {
    FILE *fp = fopen("the_file.txt", "r");
    if(fp == NULL) return 0;

    fileProps p;
    count_elements(fp, &p);
    fclose(fp);
    fileProps_print(stdout, &p);
    return 0;
}

您在问题中显示的文件的输出:

chars 93
words 17
lines 4

编辑:我刚刚注意到你的评论“试图仅将字母表中的字母计数为 char”。为此,您可以使用 isalpha 并将 while 循环替换为:

while((ch = fgetc(fileptr)) != EOF) {
if(isalpha((unsigned char)ch)) ++properties->char_count;
else if(isspace((unsigned char)ch)) {
++properties->word_count;
if(ch == 'n') ++properties->line_count;
}
}

修改版本输出:

chars 74
words 17
lines 4

一种能够读取“宽”字符(多字节)的版本:

#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>
/* Character, word and line totals for one input stream. */
typedef struct {
    uintmax_t char_count; /* characters counted */
    uintmax_t word_count; /* words counted */
    uintmax_t line_count; /* lines counted */
} fileProps;
/*
 * Writes the three counters of *p to fp, one per line, in the order
 * chars, words, lines. Returns fp unchanged.
 */
FILE* fileProps_print(FILE *fp, const fileProps *p) {
    /* %ju is the conversion for uintmax_t; each line ends with a newline. */
    fprintf(fp,
            "chars %ju\n"
            "words %ju\n"
            "lines %ju\n",
            p->char_count, p->word_count, p->line_count);
    return fp;
}
/*
 * Wide-character variant: reads fileptr with fgetwc() until WEOF and stores
 * the totals in *properties.
 *
 *   char_count - characters for which iswalpha() is true
 *   word_count - runs of non-whitespace characters (as classified by iswspace())
 *   line_count - number of L'\n' characters
 *
 * If fileptr is NULL the function returns without touching *properties.
 */
void count_elements(FILE *fileptr, fileProps *properties) {
    if(!fileptr) return;

    properties->char_count = 0;
    properties->line_count = 0;
    properties->word_count = 0;

    wint_t ch;       /* wint_t so that WEOF can be told apart from characters */
    int in_word = 0; /* non-zero while inside a run of non-whitespace characters */

    while((ch = fgetwc(fileptr)) != WEOF) {
        if(iswspace(ch)) {
            /* whitespace ends a word only if one was in progress */
            if(in_word) {
                ++properties->word_count;
                in_word = 0;
            }
            if(ch == L'\n') ++properties->line_count;
        } else {
            in_word = 1;
            if(iswalpha(ch)) ++properties->char_count;
        }
    }

    /* a last word not followed by whitespace still counts */
    if(in_word) ++properties->word_count;
}
/* Entry point: selects the locale, counts the contents of "the_file.txt"
 * and prints the totals to stdout. Does nothing more if the file cannot be
 * opened. */
int main(void) {
    setlocale(LC_ALL, "sv_SE.UTF-8");      /* set your locale */

    FILE *fp = fopen("the_file.txt", "r");
    if(fp == NULL) return 0;

    fileProps p;
    count_elements(fp, &p);
    fclose(fp);
    fileProps_print(stdout, &p);
    return 0;
}

如果the_file.txt包含一行öäü,它将报告

chars 3
words 1
lines 1

对于您的原始文件,它会报告与上面相同的内容。

最新更新