在 C 语言中计算文本文件里某个单词的出现次数与单词总数之比时遇到的问题


#include<conio.h>
#include<stdio.h>
#include<iostream.h>
#define NULL 0

/*
 * Reads abc.txt one character at a time. nw is incremented once for every
 * run of spaces that is encountered, t is meant to count occurrences of the
 * word held in s, and both totals are printed at the end.
 */
int main()
{
    char name[20],c;     // name is never assigned: the prompt and gets() below are commented out
    int nw=0;            // incremented after each run of spaces
    int j=0;             // next free index in p
    int t=0;             // incremented when p is judged equal to s
    char s[] = "newas";  // find the frequency of this word in abc.txt
    char p[5];           // collected characters; 5 slots, so no room for a terminating '\0'
    FILE *fpt;
    //printf("Enter the name of file to be checked:- ");
    //gets(name);
    fpt=fopen("abc.txt","r");
    if (fpt==NULL)
    {
        // NOTE(review): this prints name, which has not been initialized,
        // rather than the literal "abc.txt" that was passed to fopen.
        printf("ERROR - can/'t open file %s",name);
        getch();
        exit(0);
    }
    else
    {
        // NOTE(review): getc returns an int; the value is narrowed to the
        // char c before being compared with EOF.
        while ((c=getc(fpt))!=EOF)
        {
            switch(1)   // constant selector, so case 1 runs on every iteration
            {
                case 1:
                    if (c==' ')
                    {
point:                  // also the target of the goto further down
                        // Skip the rest of the run of spaces; c ends up holding
                        // the first character that is not a space.
                        while((c=getc(fpt))==' ');
                        // Always true here: the loop above only exits when c != ' '.
                        if (c!=' ')
                            nw=nw+1;
                        // if(c==' ')
                            // nw--;
                        // Only the first character after a run of spaces reaches
                        // this store, so p collects word-initial characters.
                        if(j < 5)
                            p[j++] = c;
                        // When the store above ran, j has already been advanced,
                        // so this reads the slot after the one just written.
                        printf("n %c ",p[j]);
                        if(j == 5)
                        {
                            // p and s are two distinct arrays, so == compares
                            // their addresses, not the characters they hold.
                            if(p == s)
                            {
                                t++;
                                j = 0;    
                            }
                        }
                    }
                    // c cannot be ' ' here: either the block above replaced it
                    // with a non-space, or it was not a space to begin with.
                    if(c==' ')
                    {
                        j = 0;
                        goto point;
                    }
            }
        }
    }
    printf("n The no. of words is %d. ",nw);
    printf("n Freq of words %s is %d. ",s,t);
    getch();
}

上面的代码能给出正确的单词总数,但给出的特定单词(即代码中指定的单词)的频率不正确。请就此给出意见:应该如何计算文本文件中特定单词的频率?

由于您包含了 iostream.h,我猜这应该是某种形式的 C++,而不是 C。如果是这样,可以这样统计词频:

#include <iostream>
#include <map>
#include <string>
#include <fstream>
using namespace std;
typedef map <string, int> FreqMap;

/*
 * Reads whitespace-separated words from words.txt and prints every distinct
 * word followed by the number of times it occurred, one pair per line, in
 * the map's key order.
 *
 * Returns 0 on success and 1 when words.txt cannot be opened.
 */
int main() {
    FreqMap frequencies;
    ifstream ifs( "words.txt" );
    if ( !ifs ) {
        // Without this check a missing file silently produces no output.
        cerr << "ERROR - can't open file words.txt\n";
        return 1;
    }

    string word;
    while( ifs >> word ) {
        // operator[] value-initializes a missing entry to 0 before the increment.
        ++frequencies[ word ];
    }

    for ( FreqMap::const_iterator it = frequencies.begin();
            it != frequencies.end(); ++it ) {
        cout << it->first << " " << it->second << "\n";
    }
    return 0;
}

这段查找目标单词的代码:

                        // p and s are arrays in the question's code, so == compares
                        // their addresses rather than the characters they hold.
                        if(p == s)
                        {
                            t++;
                            j = 0;    
                        }

是错的。在 C 中不能这样比较字符串:这只比较指针值,而不是所指向的字符(即字符串的内容)。

假设代码的其余部分设置正确,以便p真正指向一个真正的字符串,你可以这样做:

// strcmp (declared in <string.h>) returns 0 when both strings hold the same
// characters; it reads each argument up to its terminating '\0'.
if(strcmp(p, s) == 0)
{
  t++;
  j = 0;
}

这要求 p 指向一个完整的、以 0 结尾的字符串;如果它指向的是一行中间的某个字符,上述代码将不起作用。

我并没有完全回答这个问题,但这是一些可能对你有所帮助的反馈......

#include<conio.h>
#include<stdio.h>
#include<iostream.h>
#define NULL 0

/*
 * Annotated copy of the word-counting program under review.
 *
 * The program logic is intentionally left exactly as posted, because the
 * inline review comments describe what is wrong with it. Only two things
 * were repaired: the review comment before the 'else' is now terminated
 * once (it used to leave a stray comment terminator in the code), and the
 * newline escapes in the printf format strings are restored.
 */
int main()
{
/* 
 * GIVE YOUR VARIABLES NAMES THAT MAKE SENSE
 * j, t, c, s, nw are meaningless to anybody picking up the code 
 */
    char name[20],c;
    int nw=0;
    int j=0;
    int t=0;
    char s[] = "newas";  // find the frequency of this word in abc.txt
/* 
 * Personally, I'd tend to have p as an array of 6, so that it's the same size as
 * s and I'd initialize it to "", so that it's got a null terminator.
 */
    char p[5]; 
    FILE *fpt;
    fpt=fopen("abc.txt","r");
    if (fpt==NULL)
    {
        printf("ERROR - can/'t open file %s",name);
        getch();
        exit(0);
    }
/*
 * you don't need an else here... the other flow has already terminated
 */
    else
    {
        while ((c=getc(fpt))!=EOF)
        {
/*
 * What is the point of this switch statement?  It may as well say if(true)
 */
            switch(1)
            {
                case 1:
                    if (c==' ')
                    {
/*
 * If you start using goto's in your code, it's usually a good sign that there's
 * something wrong
 */
point:
/*
 * It's hard to follow what you're doing because your variables don't have names
 * and your code has no clear intent.  If the while loop was in a function
 * 'SkipToNextWord', the intent would be clearer, which would make it easier to find
 * issues.  What happens if there is a space at the end of your file?
 */
                        while((c=getc(fpt))==' ');
/*
 * 'c' is never going to equal ' ', if it did, you'd still be in the while loop
 */
                        if (c!=' ')
                            nw=nw+1;
                        // if(c==' ')
                            // nw--;
                        if(j < 5)
                            p[j++] = c;
                        printf("\n %c ",p[j]);
/*
 * This as written, could be a compound if statement...
 *     if(j == 5 && p == s)
 */
                        if(j == 5)
                        {
/*
 * However, it looks like you're trying to do a string comparison?
 *     if(strncmp(p, s, sizeof(s)-1)==0)
 */
                            if(p == s)
                            {
                                t++;
/* 
 * This 'j=0' should be outside of the inner if, otherwise if there isn't a match
 * you don't reset j to 0
 */
                                j = 0;    
                            }
                        }
                    }
/* 
 * If you have a six letter word in your file, j is never reset to
 * 0 and next time round the loop, you're not going to collect the
 * letters correctly
 */
                    if(c==' ')
                    {
                        j = 0;
                        goto point;
                    }
            }
        }
    }
    printf("\n The no. of words is %d. ",nw);
    printf("\n Freq of words %s is %d. ",s,t);
    getch();
}

我认为以下代码将回答您的问题:

#include <stdio.h>
#include <conio.h>
/*
 * Counts the words in abc.txt and how many of them are exactly "newas",
 * then prints both totals.
 *
 * A word is a maximal run of characters other than ' '. Only the space
 * character is treated as a separator, so tabs and newlines are counted as
 * part of the word they touch.
 *
 * Returns 0 on success and 1 when the file cannot be opened.
 */
int main(int argc, char* argv[])
{
    const char* name = "abc.txt";   /* string literals must not be modified */
    const char* word = "newas";
    FILE*   fpt = fopen(name, "r");
    int     c;                      /* int, not char, so EOF stays distinguishable */
    int     nw = 0;                 /* total number of words */
    int     t = 0;                  /* number of words equal to 'word' */
    int     i;

    (void)argc;
    (void)argv;

    if (fpt == NULL)
    {
        printf("ERROR - can't open file %s\n", name);
        getch();
        return 1;
    }
    while ((c = getc(fpt)) != EOF)
    {
        /* Skip spaces */
        if (c == ' ')
            continue;
        /* First character of a new word */
        nw++;
        /* Advance through the file and 'word' together while they agree */
        i = 0;
        while ((c != EOF) && (c != ' ') && ((char)c == word[i]) && (word[i] != '\0'))
        {
            c = getc(fpt);
            i++;
        }
        /* A match needs both to end together: file word finished AND 'word' exhausted */
        if (((c == ' ') || (c == EOF)) && (word[i] == '\0'))
            t++;
        /* Skip whatever is left of the current word */
        while ((c != EOF) && (c != ' '))
            c = getc(fpt);
    }
    fclose(fpt);
    printf("\n The no. of words is %d.\n", nw);
    printf("\n Freq of words %s is %d.\n", word, t);
    getch();
    return 0;
}

最新更新