我在这里做错了什么(尝试用 C 编写 *.srt 解析器)



我刚刚找到了一个mpv无法加载的*.srt文件。所以,我想让我们制作我自己的字幕解析器,将字幕的路径作为命令行参数。这是我尝试过的:

/* Intended to be a program for parsing *.srt subtitles as an alternative to video players' */
#include <ncurses.h>
#include <unistd.h>
#define SEC_IN_MIN 60
#define MIN_IN_HR 60
long get_duration(FILE *fp); // to get the duration of a dialogue in seconds
long turn_to_sec(int hours, int minutes, int seconds); // returns sum of hours and minutes, all in seconds
int main(int argc, char **argv)
{
    FILE *fp;
    long sec;
    char ch;
    if(argv[1] == NULL)
    {
        printf("Please enter a filename!n");
        return 1;
    }
    printf("Trying to open specified file %sn",argv[1]);
    fp = fopen(argv[1],"r");
    if(fp == NULL)
    {
        printf("Error while opening file %sn",argv[1]);
        return 1;
    }
    initscr(); // initialise nCurses window
    ch = getc(fp);
    while(ch != EOF)
    {
        clear();
        sec = get_duration(fp);
        while(1)
        {
            if((ch = getc(fp)) == 'n')
            {
                if((ch = getc(fp)) == 'n' || ch == EOF)
                    break;
                else
                    addch(ch);
            }
            addch(ch);
        }
        refresh();
        sleep(sec);
    }
    endwin(); // close nCurses
    fclose(fp); // close the file
    return 0;
}
long get_duration(FILE *fp)
{
    long duration = 0;
    char ch;
    short hour_start = 0, hour_end = 0, minute_start = 0, minute_end = 0, second_start = 0, second_end = 0;
    short count=0;
    /* just to get to the point where time-specs of the dialogue start */
    while((ch = getc(fp)) != 'n');
    /* extract characters until ':' to get hour_start */
    while((ch = getc(fp)) != 58)
    {
        hour_start += ch;
        count++;
    }
    hour_start -= (hour_start/(49*count));
    /* extract characters until ':' to get minute_start */
    count = 0;
    while((ch = getc(fp)) != 58)
    {
        minute_start += ch;
        count++;
    }
    minute_start -= (minute_start/(49*count));
    /* extract characters until ',' to get second_start */
    count = 0;
    while((ch = getc(fp)) != 44)
    {
        second_start += ch;
        count++;
    }
    second_start -= (second_start/(49*count));
    /* now, see if you can find a '>' */
    while((ch = getc(fp)) != 62);
    ch = getc(fp); // to get rid of that space after "-->"
    /* extract characters until ':' to get hour_end */
    while((ch = getc(fp)) != 58)
    {
        hour_end += ch;
        count++;
    }
    hour_end -= (hour_end/(49*count));
    /* extract characters until ':' to get minute_end */
    count = 0;
    while((ch = getc(fp)) != 58)
    {
        minute_end += ch;
        count++;
    }
    minute_end -= (minute_end/(49*count));
    /* extract characters until ',' to get second_end */;
    count = 0;
    while((ch = getc(fp)) != 44)
    {
        second_end += ch;
        count++;
    }
    second_end -= (second_end/(49*count));
    /* finally, gonna get those values */
    second_end -= second_start;
    minute_end -= minute_start;
    hour_end -= hour_start;
    duration += (turn_to_sec(hour_end, minute_end, second_end));
    /* positioning the fp to the right position just to keep the 'main()' :) */
    while((ch = getc(fp)) != 'n' || ch != EOF);
    return duration;
}
long turn_to_sec(int hours, int minutes, int seconds)
{
    long temp;
    /* manipulating hours */
    temp = hours;
    temp *= MIN_IN_HR;
    temp *= SEC_IN_MIN;
    seconds += temp;
    /* manipulating minutes */
    temp = minutes;
    temp *= SEC_IN_MIN;
    seconds += temp;
    return seconds;
}

在第一次尝试时,我只使用对话的开始时间作为对话的持续时间,即 end_time - start_time,这就是为什么缺少这部分:

/* extract characters until ':' to get hour_end */
    while((ch = getc(fp)) != 58)
    {
        hour_end += ch;
        count++;
    }
    hour_end = (hour_end/(49*count));
    /* extract characters until ':' to get minute_end */
    count = 0;
    while((ch = getc(fp)) != 58)
    {
        minute_end += ch;
        count++;
    }
    minute_end = (minute_end/(49*count));
    /* extract characters until ',' to get second_end */
    count = 0;
    while((ch = getc(fp)) != 44)
    {
        second_end += ch;
        count++;
    }
    second_end = (second_end/(49*count));

变量的名称有点不同,然后我意识到我错了,但这都无关紧要.我之所以这么说,是因为在那之前,代码运行良好(尽管有一些垃圾,但结果出乎意料(,但现在它只是卡住了,什么也没做。为什么?非常感谢您的时间!

这是我正在尝试的文件:https://gist.github.com/gaurav712/6646ad7dfd3c487536dce9b0712471e7

您的问题之一(可能是您现在遇到的问题(是getc()不返回读取字符的 ASCII 值。如果没有什么可读的,它也可能会返回 EOF。

由于您的循环在找到某个字符(例如":"(时结束,并且EOF绝对不是该字符,因此在这些情况下,您的程序将永远循环

我建议将该逻辑封装到数字读取函数中:

/**
 * Reads a positive number (hopefully less than INT_MAX) from a stream
 * Returns -1 if the stream is at end-of-file
 * 
 * @param FILE *fp
 * @return int            the number read, or -1 if EOF
 */
int readDigits(FILE *fp) {
    int value = 0, c;
    if (feof(fp)) {
        return -1;
    }
    for (;;) {
        c = fgetc(fp);
        // EOF is not a digit so we catch it in the digit check
        // if (EOF == c) {
        //    break;
        // }
        if ((c < '0') || (c > '9')) {
            break;
        }
        value *= 10;
        value += (c - '0');
    }
    return value;
}
int seconds(int h, int m, int s) {
    return (h*60 + m) * 60 + s;
}

现在您可以执行以下操作:

hour_start = readDigits(fp);
if (hour_start < 0) {
    // Error, do something
}
min_start = readDigits(fp);
// check if -1
sec_start = readDigits(fp);
// check if -1
sec_start = seconds(hour_start, min_start, sec_start);
...

我解决了它:

我用一大堆评论从头开始重写了它

/* Third attempt to create a subtitle parser
 * 29 March, 2019
 * 12:55
 */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ncurses.h>
/* to convert strings of format hh:mm:ss,m_s to seconds (ignoring milliseconds for now) */
int strToTime(char start[13], char end[13]);
int main(int argc, char **args){
    short count, shouldExit = 0;
    int duration, timeBwDialogues;
    char ch, startTimeStr[13], endTimeStr[13], prevTimeStr[13];
    FILE *fp;
    endTimeStr[0] = 0;
    /* Check if argument is present */
    if(args[1] == NULL){
        printf("No file specified!n");
        exit(1);
    }
    /* Opening file for reading */
    printf("Trying to open file "%s"n", args[1]);
    fp = fopen(args[1], "r");
    /* Checking if file was opened properly */
    if(fp == NULL){
        printf("Failed to open file "%s"!n", args[1]);
        exit(2);
    }
    /* Checking if file has contents or not */
    if((ch = getc(fp)) == EOF){
        printf("File has no contents!n");
        exit(3);
    }
    ungetc(ch, fp); /* putting ch back as file isn't empty */
    /* Section for setting up dialogue-number details
     * If I ever want to do that
     */
    /* initialising screen for nCurses */
    initscr();
    /* Here comes the actual stuff */
    while(1){
        /* As we aren't considering dialogue number, let's just skip that*/
        while((getc(fp)) != 'n');
        /* Reading dialogue's starting time :
         * It says keep reading until you find a character other that 0-9, ','
         * or ':' and store it to startTimeStr[]. Finally pass startTimeStr[]
         * to strToTime() to convert it to seconds(for now)
         */
        count = 0; /* Setting-up counter for the loop */
        while(1){
            ch = getc(fp);
            if(ch == ' ' || ch == '-')
                break;
            startTimeStr[count] = ch;
            count++;
        }
        startTimeStr[count] = ''; /* to terminate the string */
        /* Making a copy of endTimeStr[] in prevTimeStr[] to get timeBwDialogues */
        if(endTimeStr[0]){
            strcpy(prevTimeStr, endTimeStr);
            /* Calculating timeBwDialogues */
            timeBwDialogues = strToTime(prevTimeStr, startTimeStr);
        } else
            timeBwDialogues = strToTime("00:00:00,000", startTimeStr);
        /* For better readability */
        clear();
        refresh();
        /* Sleeping when there's no voice for synchronisation */
        sleep(timeBwDialogues);
        /* Placing the pointer to right position for reading ending time.
         * Using do-while to make sure at least one character is read before checking the condition
         */
        while((getc(fp)) != '>');
        if((ch = getc(fp)) == ' ');
        else
            ungetc(ch, fp);
        /* Just similar to what was done above, reading ending time */
        count = 0; /* Setting-up counter for the loop */
        while(1){
            ch = getc(fp);
            if(ch == 'n' || ch == ' ')
                break;
            endTimeStr[count] = ch;
            count++;
        }
        endTimeStr[count] = ''; /* to terminate the string */
        /* Calculating duration for individual dialogues */
        duration = strToTime(startTimeStr, endTimeStr); /* passing startTimeStr[] to strToTime */
        /* displaying the dialogue */
        while(1){
            ch = getc(fp);
            /* If ch is newline, the next character maybe EOF. So let's check */
            if(ch == EOF){
                shouldExit = 1;
                break;
            } else if(ch == '<'){
                while((ch = getc(fp)) != '>');
                continue;
            } else if(ch == 'n'){
                if((ch = getc(fp)) == EOF){
                    shouldExit  = 1;
                    break;
                } else if(ch == 'n') /* if the next character is newline, it's the end of the dialogue */
                    break;
                else{
                    ungetc(ch, fp);
                    addch('n');
                    continue;
                }
            }
            /* As the next character to ch is not EOF, dialogue still exists(a dialogue might take multiple lines)
             * and it should be put on the screen
             */
            addch(ch);
        }
        refresh();
        sleep(duration);
        if(shouldExit)
            break;
    }
    /* Closing nCurses' window */
    endwin();
    /* Closing the file */
    fclose(fp);
    return 0;
}
/* Defining the function */
int strToTime(char start[13], char end[13]){
    int hour_start, hour_end, minute_start, minute_end, second_start, second_end;
    /* Separating hh, mm and ss for starting time. As I said above, I'll ignore milliseconds */
    /* getting hour_start */
    hour_start = ((start[0] - '0')*10)+(start[1] - '0');
    /* getting minute_start */
    minute_start = ((start[3] - '0')*10)+(start[4] - '0');
    /* getting second_start */
    second_start = ((start[6] - '0')*10)+(start[7] - '0');
    /* Separating hh, mm and ss for ending time. As I said above, I'll ignore milliseconds */
    /* getting hour_end */
    hour_end = ((end[0] - '0')*10)+(end[1] - '0');
    /* getting minute_end */
    minute_end = ((end[3] - '0')*10)+(end[4] - '0');
    /* getting second_end */
    second_end = ((end[6] - '0')*10)+(end[7] - '0');
    return ( ( ( ( (hour_end - hour_start) * 60) + (minute_end - minute_start) ) * 60) + (second_end - second_start) );
}

最新更新