c-进行单词/短语替换的程序超过时间限制



很抱歉问了这个可能又长又笨的问题,但我真的被难住了。我正在为大学做一项任务。它的意思很简单。您需要实现一个函数,该函数将改变";坏的";短语到";好";。函数的输入是一个文本和一个包含好单词和坏单词的双数组(在左列中是需要替换的单词,在右列中是要插入的单词而不是坏单词)。包含坏词和好词的词典本身可以有任何大小,但最终总会有一对NULL-NULL

需要注意的是,程序不应该对已经替换的短语进行任何更改。线";终止专家";包含单词";"专家";,所以程序必须检查文本中是否有已经被替换的单词;终止专家";不会变为行";具有认证知识水平的终止人员";。支票就在这里。

程序还必须确保输入的好词和坏词词典是正确的,这意味着一个坏词不能成为另一个坏单词的开头。此检查发生在函数replaceInvalidity

有单词的文本和字典不一定是有意义的。在这个任务的上下文中,它只是一组符号,即字母、数字、符号

我写了一个程序,它通过了大部分测试,但由于某种原因,在其中一个测试中,它循环并超过了时间限制(2秒)。结果,我整个任务得了0分。

我试着用Valgrind检查记忆,但没有显示任何错误。

完整代码:

#ifndef __PROGTEST__
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
#endif /* __PROGTEST__ */
int replaceInvalidity(const char * (*replace)[2])
{
int size = 0;
for (int i = 0; replace[i][0] != NULL; i++)
size++;
for (int i = 0; i < size - 1; i++)
{
for (int j = i + 1; j < size; j++)
{
if (strlen(replace[i][0]) >= strlen(replace[j][0]))
{
if (strstr(replace[i][0], replace[j][0]) == replace[i][0])
return 1;
}
else
{
if (strstr(replace[j][0], replace[i][0]) == replace[j][0])
return 1;
}
}
}
return 0;
}
char *newSpeak(const char *text, const char * (*replace)[2])
{
if (replaceInvalidity(replace))
{
return NULL;
}
int i = 0, k = 0, flag= 0, Nlen = 0, Olen = 0, length = 0;
char *result = (char *)malloc(sizeof(char));
length = strlen(text);
for (i = 0, k = 0; i < length; i++, k++)
{
flag = 0;
for (int j = 0; replace[j][1] != NULL; j++)
{
if (strstr(&text[i], replace[j][1]) == &text[i])
{
Nlen = strlen(replace[j][1]);
result = (char *)realloc(result, ((k + Nlen + 1) * sizeof(char)));
for (int l = k; l < k + Nlen; l++)
result[l] = replace[j][1][l-k];
i += Nlen - 1;
k += Nlen - 1;
flag = 1;
break;
}
}
if (flag) continue;
for (int j = 0; replace[j][0] != NULL; j++)
{
if (strstr(&text[i], replace[j][0]) == &text[i])
{
Olen = strlen(replace[j][0]);
Nlen = strlen(replace[j][1]);
result = (char *)realloc(result, ((k + Nlen + 1) * sizeof(char)));
for (int l = k; l < k + Nlen; l++)
result[l] = replace[j][1][l-k];
i += Olen - 1;
k += Nlen - 1;
flag = 1;
break;
}
}
if (flag) continue;
result = (char *)realloc(result, (k + 2) * sizeof(char));
result[k] = text[i];
}
result[k] = '';
return result;
}
#ifndef __PROGTEST__
int main(int argc, char * argv[])
{
char *res;
const char * d1[][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
const char * d2[][2] = {
{ "fail", "suboptimal result" },
{ "failure", "non-traditional success" },
{ NULL, NULL }
};
res = newSpeak("dumb termination specialist.", d1);
assert(!strcmp(res, "cerebrally challenged termination specialist."));
free(res);

res = newSpeak("The student answered an incorrect answer.", d1);
assert(!strcmp(res, "The client answered an alternative answer."));
free(res);
res = newSpeak("He was dumb, his failure was expected.", d1);
assert(!strcmp(res, "He was cerebrally challenged, his non-traditional success was expected."));
free(res);
res = newSpeak("The evil teacher became a murderer.", d1);
assert(!strcmp(res, "The nicenest deprived voluntary knowledge conveyor became a termination specialist."));
free(res);
res = newSpeak("Devil's advocate.", d1);
assert(!strcmp(res, "Dnicenest deprived's advocate."));
free(res);
res = newSpeak("Hello.", d2);
assert(!res);
return EXIT_SUCCESS;
}
#endif /* __PROGTEST__ */

在添加了缺失的include并组合了您的3个片段后,我无法重现此问题。当您将这个问题表述为性能问题时,我重新编写了您的代码,将每个1e6调用的运行时间从0.476秒减少到0.275秒。

  1. 对于给定的坏词,不按输入text的每个字符调用strstr(),而只调用一次+在text中找到给定坏词的次数。更换后继续处理text。这应该会对大量输入产生重大影响。

  2. 不要使用循环来移动字符串中的数据,而是使用经过高度优化的memmove()

  3. 而不是在替换的大小改变时为每个替换调用realloc()

  4. 删除了replaceInvalidity(),因为我认为您是在保护自己,将替换字符串作为输入的子字符串,以避免无限循环。下面的实施方式通过只在事后寻找替代品来避免这种情况。

  5. result = realloc(result, ...)在失败时会泄漏内存,因此通过释放原始字符串来处理错误,并在错误时返回NULL。CCD_ 12错误的处理类似。

  6. 问题描述与您的测试用例不匹配,所以我修改了测试用例。如果这不正确,请澄清预期行为(最多只替换一个坏单词?)。

#define _XOPEN_SOURCE 500
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *newSpeak(const char *text, const char *(*replace)[2]) {
char *result = strdup(text);
if(!result)
return NULL;
size_t result_len = strlen(result);
for(size_t i = 0; replace[i][0] && replace[i][0][0] && replace[i][1] && replace[i][1][0]; i++) {
size_t bad_len = strlen(replace[i][0]);
size_t good_len = strlen(replace[i][1]);
char *found = result;
for(;;) {
found = strstr(found, replace[i][0]);
if(!found)
break;
size_t offset = found - result;
if(bad_len < good_len) {
char *tmp = realloc(result, result_len + good_len - bad_len + 1);
if(!tmp) {
free(result);
return NULL;
}
result = tmp;
found = result + offset;
memmove(found + good_len, found + bad_len, result_len - offset - bad_len + 1);
} else if(bad_len > good_len) {
memmove(found + good_len, found + bad_len, result_len - offset - bad_len + 1);
char *tmp = realloc(result, result_len + good_len - bad_len + 1);
if(!tmp) {
free(result);
return NULL;
}
result = tmp;
found = result + offset;
}
result_len += good_len - bad_len;
memcpy(found, replace[i][1], good_len);
found += good_len;
}
}
return result;
}
int main(void) {
const char *d1[][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
char *res = newSpeak("dumb termination specialist.", d1);
assert(!strcmp(res, "cerebrally challenged termination person with certified level of knowledge."));
free(res);
}

少数建议:

replaceInvalidity()函数中存在大量不必要的字符串遍历。像strlen()strstr()等函数,只有在真正需要的时候才使用,否则要避免使用。此外,如果字典应该以NULL结尾,那么这是不需要的:

for (int i = 0; replace[i][0] != NULL; i++) size++;

for循环条件中直接使用字典的NULL终止检查,而不是先计算size然后使用它。

程序还必须确保输入的好词和坏词词典是正确的,这意味着一个坏词不能成为另一个坏单词的开头

为此,您使用的是strstr(),它将解析整个字符串以找出子字符串,即使它们的第一个字符不匹配
如果一个坏单词不能作为另一个坏词的开头,那么只需从开始开始比较它们的字符,如果任何字符串到达结尾,则会使字典无效,否则不会。

newSpeak()函数中,您的程序逐字符迭代输入字符串text,对于每个字符,首先将查找整个字典中的好短语作为子字符串,如果找不到,则对整个字典的坏短语执行相同的活动。如果输入的短语很大,并且字典中的元素数量过多,这将需要大量的处理时间。你应该想一个更好的办法,可能是从输入文本中提取一个单词,并在字典中搜索该单词,然后根据字典中找到的全部或部分匹配项或未匹配项进行进一步处理。

你可以这样做(下面的代码只是为了演示):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#define A_B_EQUAL       0
#define A_SUBSTR        1
#define B_SUBSTR        2
#define A_B_NOTEQUAL    3

int strMatch (const char * a, const char * b) {
if (!a || !b) {
return A_B_NOTEQUAL;
}
while(*a && (*a == *b)) {
a++; b++;
}
if ((*a == '') && (*b == '')) {
return A_B_EQUAL;
} else if (*a == '') {
return A_SUBSTR;
} else if (*b == '') {
return B_SUBSTR;
}
return A_B_NOTEQUAL;
}
int replaceInvalidity (const char * (*replace)[2]) {
for (int i = 0; replace[i][0] && replace[i + 1][0]; i++) {
for (int j = i + 1; replace[j][0]; j++) {
if (strMatch (replace[j][0], replace[i][0]) != A_B_NOTEQUAL) {
fprintf (stdout, "Invalid entries found in dictionary - [%s, %s]n", replace[j][0], replace[i][0]);
return 1;
}
}
}
return 0;
}
int findInDict (const char * (*replace)[2], const char * ps, int len) {
if (!replace || !ps || !len) {
fprintf (stderr, "(%s):Invalid argument...n", __func__);
return -1;
}
int index = -1;
for (int i = 0; replace[i][0] && (index == -1); i++) {
if (strncmp (replace[i][0], ps, len) == 0) {
index = i;
}
if ((index != -1) && (replace[index][0][len] != '')) {
//dictionary entry partially matched, match rest
int res = strMatch (&ps[len], &replace[index][0][len]);
if ((res != A_B_EQUAL) && (res != B_SUBSTR)) {
index = -1;
}
}
}
return index;
}
char * newSpeak (const char * text, const char * (*replace)[2]) {
if ((!text) || (replaceInvalidity(replace))) {
fprintf (stderr, "(%s):Invalid argument...n", __func__);
return NULL;
}
char * result = NULL;
int resultlen = 0;
while (*text) {
int ws_and_oc = 0;
int curr_text_len = 0;
const char * start = text;
const char * str = NULL;
while (isspace (*text) || !isalpha (*text)) {
ws_and_oc++; text++;
}
while (isalpha (*text)) {
curr_text_len++; text++;
}
int dict_index = findInDict (replace, start + ws_and_oc, curr_text_len);
if (dict_index >= 0) {
int len = strlen (replace [dict_index][0]);
// adjust the text pointer and curr_text_len when the dictionary bad word is a phrase and not just a word
text = (((text - start - ws_and_oc) == len) ? text : start + len + ws_and_oc);
curr_text_len = strlen (replace [dict_index][1]);
str = replace [dict_index][1];
} else {
str = start + ws_and_oc;
}
char * tmp;
result = realloc (tmp = result, resultlen + curr_text_len + ws_and_oc + 1);
if (result == NULL) {
fprintf (stderr, "(%s:%d):Failed to allocate memory...n", __func__, __LINE__);
free (tmp);
return NULL;
}
for (int i = 0; i < ws_and_oc; ++i) {
result[resultlen++] = start[i];
}
for (int i = 0; i < curr_text_len; ++i) {
result[resultlen++] = str[i];
}
}
result[resultlen] = '';
return result;
}
int main (void) {
char * res;
const char * d1 [][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
res = newSpeak ("dumb termination specialist.", d1);
if (res) {
assert (!strcmp (res, "cerebrally challenged termination person with certified level of knowledge."));
free (res);
}

res = newSpeak ("The student answered an incorrect answer.", d1);
if (res) {
assert (!strcmp ( res, "The client answered an alternative answer."));
free (res);
}
res = newSpeak ("He was dumb, his failure was expected.", d1);
if (res) {
assert (!strcmp ( res, "He was cerebrally challenged, his non-traditional success was expected."));
free (res);
}
res = newSpeak ("The evil teacher became a murderer.", d1);
if (res) {
assert (!strcmp ( res, "The nicenest deprived voluntary knowledge conveyor became a termination specialist."));
free (res);
}
return 0;
}

由于缺乏清晰度,我跳过了几个测试用例-

第一个是:

res = newSpeak ( "Devil's advocate.", d1 );
assert ( ! strcmp ( res, "Dnicenest deprived's advocate." ) );
free ( res );

在这里,字典中存在的短语中的单词的子串被好短语取代。如果字典中也存在Devil呢?在这种情况下应该采取什么行为?应该寻找最佳匹配或第一匹配(即使是部分匹配也可以)
可能的是,一旦您对此有了明确的了解,就可以对findInDict()函数进行适当的更改。

第二种是:

res = newSpeak ( "Hello.", d2 );
assert ( ! res );

为什么这个测试用例期望resNULL?根据您提供的信息,res应该是Hello.,而不是NULL

相关内容

  • 没有找到相关文章

最新更新