C 语言中字符串末尾出现了意外的字符



在我的代码中,当我把 char 数组传给一个函数之后,字符串末尾会多出一个随机字符,例如:

/* Tokenizer state used by the excerpt below. */
struct TokenizerT_ {        //Definition of the struct
/* presumably the delimiter characters; not assigned anywhere in this excerpt */
char * sep;
/* string that TKCreate fills with the text to be tokenized */
char * toks;
};
/*
 * Abridged TKCreate: allocates a TokenizerT and copies hr into its toks field.
 * Neither separators nor ts is used in this excerpt, and hr is not declared
 * here (it comes from the full listing).
 */
TokenizerT *TKCreate(char *separators, char *ts) {
TokenizerT * inu = malloc(sizeof(*inu));
/* NOTE(review): sizeof(char) is 1, so toks has room for the terminator only. */
inu->toks = malloc(sizeof(char)); //Initialize char array that will store the tokens
/* NOTE(review): strcpy writes strlen(hr) + 1 bytes into that 1-byte buffer. */
strcpy(inu->toks, hr);      
return inu;
}
....... 
/* Caller-side excerpt: prints best, then rondo, then the tokenizer's copy. */
best = "sein";
printf("%sn", best);
/* NOTE(review): one byte is allocated and never written before being printed with %s. */
char * rondo = malloc(sizeof(char));                       
printf("%sn", rondo);
/* copy is not declared in this excerpt. */
TokenizerT * Ray = TKCreate(copy, rondo);                          /
printf("%sn", Ray->toks);

最后打印出的值如下:

sein
sein
sein?

为什么会出现问号?它通常是一个随机字符,并不总是问号。

  Edit: Full code, really desperate

 struct TokenizerT_ {        //Defintion of the struct
char * sep;
char * toks;
 };
 char nulines[10] = "ntvbrfa\"";           //for the arguments with backslashes
 char resp[37] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";
 typedef struct TokenizerT_ TokenizerT;

  /*
   * TKCreate: builds a TokenizerT from a delimiter string and an input string.
   *
   * separators - delimiter characters; repeated characters are stored once.
   * ts         - text to tokenize; NULL makes the function return NULL.
   *
   * Returns a malloc'd TokenizerT whose sep is a copy of the de-duplicated
   * delimiters and whose toks is a copy of ts in which a backslash followed by
   * a character from nulines is replaced by "[0xNN]" (digits taken from resp).
   * A backslash followed by any other character is dropped with that character.
   *
   * NOTE(review): the local buffers yr and hr are sized without room for a
   * terminating null character and are never terminated, yet strlen() and
   * strcpy() are applied to them. This is where the stray trailing character
   * in the output comes from.
   */
  TokenizerT *TKCreate(char *separators, char *ts) {
if (ts==NULL) {                 //If there are no tokens to be parsed (empty entry)
    return NULL;
}int lim = 1;
/* NOTE(review): zero-length array when separators is empty; no slot for a terminator. */
char yr[strlen(separators)]; //Initializes delimiters
yr[0] = *separators;
if(strlen(separators)>0){
int h =1;                          
char zmp = *(separators+h);
/* Append each later separator to yr unless it is already among the first lim entries. */
for(h=1; h<strlen(separators); h++){
    zmp = *(separators+h);
    int z=0;
    for (z=0; z<lim; z++) {
        if (zmp==yr[z]) {
            /* z == -1 marks "duplicate found" for the check after the loop */
            z=-1;
            break;
        }
    }
    if(z>-1){
        yr[lim] = zmp;
        lim++;}
    else{
        continue;
    }                                   //yr is local variable that contains delimiters
}}
TokenizerT * inu = malloc(sizeof(*inu));    //Creates TokenizerT
/* NOTE(review): strlen(yr) reads an unterminated array, and no byte is reserved for the terminator. */
inu->sep = malloc((int)strlen(yr)*sizeof(char)); 
strcpy(inu->sep, yr);              

/* NOTE(review): each two-character escape below writes six characters, so hr can overflow. */
char hr [strlen(ts)];                       
lim = 0; int q = 0; int wy=0;
for(q=0; q<strlen(ts); q++){
    /* NOTE(review): this character literal looks truncated; presumably a backslash was intended. */
    if(ts[q]=='\'){
        /* step past the backslash and look the next character up in nulines */
        q++;
        for(wy = 0; wy<strlen(nulines); wy++){
            if (nulines[wy]==ts[q]) {
     /* emit "[0x" + the two hex digits of entry wy + "]" */
     hr[lim] = '['; hr[++lim] = '0'; hr[++lim] = 'x'; hr[++lim] = resp[wy*4+2];
     hr[++lim] = resp[wy*4+3];
                hr[++lim] = ']'; lim++;
                break;
            }
        }
        continue;
    }
    else{                               
        hr[lim] = ts[q];
        lim++;
    }
}

/* NOTE(review): hr was never null-terminated, so strlen/strcpy run past the data written above. */
inu->toks = (char *)malloc(sizeof(char) * strlen(hr) + 1);
strcpy(inu->toks, hr);      //Makes copy
return inu;
 }

/*
 * TKDestroy: releases everything a tokenizer owns - the delimiter string,
 * the token string, and finally the TokenizerT structure itself.
 */
void TKDestroy(TokenizerT *tk) {
    free(tk->sep);
    free(tk->toks);
    free(tk);
}

 /*
  * TKGetNextToken: returns the leading token of tk->toks, i.e. the characters
  * that precede the first character also found in tk->sep.
  *
  * When tk->sep is empty the function returns tk->toks itself (not a copy);
  * otherwise it returns the separately malloc'd buffer temps.
  *
  * NOTE(review): temps is a single uninitialised byte. strlen(temps) reads it
  * before anything is stored, and every character is written at temps+len, so
  * the function reads and writes outside the allocation.
  */
 char *TKGetNextToken(TokenizerT *tk) {
char * stream = tk->toks;
char * dels = tk->sep;
/*The following two  lines initialize the char array to be printed
 as well as the integers to be used in the various loops*/
char * temps = malloc(sizeof(char)); int g = 0;
int z = 0, x= 0, len = 0;
if (strlen(dels)==0) {          
    /* NOTE(review): temps is not freed on this path. */
    return stream;
}

for(z = 0; z<strlen(stream); z++){
    char b = *(stream+z);           
    for(x = 0; x<strlen(dels); x++){ 
        len = (int)strlen(temps); 
        char c = *(dels+x);
        if(c==b){   //Here, the current character is a delimiter
            g = -1;
            break;
        }
    }
    if (g==-1) {    //If delimiter, then return the current token
        /* NOTE(review): returned without writing a terminating null character. */
        return temps;
    }
        *(temps+len) = b;   
}
len = (int)strlen(temps);
/* NOTE(review): empty character literal; presumably the null character was intended. */
*(temps+len) = '';    //Returns the string with the null character ending it
return temps;
 }

/*
 * TKN: prints every non-empty token of tin, one per printf call, and destroys
 * the tokenizer it was given.
 *
 * tin - tokenizer to drain; it is destroyed and re-created for each token.
 * sum - number of characters still to be consumed; the caller in main passes
 *       strlen(tin->toks). Each token subtracts its length plus one.
 *
 * NOTE(review): copy and best are one-byte allocations that receive whole
 * strings through strcpy, and only the last token returned by
 * TKGetNextToken is freed.
 */
void TKN(TokenizerT * tin, int sum){
char * tmp = TKGetNextToken(tin);      
/* NOTE(review): 1-byte buffer; strcpy needs strlen(tin->sep) + 1 bytes. */
char * copy = malloc(sizeof(char));
   strcpy(copy, tin->sep);                 
   /* token length plus one for the delimiter that followed it */
   int difference = (int)strlen(tmp)+1;
   sum = sum-difference;
  /* NOTE(review): 1-byte buffer again; also, after the last token toks + difference points past the terminator. */
  char * best = malloc(sizeof(char));
  strcpy(best, tin->toks + difference);   

    if((int)strlen(tmp)>0){              
   printf("%sn", tmp);           
  }                                 
  /* rebuild the tokenizer over the remaining text */
  TKDestroy(tin);
tin = TKCreate(copy, best);
while(sum>0){
    /* NOTE(review): the previous tmp is overwritten here without being freed. */
    tmp = TKGetNextToken(tin);
    if((int)strlen(tmp)>0){                
        printf("%sn", tmp);
    }
    difference = (int)strlen(tmp)+1;
    sum = sum-difference;
    free(best);
    best = malloc(sizeof(char));
    strcpy(best, tin->toks + difference);
       TKDestroy(tin);
       tin = TKCreate(copy, best);
 }
free(copy);
free(best);
free(tmp);
  TKDestroy(tin); //Freeing up memory associated with the Tokenizer
  return;
}
/*
 * Entry point: argv[1] is the delimiter set and argv[2] the string to tokenize.
 * Prints a message and returns 0 when argc is below 2 or above 3.
 *
 * NOTE(review): argc == 2 passes both checks, but argv[2] is then NULL;
 * TKCreate returns NULL for a NULL string and jer->toks dereferences it.
 */
int main(int argc, char **argv) {
if(argc<2){
    printf("%sn", "Not enough arguments");
    return 0;
}
else if(argc>3){
    printf("%sn", "Too many arguments");
    return 0;
}
 else{
char * arr = argv[1];   //Represents delimiters
char * y = argv[2];       //Represents string to be tokenized
TokenizerT * jer = TKCreate(arr, y);    //Create and initialize tokenizer
 //printf("%sn", jer->toks);
  /* TKN destroys jer before it returns */
  TKN(jer, (int)strlen(jer->toks)); 
 }
return 0;
 }

在您的大多数 malloc 调用中,您只分配了一个字符的空间:

malloc(sizeof(char))

而您应该写成:

malloc(sizeof(char) * n + 1)

其中 n 是您想要的字符串长度,+1 则是为终止的空字符('\0')预留的。您之所以看到随机字符,是因为 C 和 C++ 都用空字符来标记字符串的结尾;由于分配的空间不足,字符串没有被正确终止,读取它的函数会越过已分配的内存继续读下去,直到碰巧遇到一个空字符为止。

/* Headers needed by this listing: printf, malloc/free, and the str/mem functions. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Tokenizer state. Both strings are allocated by TKCreate and freed by TKDestroy. */
struct TokenizerT_ {
    char * sep;     /* de-duplicated delimiter characters, null-terminated */
    char * toks;    /* text still to be tokenized, null-terminated */
};

/*
 * Escape lookup tables used by TKCreate.
 *
 * nulines[i] is the character that may follow a backslash in the input, and
 * resp holds the matching four-character code "0xNN" at offset i*4. The two
 * tables must stay index-aligned: nine escape characters, nine codes.
 * The backslash entry (code 0x5c) must be present in nulines; without it the
 * double quote lines up with 0x5c instead of 0x22.
 */
char nulines[10] = "ntvbrfa\\\"";
char resp[37] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";
typedef struct TokenizerT_ TokenizerT;

TokenizerT *TKCreate(char *separators, char *ts) {
    if (ts==NULL) {                 //If there are no tokens to be parsed (empty entry)
        return NULL;
    }int lim = 1;
    char yr[strlen(separators)]; //Initializes delimitors
    yr[0] = *separators;
    if(strlen(separators)>0){
        int h =1;
        char zmp = *(separators+h);
        for(h=1; h<strlen(separators); h++){
            zmp = *(separators+h);
            int z=0;
            for (z=0; z<lim; z++) {
                if (zmp==yr[z]) {
                    z=-1;
                    break;
                }
            }
            if(z>-1){
                yr[lim] = zmp;
                lim++;}
            else{
                continue;
            }                                   //yr is local variable that contains delimitors
        }}
    TokenizerT * inu = (TokenizerT *)malloc(sizeof(*inu));    //Creates TokenizerT
    inu->sep = (char *)malloc((int)strlen(yr)*sizeof(char));
    strcpy(inu->sep, yr);

    char hr [strlen(ts)];
    lim = 0; int q = 0; int wy=0;
    for(q=0; q<strlen(ts); q++){
        if(ts[q]=='\'){
            q++;
            for(wy = 0; wy<strlen(nulines); wy++){
                if (nulines[wy]==ts[q]) {
                    hr[lim] = '['; hr[++lim] = '0'; hr[++lim] = 'x'; hr[++lim] = resp[wy*4+2];
                    hr[++lim] = resp[wy*4+3];
                    hr[++lim] = ']'; lim++;
                    break;
                }
            }
            continue;
        }
        else{
            hr[lim] = ts[q];
            lim++;
        }
    }

    inu->toks = (char *)malloc(sizeof(char) * strlen(hr) + 1);
    strcpy(inu->toks, hr);      //Makes copy
    return inu;
}

/*
 * TKDestroy - release a tokenizer and the two strings it owns.
 *
 * tk: tokenizer returned by TKCreate. NULL is accepted and ignored, mirroring
 *     free(NULL), because TKCreate itself can return NULL.
 */
void TKDestroy(TokenizerT *tk) {
    if (tk == NULL) {
        return;
    }
    free(tk->toks); /* token string */
    free(tk->sep);  /* delimiter string */
    free(tk);       /* the tokenizer itself */
}

/*
 * TKGetNextToken - return a copy of the leading token of tk->toks.
 *
 * The token is the run of characters at the start of tk->toks that precedes
 * the first delimiter from tk->sep (the whole string when there is no
 * delimiter, or when the delimiter set is empty). It is empty when the string
 * starts with a delimiter.
 *
 * Returns a freshly allocated, null-terminated string that the caller must
 * free(), or NULL when tk is NULL, tk->toks is NULL, or allocation fails.
 * The tokenizer itself is not modified.
 */
char *TKGetNextToken(TokenizerT *tk) {
    if (tk == NULL || tk->toks == NULL) {
        return NULL;
    }

    const char *stream = tk->toks;
    const char *dels = (tk->sep != NULL) ? tk->sep : "";

    /* strcspn gives the length of the prefix that contains no delimiter. */
    size_t len = strcspn(stream, dels);

    char *token = malloc(len + 1);    /* +1 for the terminating null character */
    if (token == NULL) {
        return NULL;
    }
    memcpy(token, stream, len);
    token[len] = '\0';
    return token;
}

/*
 * TKN - print every non-empty token of a tokenizer, one per line, then
 * destroy the tokenizer.
 *
 * tin: tokenizer to drain. Ownership passes to this function; the caller must
 *      not use tin afterwards. NULL is ignored.
 * sum: number of characters still to be consumed, normally
 *      strlen(tin->toks). Every token consumes its length plus one (the
 *      delimiter that followed it).
 *
 * After each token the remaining text is shifted to the front of tin->toks,
 * so no temporary one-byte buffers or tokenizer re-creation are needed.
 */
void TKN(TokenizerT * tin, int sum){
    if (tin == NULL || tin->toks == NULL) {
        TKDestroy(tin);
        return;
    }

    do {
        char *tok = TKGetNextToken(tin);
        if (tok == NULL) {
            break;                      /* allocation failure: stop cleanly */
        }

        size_t tok_len = strlen(tok);
        if (tok_len > 0) {
            printf("%s\n", tok);
        }

        /*
         * A TKGetNextToken implementation may hand back tin->toks itself
         * rather than a copy; that alias belongs to the tokenizer and must
         * not be freed here.
         */
        if (tok != tin->toks) {
            free(tok);
        }

        /* Drop the token and the delimiter after it, never stepping past the end. */
        size_t avail = strlen(tin->toks);
        size_t consumed = tok_len + 1;
        if (consumed > avail) {
            consumed = avail;
        }
        memmove(tin->toks, tin->toks + consumed, avail - consumed + 1);

        sum -= (int)tok_len + 1;
    } while (sum > 0);

    TKDestroy(tin); /* frees the token string, the delimiters and the tokenizer */
}
/*
 * Usage: program <delimiters> <string>
 *
 * argv[1] is the delimiter set and argv[2] the string to tokenize; both are
 * required. The argument-count messages keep the original exit status 0.
 * Returns 1 only when the tokenizer cannot be created.
 */
int main(int argc, char **argv) {
    if (argc < 3) {                     /* argv[2] must exist before it is used */
        printf("%s\n", "Not enough arguments");
        return 0;
    }
    if (argc > 3) {
        printf("%s\n", "Too many arguments");
        return 0;
    }

    char *arr = argv[1];                /* delimiters */
    char *y = argv[2];                  /* string to be tokenized */

    TokenizerT *jer = TKCreate(arr, y);
    if (jer == NULL) {
        printf("%s\n", "Could not create tokenizer");
        return 1;
    }

    TKN(jer, (int)strlen(jer->toks));   /* TKN destroys jer when it is done */
    return 0;
}
/* Quoted from the question: one uninitialised byte is allocated ... */
char * rondo = malloc(sizeof(char));                       
/* ... and then handed to printf's %s, which expects a null-terminated string. */
printf("%sn", rondo);

这是未定义行为(UB)。
您实际做的事情如下:

自由存储区(堆)-> 分配一个 char 大小(sizeof(char) 恒为 1 个字节)的内存,取得该位置的地址并把它存入 rondo。
因此,当您解引用 rondo(即 *rondo)时,只能合法访问这一个 char 大小的位置;访问它旁边或附近的任何内存都是非法的。

因此,在 printf("%s\n", rondo); 中,您是在告诉 printf:传入的指针指向一个字符串,请一直打印到遇到空字符('\0')为止。但您实际上并没有提供这样的字符串,这意味着 printf 实际上在访问未分配的内存。您看到的结果纯属运气(或者说不幸)。

您只能这样做:

printf("%c\n", *rondo); 但即便如此,在此之前也必须先对它进行初始化,例如:

char * rondo  = malloc(sizeof(char));
*rondo = 'K';
printf("%cn",*rondo);

不过我敢打赌,您真正想写的是:

/* Allocates no_of_characters_in_string chars plus one extra byte. */
char * rondo = malloc(sizeof(char)*no_of_characters_in_string+1);  

其中 +1 是为终止的空字符预留的。

您看到的字符与您的程序无关:您访问了不属于您的内存(它可能已经分配给别的对象,也可能归操作系统所有)。

编辑:您的代码还有一个严重的问题:您用 malloc 分配了内存,却从不释放它。对于小型演示程序来说这或许还能接受(其实也不行),但这绝对是非常糟糕的做法。请务必让每一次 malloc 都有对应的 free()。

我的建议是找一本好的教科书,它会更详细地讲解这些内容。

最新更新