C语言 如何使 sscanf 读取到"\0"字符



我希望 name 能保存该行中直到 '\0' 之前的所有剩余字符。

#include <stdio.h>
/*
 * Question program: parse one CSV record of the form
 * "YYYY-MM-DD,temp,uncertainty,name" and print it back.
 *
 * This is the version that shows the problem being asked about: %s stops
 * at the first white-space character, so name receives only "United".
 */
int main(void)
{
    char line[] = "1999-08-01,14.547,0.191,United Kingdom";
    unsigned int year, month, day;
    float temp, uncertainty;
    char name[100];

    sscanf(line, "%u - %u - %u, %f , %f , %s", &year, &month,
           &day, &temp, &uncertainty, name);
    /* The float arguments are promoted to double, so %lf is valid here. */
    printf("%u-%u-%u,%lf,%lf,%s\n", year, month, day, temp, uncertainty, name);
    return 0;
}

我可以像这样做这项工作:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Workaround program: copy the record into a heap buffer, append '\n',
 * and let the scanset %[^\n] read the name up to that newline.
 */
int main(void)
{
    char line[] = "1999-08-01,14.547,0.191,United Kingdom";
    /* Take the length once: after the terminator has been overwritten with
     * '\n' the buffer is no longer a string, so strlen() must not be
     * called on it again. */
    size_t len = strlen(line);
    char *newline = malloc(len + 2); /* room for '\n' and '\0' */
    if (newline == NULL)
        return 1;
    memcpy(newline, line, len);
    newline[len] = '\n';
    newline[len + 1] = '\0';

    unsigned int year, month, day;
    float temp, uncertainty;
    char name[100];

    /* Scan the newline-terminated copy; the width 99 keeps name in bounds. */
    sscanf(newline, "%u - %u - %u, %f , %f , %99[^\n]", &year, &month,
           &day, &temp, &uncertainty, name);
    printf("%u-%u-%u,%lf,%lf,%s\n", year, month, day, temp, uncertainty, name);

    free(newline);
    return 0;
}

但我觉得这很不优雅。

这应该有效:

char line[] = "1999-08-01,14.547,0.191,United Kingdom";
unsigned int year, month, day;
float temp, uncertainty;
char name[100];
/* %99[^\n] stores up to 99 characters that are not '\n'. The input holds
 * no '\n', so the scan simply stops at the terminating '\0'. */
sscanf(line, "%u - %u - %u, %f , %f , %99[^\n]", &year, &month,
       &day, &temp, &uncertainty, name);
printf("%u-%u-%u,%lf,%lf,%s\n", year, month, day, temp, uncertainty, name);

输入中找不到 '\n',但由于也不会达到 99 个字符的上限,sscanf 会一直读取到字符串末尾的结束标记('\0')为止。

sscanf

不是最优雅的接口,但它有很多功能。其中之一是能够告诉您当前扫描到了输入字符串的哪个位置,这使您可以提取(或仅仅指向)"输入的其余部分"。

例如,之后;

/* nchar keeps its initial -1 unless sscanf gets as far as the %n directive. */
int nchar = -1;
/* %n stores the number of input characters consumed so far into nchar.
 * It is not counted in the value sscanf returns, so nfield is at most 5. */
int nfield = sscanf(line, "%u - %u - %u, %f , %f , %n", &year, &month,
&day, &temp, &uncertainty, &nchar);

nchar 将包含名称字段在 line 中的偏移量(除非它仍然是 -1,这表示 sscanf 无法匹配格式字符串)。如果该字段一直延伸到 line 的末尾,则可以直接使用它(line + nchar),或者在检查它不会太长之后将其复制到另一个字符串中。

如果 line 与其名称相反,包含多行,而您希望只提取到换行符为止的字符串,则可以使用两个 %n,中间夹一个 %*[^\n](星号抑制赋值,从而避免溢出问题):

char name[NAME_MAX + 1];
/* Both offsets stay -1 unless the scan reaches the matching %n. */
int nstart = -1, nend = -1;
/* The first %n records where the name begins, %*[^\n] skips (without
 * storing) everything up to the next newline, and the second %n records
 * where the name ends. */
int nfield = sscanf(line, "%u - %u - %u, %f , %f , %n%*[^\n]%n", &year, &month,
                    &day, &temp, &uncertainty, &nstart, &nend);
if (nend > 0) {
    if (nend - nstart <= NAME_MAX) {
        memcpy(name, line + nstart, nend - nstart);
        name[nend - nstart] = 0;
    }
    else {
        /* name is too long */
    }
}
else if (nstart > 0) {
    /* Name was 0 bytes long. Sscanf requires that %[ match at least
     * one character; if not, it fails the scan.
     */
    name[0] = 0; /* Perhaps you wanted to signal an error */
}
else {
    /* Line didn't match format */
}

显然,既然此时已经知道名称有多长,我也可以改为动态分配缓冲区,这样既不必使用固定长度的缓冲区,也不需要检查溢出:

/* name stays NULL unless the name field was matched (nend > 0). On success
 * it points to a heap copy that must be released with free(). */
char* name = NULL;
// ...
if (nend > 0) 
name = strndup(line + nstart, nend - nstart);
// or, if you don't like strndup
//   name = malloc(nend - nstart + 1);
//   memcpy(name, line + nstart, nend - nstart);
//   name[nend - nstart] = 0;

如果您真正想要的是一个动态分配的字符串,并且您有一个符合 Posix 的sscanf,您可以通过使用m长度修饰符来避免这种麻烦,这是全方位最简单的解决方案。

/* With the POSIX 'm' modifier sscanf allocates the buffer itself and stores
 * its address through &name; release it with free() when done. */
char *name = NULL;
int nfield = sscanf(line, "%u - %u - %u, %f , %f , %m[^\n]", &year, &month,
                    &day, &temp, &uncertainty, &name);

有关详细信息,请参阅 sscanf 手册页。在所有动态分配 name 的情况下,都不要忘记在用完之后调用 free() 释放它。

几天前,我正在阅读 2003 年版《Unix 系统编程:通信、并发与线程》一书的第 2 章,研究了其中一个例子:它把字符串按自定义分隔符(可以是 , 或 _ 或空格或其他字符)分解为若干标记。它使用了 C 库函数 strtok()。这个例子在某种程度上可以满足您的需求。我给出 2 个文件:

makeargv.c

#include <errno.h>
#include <stdlib.h>
#include <string.h>
/*
 * Release everything makeargv() allocated: the token buffer that argv[0]
 * points into, and then the pointer array itself. A NULL argv is ignored.
 */
void freemakeargv(char **argv)
{
    if (argv != NULL) {
        free(*argv);   /* free(NULL) is a no-op, so no separate check */
        free(argv);
    }
}
/*
 * Break string s into tokens separated by any of the characters in
 * delimiters.
 *
 * On success *argvp points to a NULL-terminated array of pointers into a
 * single heap copy of s (with leading delimiters skipped), and the number
 * of tokens is returned. When there are no tokens the array holds only the
 * terminating NULL.
 *
 * Returns -1 with errno set to EINVAL if any argument is NULL, and -1 if a
 * malloc() call fails; in both cases *argvp is NULL (when argvp itself is
 * not NULL).
 */
int makeargv(const char *s, const char *delimiters, char ***argvp)
{
    if (s == NULL || delimiters == NULL || argvp == NULL) {
        errno = EINVAL;
        return -1;
    }

    /* Keep *argvp NULL so that a failed allocation leaves it that way. */
    *argvp = NULL;

    /* Skip leading delimiters; start is the real beginning of the text. */
    const char *start = s + strspn(s, delimiters);

    char *buf = malloc(strlen(start) + 1);
    if (buf == NULL)
        return -1;
    strcpy(buf, start);

    /* First pass: count the tokens (strtok writes into buf). */
    int count = 0;
    char *tok = strtok(buf, delimiters);
    while (tok != NULL) {
        count++;
        tok = strtok(NULL, delimiters);
    }

    /* Allocate the pointer array, with one extra slot for the final NULL. */
    char **vec = malloc((count + 1) * sizeof(char *));
    *argvp = vec;
    if (vec == NULL) {
        int saved = errno;   /* free() must not disturb the reported errno */
        free(buf);
        errno = saved;
        return -1;
    }

    if (count == 0) {
        free(buf);
    } else {
        /* Second pass: restore the text and record where each token starts. */
        strcpy(buf, start);
        vec[0] = strtok(buf, delimiters);
        for (int k = 1; k < count; k++)
            vec[k] = strtok(NULL, delimiters);
    }

    vec[count] = NULL;       /* append final NULL pointer */
    return count;
}

main.c

#include <stdio.h>
#include <stdlib.h>
/* Implemented in makeargv.c. */
int makeargv(const char *s, const char *delimiters, char ***argvp);
void freemakeargv(char **argv);

/*
 * Split a sample CSV record on ',' with makeargv() and print each token on
 * its own line. Returns 1 if the argument array cannot be built.
 */
int main(void)
{
    char delim[] = ",";
    int i, numtokens;
    char **myargv;  /* memory will be allocated dynamically and has to be freed before exit */
    char line[] = "1999-08-01,14.547,0.191,United Kingdom-UK";

    /* makeargv() allocates memory for myargv, so it may fail */
    if ((numtokens = makeargv(line, delim, &myargv)) == -1)
    {
        fprintf(stderr, "Failed to construct an argument array for %s\n", line);
        return 1;
    }
    printf(" The argument array contains:\n");
    for (i = 0; i < numtokens; i++)
        printf("%d:%s\n", i, myargv[i]);
    freemakeargv(myargv);    /* do not forget to free the memory! */
    return 0;
}

gcc -Wall -std=c99 -o tokenizer main.c makeargv.c

并运行它

./tokenizer

@rici好方法的变体:

如何使 sscanf 读取到"\0"字符

使用"%n"来应对"其余部分"。
"%n" 记录扫描到该位置时的偏移量(前提是扫描确实进行到了那里)。
%*[^\n] 会扫描直到 '\n' 为止的所有字符(即 OP 所说的"该行的其余部分"),但不保存它们。

使用它来分配结束字符串。

// Some untested code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One parsed record: date, two measurements and a heap-allocated name. */
typedef struct {
    unsigned int year, month, day;
    float temp, uncertainty;
    char *name;   /* allocated by foo(); the caller must free() it */
} data_T;

/*
 * Parse a record of the form "YYYY-MM-DD,temp,uncertainty,name" into *dest.
 *
 * The name is everything after the third comma up to (not including) the
 * first '\n' or the end of the string, and is stored in freshly allocated
 * memory. The parsed record is also printed to stdout.
 *
 * Returns 0 on success. Returns 1 if the line does not match the format or
 * if memory cannot be allocated; *dest is zeroed in that case apart from
 * any numeric fields that were converted before the failure.
 */
int foo(data_T *dest, const char *line) {
    /* Both offsets stay 0 unless the scan reaches the matching %n. */
    int start = 0;
    int end = 0;

    memset(dest, 0, sizeof *dest); // zero `dest`
    dest->name = NULL;

    /* The first %n marks where the name starts, %*[^\n] skips the name
     * without storing it, and the second %n marks where it ends. */
    sscanf(line, "%u - %u - %u, %f , %f , %n%*[^\n]%n",
           &dest->year, &dest->month, &dest->day, &dest->temp, &dest->uncertainty,
           &start, &end);
    if (start == 0) {
        // line did not scan properly, return error
        return 1;
    }
    if (end == 0) {
        // %[ must match at least one character, so an empty name leaves
        // `end` unset: there was no non-white-space text after the `,`
        end = start;
    }

    size_t len = (size_t)(end - start);
    dest->name = malloc(len + 1u);
    if (dest->name == NULL) {
        // Out of memory
        return 1;
    }
    memcpy(dest->name, line + start, len);
    dest->name[len] = '\0';

    printf("%u-%u-%u,%f,%f, %s\n",
           dest->year, dest->month, dest->day, dest->temp, dest->uncertainty, dest->name);
    return 0;  // be sure to free dest->name when done with it.
}

最新更新