C语言 寻找strtok的解决方案



我必须制作一个可以处理多个' | '操作符的C shell解释器。所以如果我这样写:cat test.txt | sort | uniq -c | sort -nr,它能正常工作。当我尝试使用更复杂的函数时,问题就来了,例如:cat test.txt | awk '/" 404 / {print%7}' | sort | uniq -c | sort -nr | head。当'awk'参数被strtok分隔时,它会中断。

有效的代码:

#define TRUE 1
#define FALSE 0
#define BUF_SIZE 1024
#define ROW_SIZE 64
#define MIN 100000
/*
 * Report an out-of-memory condition on stderr and terminate the process
 * with EXIT_FAILURE. This function does not return.
 */
void failed_allocation(void){
    fprintf(stderr, "Failed to allocate memory.\n");
    exit(EXIT_FAILURE);
}
/*
 * Read one line from stdin into a freshly allocated, NUL-terminated buffer.
 *
 * Reading stops at the first '\n' or at EOF; the newline itself is not
 * stored. At EOF with nothing read, an empty string is returned.
 *
 * Returns: a heap buffer that the caller must free(). Allocation failure is
 * handled by failed_allocation(), which terminates the process.
 */
char* read_line(void){
    size_t buf_size = BUF_SIZE;
    size_t pos = 0;
    char *buffer = malloc(buf_size);
    if(buffer == NULL)
        failed_allocation();

    while(TRUE){
        int c = getchar(); /* int, not char: EOF must be distinguishable */

        if(c == EOF || c == '\n'){
            buffer[pos] = '\0';
            return buffer;
        }
        buffer[pos++] = (char)c;

        /* Grow before the next write so there is always room for the NUL. */
        if(pos >= buf_size){
            buf_size += BUF_SIZE;
            char *grown = realloc(buffer, buf_size);
            if(grown == NULL){
                free(buffer); /* realloc failure leaves the old block valid */
                failed_allocation();
            }
            buffer = grown;
        }
    }
}
/*
 * Split str in place into tokens separated by any character in delim.
 *
 * str   - writable string; strtok() overwrites delimiters with NUL bytes.
 * delim - set of delimiter characters.
 *
 * Returns: a NULL-terminated, heap-allocated array of pointers INTO str.
 * The caller frees the array only, never the individual tokens, and must
 * keep str alive while the tokens are in use.
 *
 * Notes: runs of delimiters are collapsed (strtok semantics), quoting is not
 * honoured, and strtok() keeps hidden static state, so this function is not
 * reentrant. Allocation failure terminates via failed_allocation().
 */
char** split_to_lines(char* str, char* delim){
    size_t capacity = ROW_SIZE;
    size_t count = 0;
    char **tokens = malloc(capacity * sizeof *tokens);
    if(tokens == NULL)
        failed_allocation();

    for(char *tok = strtok(str, delim); tok != NULL; tok = strtok(NULL, delim)){
        tokens[count++] = tok;

        /* Keep one free slot for the terminating NULL. */
        if(count >= capacity){
            capacity += ROW_SIZE;
            char **grown = realloc(tokens, capacity * sizeof *tokens);
            if(grown == NULL){ /* abort only when realloc actually failed */
                free(tokens);
                failed_allocation();
            }
            tokens = grown;
        }
    }

    tokens[count] = NULL; /* end of list */
    return tokens;
}
/*
 * Run a pipeline. args is a NULL-terminated list of command strings, one per
 * pipeline stage; each string is split on whitespace into an argv.
 *
 * Every stage except the last is run in a forked child whose stdout is a
 * pipe feeding the next stage. The LAST stage is exec'd in the calling
 * process itself, so on success this function does not return and the
 * calling process is replaced. Children are not waited for here.
 *
 * Returns: 0 when args is empty; 1 on pipe/fork/exec failure.
 */
int start_proc(char** args){
    int fd[2];
    int prev_fd = STDIN_FILENO;
    int i;
    char **list = NULL;

    /* Nothing to run; also keeps the args[i + 1] look-ahead in bounds. */
    if(args == NULL || args[0] == NULL)
        return 0;

    for(i = 0; args[i + 1] != NULL; ++i){
        if(pipe(fd) == -1){
            perror("Pipe error: ");
            if(prev_fd != STDIN_FILENO)
                close(prev_fd);
            return 1;
        }

        int pid = fork();
        if(pid < 0){
            perror("Fork error:");
            close(fd[0]);
            close(fd[1]);
            if(prev_fd != STDIN_FILENO)
                close(prev_fd);
            return 1;
        }
        else if(pid == 0){
            /* Child: read from the previous stage, write into the new pipe. */
            if(prev_fd != STDIN_FILENO){
                dup2(prev_fd, STDIN_FILENO);
                close(prev_fd);
            }
            close(fd[0]); /* the read end belongs to the next stage */
            dup2(fd[1], STDOUT_FILENO);
            close(fd[1]);

            list = split_to_lines(args[i], " \t\r\n");
            if(list[0] == NULL){
                fprintf(stderr, "Empty command in pipeline.\n");
                exit(EXIT_FAILURE);
            }
            execvp(list[0], list);
            perror("Execvp error:");
            exit(EXIT_FAILURE);
        }

        /* Parent: drop the descriptors it no longer needs, but never the
         * real stdin, which is still prev_fd on the first iteration. */
        if(prev_fd != STDIN_FILENO)
            close(prev_fd);
        close(fd[1]);
        prev_fd = fd[0];
    }

    if(prev_fd != STDIN_FILENO){
        dup2(prev_fd, STDIN_FILENO);
        close(prev_fd);
    }

    list = split_to_lines(args[i], " \t\r\n");
    if(list[0] != NULL){
        execvp(list[0], list);
        perror("Execvp error:");
    }
    else{
        fprintf(stderr, "Empty command in pipeline.\n");
    }

    free(list);
    return 1; /* only reached when the final stage could not be exec'd */
}

/*
 * Interactive loop: print a prompt, read a line, split it on '|' into
 * pipeline stages and hand them to start_proc(). The loop ends on the
 * literal command "exit" or when stdin reaches end-of-file.
 */
int main(void){
    int flag = TRUE;
    while(flag == TRUE){
        /* The prompt has no trailing newline, so flush it explicitly. */
        printf("\n> ");
        fflush(stdout);

        char *input = read_line();
        char **list = NULL;

        /* Exit on "exit", or on EOF with nothing left to run; without the
         * EOF check the loop would print prompts forever. */
        if(strcmp(input, "exit") == 0 || (input[0] == '\0' && feof(stdin)))
            flag = FALSE;

        if(flag == TRUE){
            list = split_to_lines(input, "|");
            if(list[0] != NULL) /* skip blank lines */
                start_proc(list);
        }

        /* list holds pointers into input; only the array itself is freed. */
        free(list);
        free(input);
    }
    return 0;
}

我尝试用自己的方式来分割字符串,但没有成功:执行代码时,它会随机产生只含空格的字符串,然后尝试执行它们,导致execvp错误。

这是我尝试的实现:

/*
 * Return the number of characters in str before its terminating NUL.
 * str must be a valid NUL-terminated string.
 */
int get_length(char* str){
    int length = 0;
    while(str[length] != '\0')
        ++length;
    return length;
}
int find_in_string(char* str, char look_for, int from){
int length = get_length(str);
if(from > length)
return -1;

for(int i = from; i < length; ++i){
if(str[i] == look_for){
return i;
}
}
return -1;
}
/*
 * Return a newly allocated copy of str[begin..end], both ends inclusive.
 *
 * begin - first index to copy; a negative value means 0.
 * end   - last index to copy; a negative value means "to the end of str".
 *
 * An interval with end < begin yields an empty string. If begin or end is
 * greater than the length of str, an error is printed and the process exits.
 *
 * Returns: a NUL-terminated heap string that the caller must free().
 */
char* substr(char* str, int begin, int end){
    int length = 0;
    while(str[length] != '\0')
        ++length;

    if(end > length || begin > length){
        fprintf(stderr, "Substr error: invalid interval values.");
        exit(EXIT_FAILURE);
    }

    /* Clamp each bound independently; both may be negative at once. */
    if(end < 0)
        end = length;
    if(begin < 0)
        begin = 0;

    /* Never copy the source terminator as payload. */
    int last = (end >= length) ? length - 1 : end;
    int count = (last >= begin) ? last - begin + 1 : 0;

    char *buffer = malloc((size_t)count + 1); /* +1 for the terminator */
    if(buffer == NULL){
        fprintf(stderr, "Substr error: out of memory.");
        exit(EXIT_FAILURE);
    }

    for(int i = 0; i < count; ++i)
        buffer[i] = str[begin + i];
    buffer[count] = '\0';
    return buffer;
}
/* Return nonzero when c is one of the characters in delim. */
static int tsl_is_delim(char c, const char *delim)
{
    for(; *delim != '\0'; ++delim){
        if(*delim == c)
            return 1;
    }
    return 0;
}

/* Return a pointer to the first delimiter outside single quotes at or after
 * s, or to the terminating NUL when there is none. */
static const char *tsl_token_end(const char *s, const char *delim)
{
    int quoted = 0;
    for(; *s != '\0'; ++s){
        if(*s == '\'')
            quoted = !quoted;
        else if(!quoted && tsl_is_delim(*s, delim))
            break;
    }
    return s;
}

/*
 * Split str into tokens separated by any character in delim, treating text
 * between single quotes as part of the current token even when it contains
 * delimiter characters.
 *
 * str   - NUL-terminated input; it is not modified.
 * delim - set of delimiter characters.
 *
 * Empty tokens (adjacent, leading or trailing delimiters) are skipped. The
 * quote characters are kept in the token so that a later split of the same
 * text still sees them. An unterminated quote extends to the end of str.
 *
 * Returns: a NULL-terminated heap array of heap-allocated strings; the
 * caller frees every token and then the array. Exits on allocation failure.
 */
char** test_split_to_lines(char* str, char* delim){
    /* Pass 1: count tokens so the array is allocated once, at exact size. */
    size_t count = 0;
    const char *cursor = str;
    while(*cursor != '\0'){
        const char *end = tsl_token_end(cursor, delim);
        if(end != cursor)
            ++count;
        cursor = (*end != '\0') ? end + 1 : end;
    }

    char **list = malloc((count + 1) * sizeof *list);
    if(list == NULL){
        fprintf(stderr, "Split error: out of memory.");
        exit(EXIT_FAILURE);
    }

    /* Pass 2: copy each non-empty token. */
    size_t pos = 0;
    cursor = str;
    while(*cursor != '\0'){
        const char *end = tsl_token_end(cursor, delim);
        size_t len = (size_t)(end - cursor);
        if(len != 0){
            char *token = malloc(len + 1);
            if(token == NULL){
                fprintf(stderr, "Split error: out of memory.");
                exit(EXIT_FAILURE);
            }
            for(size_t k = 0; k < len; ++k)
                token[k] = cursor[k];
            token[len] = '\0';
            list[pos++] = token;
        }
        cursor = (*end != '\0') ? end + 1 : end;
    }

    list[pos] = NULL;
    return list;
}

所以,基本上我需要帮助写一个函数,正确地分隔字符串。

如果你想把一个字符串分割成若干独立的子字符串,可以使用下面的函数(它直接在原始字符串上操作,会破坏原字符串)。如果argv == NULL,它只统计字符串的数量。

/*
 * Split str in place at every character found in delim.
 *
 * argv     - output array receiving pointers into str, or NULL to only
 *            count the pieces without touching str.
 * argvsize - number of slots in argv (ignored when argv is NULL).
 * str      - writable NUL-terminated string; delimiters are overwritten
 *            with NUL bytes when argv is non-NULL.
 * delim    - set of delimiter characters.
 *
 * Adjacent delimiters yield empty strings. When argv is non-NULL it is
 * zero-filled first, so unused slots are NULL; at most argvsize pointers are
 * stored and splitting stops once the array is full.
 *
 * Returns: the number of pieces counted (argv == NULL) or stored
 * (argv != NULL); 0 for a NULL or empty str.
 */
size_t splitstring(char **argv, size_t argvsize, char *str, const char *delim)
{
    size_t pos = 0;

    if(!str || !*str)
        return 0;

    if(argv)
    {
        if(argvsize == 0)
            return 0; /* no room for even the first piece */
        memset(argv, 0, argvsize * sizeof(*argv));
        argv[0] = str;
    }
    pos = 1;

    for(; *str; str++)
    {
        if(!strchr(delim, *str))
            continue;

        if(!argv)
        {
            /* Count-only mode: leave the string intact so that it can be
             * split for real afterwards. */
            pos++;
            continue;
        }

        *str = 0; /* terminate the piece that just ended */
        if(pos >= argvsize)
            break; /* array full: check BEFORE storing the next pointer */
        argv[pos++] = str + 1;
    }
    return pos;
}

我提前道歉;这可能不能直接回答你的问题,但我想演示一种新技术。您可以使用fmemopen和getdelim来代替strtok(strtok有它自身的问题),并使用open_memstream创建一个能自动增长到足够大的数组来存储各个token。后一个技巧避免了容易出错的realloc调用。这种做法也不需要写入原始字符串;虽然我们没有将它声明为const,但它可以是const。

请注意,只有在假设'|'不会作为另一个多字节字符的一部分出现的情况下,像这样尝试标记字符串是安全的(无论您使用此方法还是strtok)。我不认为ISO C保证这一点,但如果区域设置的编码使用UTF-8,那么你是安全的。

#define _POSIX_C_SOURCE 200809L
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Demo: split a command line on '|' without strtok().
 *
 * The command is read through an fmemopen() stream with getdelim(), and the
 * resulting token pointers are appended to an open_memstream() buffer, which
 * grows automatically and ends up being an array of char *. Each token is
 * printed on its own line and then freed.
 */
int main(void) {
    if(!setlocale(LC_ALL, "")) {
        fputs("Failed to enable default locale\n", stderr);
        exit(EXIT_FAILURE);
    }
    char command[] = "cat test.txt | awk '/\" 404 / {print%7}' | sort | uniq -c | sort -nr | head";
    union {
        /* ISO C guarantees that the representations of char * and void * are
         * the same, so type-punning with a union is the same as doing a cast. */
        char *arrbytes;
        void *untyped;
    } outarray;
    size_t outarraybytecount;
    FILE *const outstream = open_memstream(&outarray.arrbytes, &outarraybytecount);
    if(!outstream) {
        perror("Failed to create memory stream");
        exit(EXIT_FAILURE);
    }
    FILE *const readstream = fmemopen(command, sizeof(command) - 1, "r");
    if(!readstream) {
        perror("Failed to create memory stream");
        goto endoutstream;
    }
    do {
        /* getdelim() allocates a fresh buffer for every token because tok
         * starts out NULL with a capacity of 0. */
        char *tok = NULL;
        ssize_t k = getdelim(&tok, &(size_t){0}, '|', readstream);
        if(k == -1) {
            /* End of input or error; the buffer may still have been allocated. */
            free(tok);
            if(ferror(readstream)) {
                perror("Failed to read from memory stream");
                goto endreadstream;
            }
        } else {
            if(tok[k-1] == '|') {
                /* You can leave the delimiter in if you want. */
                tok[k-1] = '\0';
            }
            /* This is a subtle trick: we're writing the contents of our pointer
             * variable to the stream. */
            if(!fwrite(&tok, sizeof(tok), 1, outstream)) {
                perror("Failed to write to memory stream");
                free(tok); /* not stored in the array, so release it here */
                goto endreadstream;
            }
        }
    } while(!feof(readstream));
    /* Now we're done. */
    if(fclose(readstream) == EOF) {
        perror("Failed to close memory stream");
        goto endoutstream;
    }
    /* Closing the output stream finalizes the buffer pointer and byte count. */
    if(fclose(outstream) == EOF) {
        perror("Failed to close memory stream");
        exit(EXIT_FAILURE);
    }
    char **array = outarray.untyped;
    size_t arraylen = outarraybytecount/sizeof(*array);
    for(size_t i = 0; i < arraylen; i++) {
        puts(array[i]);
        free(array[i]);
    }
    free(array);
    exit(EXIT_SUCCESS);
endreadstream:
    if(fclose(readstream) == EOF) {
        perror("Failed to close memory stream");
    }
endoutstream:
    if(fclose(outstream) == EOF) {
        perror("Failed to close memory stream");
    } else {
        /* Release whatever tokens were collected before the failure. */
        char **array = outarray.untyped;
        for(size_t i = 0; i < outarraybytecount/sizeof(char*); i++) {
            free(array[i]);
        }
        free(array);
    }
    exit(EXIT_FAILURE);
}

最新更新