c语言 - 需要帮助按分隔符拆分字符串(并将分隔符保留在标记列表中)



我想用分隔符拆分字符串并将分隔符保留在标记列表中

我有一个与 strtok 做同样的事情的函数,但有一个字符串分隔符(而不是一组字符),但它不保留分隔符,也不能将分隔符数组作为参数

这是一个函数,它像strtok一样将字符串拆分为标记,但采用分隔符

/*
 * strtok()-like tokenizer whose separator is a whole string rather than a
 * set of characters.
 *
 * str        String to tokenize on the first call; pass NULL on subsequent
 *            calls to continue with the string from the previous call.
 *            The buffer is modified in place (the first byte of each
 *            delimiter occurrence is overwritten with '\0').
 * delimiter  NUL-terminated separator string. An empty delimiter is treated
 *            as "no delimiter": the rest of the string is returned as the
 *            final token.
 *
 * Returns the next token, or NULL once the string is exhausted. The
 * delimiter itself is not returned. Like strtok(), this keeps its position in
 * a static variable and is therefore neither reentrant nor thread-safe.
 */
static char *strtokstr(char *str, char *delimiter)
{
    /* Resume position; static storage starts out as NULL. */
    static char *string;
    char *end;
    char *ret;

    if (str != NULL)
        string = str;
    if (string == NULL)
        return NULL;

    ret = string;
    /* An empty delimiter would make strstr() match at offset 0 forever. */
    end = (*delimiter != '\0') ? strstr(string, delimiter) : NULL;
    if (end == NULL) {
        /* Last token: nothing left to resume from. */
        string = NULL;
        return ret;
    }
    *end = '\0';
    string = end + strlen(delimiter);
    return ret;
}

我想要一个char **split(char *str, char **delimiters_list),通过一组分隔符拆分字符串并将分隔符保留在标记列表中

我想我还需要一个函数来计算标记(token)的数量,以便我可以为split函数的返回值malloc内存

delimiters是一个包含["&&", "||", NULL]的数组。split("ls > file&&foo || bar", delimiters)应返回一个包含["ls > file", "&&", "foo ", "||", " bar"]的数组

如何实现?

首先,您在这里遇到内存错误:

static char *string;
if (str != NULL)
string = str;
if (string == NULL)
return string;

如果str为 NULL,则变量string未初始化,并且在比较中使用了未初始化的值。

如果要复制字符串,则必须使用strdup函数,=将只复制指针而不是指针内容。


这里有一种方法可以做到这一点:

#include <stdlib.h>
#include <string.h>
/*
 * Return the first entry of the NULL-terminated list `delims` that `str`
 * starts with, or NULL when none matches. Empty delimiter strings are
 * ignored because they would match at every position.
 */
char *get_delimiters(char *str, char **delims)
{
    for (int i = 0; delims[i]; i++) {
        size_t len = strlen(delims[i]);

        if (len != 0 && strncmp(str, delims[i], len) == 0)
            return delims[i];
    }
    return NULL;
}

/*
 * Split `str` on any of the strings in the NULL-terminated list `delimiters`,
 * keeping the delimiters as tokens of their own.
 *
 * Returns a NULL-terminated array laid out as
 *   token, delimiter, token, delimiter, ..., token
 * or NULL on allocation failure or NULL arguments.
 *
 * Ownership: result[0] is the start of a private copy of `str`, and every
 * other token points into that same copy. Delimiter entries point at the
 * caller's `delimiters` strings. To release everything, call
 * free(result[0]) and then free(result); nothing else must be freed.
 */
char **split(char *str, char **delimiters)
{
    if (str == NULL || delimiters == NULL)
        return NULL;

    /* Private, writable copy (malloc + memcpy: strdup is not ISO C11). */
    size_t total = strlen(str);
    char *copy = malloc(total + 1);
    if (copy == NULL)
        return NULL;
    memcpy(copy, str, total + 1);

    /* Pass 1: count delimiters, stepping over each match so that
     * overlapping occurrences ("&&&") are not counted twice. */
    size_t matches = 0;
    for (size_t i = 0; i < total; ) {
        char *delim = get_delimiters(copy + i, delimiters);

        if (delim != NULL) {
            matches++;
            i += strlen(delim);
        } else {
            i++;
        }
    }

    /* One leading token, two slots per delimiter, one NULL terminator. */
    char **result = malloc((matches * 2 + 2) * sizeof *result);
    if (result == NULL) {
        free(copy);
        return NULL;
    }

    /* Pass 2: cut the copy at each delimiter and record the pieces. */
    size_t n = 0;
    result[n++] = copy;
    for (size_t i = 0; i < total; ) {
        char *delim = get_delimiters(copy + i, delimiters);

        if (delim == NULL) {
            i++;
            continue;
        }
        copy[i] = '\0';          /* terminate the token before the delimiter */
        i += strlen(delim);      /* resume right after the delimiter */
        result[n++] = delim;
        result[n++] = copy + i;
    }
    result[n] = NULL;
    return result;
}

结果:

[0] 'ls > file'
[1] '&&'
[2] 'foo '
[3] '||'
[4] ' bar'

请记住,result 和 string 都是通过 malloc 分配的,因此您必须同时释放 result 和 result[0]

#include <string.h>
#include <stdio.h>
#include <stdlib.h>

char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array);
/*
 * Demo driver: splits a sample command line on "&&" and "||" and prints
 * every returned row on its own line, then releases the rows and the array.
 */
int main(void)
{
    char **split_str;
    char *delimiters[] = {
        "&&",
        "||"
    };

    int rows_in_returned_array = 0;
    split_str = split("ls > file&&foo || bar && abc ||pqwe", delimiters, 2, &rows_in_returned_array);
    if (split_str == NULL)
        return 1;

    for (int i = 0; i < rows_in_returned_array; ++i) {
        printf("\n%s\n", split_str[i]);
        /* Each row is a separate heap allocation made by split(). */
        free(split_str[i]);
    }
    free(split_str);

    return 0;
}

/* Length of the first delimiter that matches at the start of `s`, or 0 when
 * none does. NULL and empty delimiters are skipped (an empty one would match
 * everywhere and never advance the scan). */
static size_t split_match_length(const char *s, char **delimiters, int number_of_delimiters)
{
    if (delimiters == NULL)
        return 0;
    for (int j = 0; j < number_of_delimiters; ++j) {
        const char *candidate = delimiters[j];
        size_t len;

        if (candidate == NULL)
            continue;
        len = strlen(candidate);
        if (len != 0 && strncmp(s, candidate, len) == 0)
            return len;
    }
    return 0;
}

/* Heap-allocate a NUL-terminated copy of the `len` bytes starting at `start`. */
static char *split_copy_span(const char *start, size_t len)
{
    char *copy = malloc(len + 1);

    if (copy != NULL) {
        memcpy(copy, start, len);
        copy[len] = '\0';
    }
    return copy;
}

/*
 * Split `str` on any of the `number_of_delimiters` strings in `delimiters`,
 * keeping each delimiter as a row of its own.
 *
 * The rows alternate: text, delimiter, text, ..., text. The text before a
 * delimiter and the text after the last delimiter are always stored, even
 * when empty, so the result has 2 * (delimiters found) + 1 rows.
 *
 * str                              NUL-terminated input; not modified.
 * delimiters                       array of delimiter strings.
 * number_of_delimiters             number of entries in `delimiters`.
 * number_of_rows_in_return_array   receives the row count (0 on failure);
 *                                  may be NULL.
 *
 * Returns a malloc'ed array of malloc'ed strings, or NULL when `str` is NULL
 * or an allocation fails. The caller frees every row and then the array.
 */
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array)
{
    char **split_str = NULL;
    size_t row = 0;      /* rows stored so far */
    size_t mark = 0;     /* index just past the previous delimiter */
    size_t matches = 0;
    size_t i;

    if (number_of_rows_in_return_array != NULL)
        *number_of_rows_in_return_array = 0;
    if (str == NULL)
        return NULL;

    /* Pass 1: count delimiter occurrences so the array is sized exactly. */
    for (i = 0; str[i] != '\0'; ) {
        size_t len = split_match_length(&str[i], delimiters, number_of_delimiters);

        if (len != 0) {
            ++matches;
            i += len;
        } else {
            ++i;
        }
    }

    split_str = malloc((2 * matches + 1) * sizeof *split_str);
    if (split_str == NULL)
        return NULL;

    /* Pass 2: copy out the text before each delimiter and the delimiter. */
    for (i = 0; str[i] != '\0'; ) {
        size_t len = split_match_length(&str[i], delimiters, number_of_delimiters);

        if (len == 0) {
            ++i;
            continue;
        }
        split_str[row] = split_copy_span(&str[mark], i - mark);
        if (split_str[row] == NULL)
            goto fail;
        ++row;
        split_str[row] = split_copy_span(&str[i], len);
        if (split_str[row] == NULL)
            goto fail;
        ++row;
        /* Continue scanning right after the delimiter. */
        i += len;
        mark = i;
    }

    /* Whatever follows the last delimiter (possibly an empty string). */
    split_str[row] = split_copy_span(&str[mark], i - mark);
    if (split_str[row] == NULL)
        goto fail;
    ++row;

    if (number_of_rows_in_return_array != NULL)
        *number_of_rows_in_return_array = (int)row;
    return split_str;

fail:
    while (row > 0)
        free(split_str[--row]);
    free(split_str);
    return NULL;
}

这应该可以工作。请注意,我通过指针传递了int *number_of_rows_in_return_array,因为我们需要知道返回的数组的行数。

我进入了抽象。首先,我创建了一个"句子"库,它允许操作以 NULL 结尾的字符串列表 (char*)。我写了一些初始访问器(sentence_init、sentence_size、sentence_free、sentence_add_str等)。

然后我去写split,它变得非常非常容易 - 如果找到一个分隔符,将其之前的字符串添加到句子中,然后将分隔符添加到句子中。然后递增字符串指针位置。如果未找到分隔符,请将剩余的字符串添加到句子中。

双指针有一个真正的问题,因为char **不能隐式转换为const char **。对于生产代码,我可能的目标是重构代码,并尝试考虑const正确性。

#define _GNU_SOURCE 1
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <stdbool.h>
/*
* sentence - list of words
*/
/* ----------------------------------------------------------- */
// if this would be production code, I think I would go with a
// struct word_t { char *word; }; struct sentence_t { struct word_t *words; };
// Note: when sentence_add_* fail - they free *EVERYTHING*, so it doesn't work like realloc
// shared_ptr? Never heard of it.
// Create an empty sentence. An empty sentence is represented by NULL.
char **sentence_init(void) {
    char **empty_sentence = NULL;
    return empty_sentence;
}
// Number of words in the NULL-terminated sentence t (the terminator is not
// counted). A NULL sentence has size 0.
size_t sentence_size(char * const *t) {
    size_t count = 0;
    if (t != NULL) {
        while (t[count] != NULL) {
            count++;
        }
    }
    return count;
}
// Release every word of the sentence and then the pointer array itself.
// A NULL sentence is accepted and ignored.
void sentence_free(char * const *t) {
    if (t != NULL) {
        size_t k = 0;
        while (t[k] != NULL) {
            free(t[k]);
            k++;
        }
        // the array is owned by the sentence too; drop the const to free it
        free((void*)t);
    }
}
// Print every word of sentence t to stdout.
//   fmt1  - printf format applied to each word; it receives the word as its
//           single char* argument (e.g. "%s" or "'%s'")
//   delim - text written verbatim between two consecutive words
//   end   - text written verbatim after the last word
// A NULL sentence is treated as empty: only `end` is written.
void sentence_printex(char * const *t, const char *fmt1, const char *delim, const char *end) {
    if (t != NULL) {
        for (char * const *i = t; *i != NULL; ++i) {
            printf(fmt1, *i);
            if (*(i + 1) != NULL) {
                // fputs, not printf: delim is data, not a format string
                fputs(delim, stdout);
            }
        }
    }
    fputs(end, stdout);
}
// Print the words of t separated by single spaces, followed by a newline.
void sentence_print(char * const *t) {
    sentence_printex(t, "%s", " ", "\n");
}
// Print the words of t, each wrapped in single quotes and separated by
// single spaces, followed by a newline.
void sentence_print_quote_words(char * const *t) {
    sentence_printex(t, "'%s'", " ", "\n");
}
// Compare two NULL-terminated sentences word by word.
// Returns true when both have the same number of words and every pair of
// words compares equal with strcmp. A NULL sentence is treated as empty.
bool sentence_cmp_const(const char * const *t, const char * const *other) {
    static const char * const empty[] = { NULL };
    const char * const *t_i = (t != NULL) ? t : empty;
    const char * const *o_i = (other != NULL) ? other : empty;
    // Stop as soon as EITHER sentence runs out of words; testing the word
    // (*o_i), not the iterator, keeps strcmp from ever seeing NULL.
    while (*t_i != NULL && *o_i != NULL) {
        if (strcmp(*t_i, *o_i) != 0) {
            return false;
        }
        ++t_i;
        ++o_i;
    }
    // Equal only if both ended at the same time.
    return *t_i == NULL && *o_i == NULL;
}
// "strdupped" means: a string the caller has already duplicated on the heap.
// Append that string to sentence t WITHOUT copying it.
// On success the grown sentence is returned and it owns strdupped.
// On failure the whole sentence t is freed, NULL is returned, and strdupped
// still belongs to the caller.
char **sentence_add_strdupped(char **t, char *strdupped) {
    const size_t old_count = sentence_size(t);
    // one slot for the new word, one for the terminating NULL pointer
    const size_t new_count = old_count + 1 + 1;
    char **grown = realloc(t, new_count * sizeof(char*));
    if (grown == NULL) {
        // realloc left t untouched; this library's policy is to drop it all
        sentence_free(t);
        return NULL;
    }
    assert(new_count >= 2);
    grown[new_count - 2] = strdupped;
    grown[new_count - 1] = NULL;
    return grown;
}
// Append a copy of the first len bytes of str to sentence t.
// The copy is NUL-terminated, so str itself need not be terminated at len.
// Returns the grown sentence, or NULL on allocation failure; in that case
// the whole sentence has been freed and must not be used again.
char **sentence_add_strlened(char **t, const char *str, size_t len) {
    char *copy = malloc(len + 1);
    if (copy == NULL) {
        sentence_free(t);
        return NULL;
    }
    memcpy(copy, str, len);
    copy[len] = '\0';
    t = sentence_add_strdupped(t, copy);
    if (t == NULL) {
        // sentence_add_strdupped already freed the sentence; on failure the
        // copy is still ours, so release it here
        free(copy);
        return NULL;
    }
    return t;
}
// Append a copy of the whole NUL-terminated string str to sentence t.
// Same return and failure behavior as sentence_add_strlened.
char **sentence_add_str(char **t, const char *str) {
    return sentence_add_strlened(t, str, strlen(str));
}
/* ----------------------------------------------------------- */
/**
 * Find the EARLIEST occurrence in str of any string from the NULL-terminated
 * list dellist.
 *
 * Every entry is searched with strstr and the match closest to the start of
 * str wins; when two entries match at the same position, the one that comes
 * first in dellist wins. (Returning the first entry that matches anywhere
 * would let a later delimiter hide an earlier one in the text.)
 *
 * @param str           string to search, must not be NULL
 * @param dellist       NULL-terminated list of strings, must not be NULL
 * @param dellist_found if not NULL and a match exists, receives a pointer to
 *                      the matching entry inside dellist; left untouched
 *                      when nothing matches
 * @return pointer into str at the start of the match, or NULL if no entry
 *         of dellist occurs in str
 */
char *str_find_any_strings(const char *str,
        const char * const *dellist,
        const char * const * *dellist_found) {
    assert(str != NULL);
    assert(dellist != NULL);
    const char *best = NULL;
    const char * const *best_entry = NULL;
    for (const char * const *i = &dellist[0]; *i != NULL; ++i) {
        const char *found = strstr(str, *i);
        // strict '<' keeps the earlier list entry on ties
        if (found != NULL && (best == NULL || found < best)) {
            best = found;
            best_entry = i;
        }
    }
    if (best != NULL && dellist_found != NULL) {
        *dellist_found = best_entry;
    }
    // strstr-style: the result points into the caller's (const) string
    return (char*)best;
}
/**
 * Split the string str at every occurrence of a delimiter from dellist,
 * keeping the delimiters as words of their own.
 *
 * The text in front of each delimiter is always added, even when it is empty
 * (delimiter at the very start, or two delimiters in a row). Text after the
 * last delimiter is added only when it is not empty.
 *
 * @param str      string to split, must not be NULL; it is not modified
 * @param dellist  NULL-terminated list of non-empty delimiter strings,
 *                 must not be NULL
 * @return a sentence (NULL-terminated list of heap-allocated words) to be
 *         released with sentence_free. NULL is returned both for an empty
 *         str (the empty sentence) and on allocation failure.
 */
char **split(const char *str, const char * const *dellist) {
    assert(str != NULL);
    assert(dellist != NULL);
    char **sen = sentence_init();
    while (*str != '\0') {
        const char * const *del_pnt = NULL;
        const char *found = str_find_any_strings(str, dellist, &del_pnt);
        if (found == NULL) {
            // no more delimiters: the (non-empty) rest is the last word
            sen = sentence_add_str(sen, str);
            if (sen == NULL) return NULL;
            break;
        }
        // a delimiter starts at str[idx]; everything before it is one word
        const size_t idx = (size_t)(found - str);
        sen = sentence_add_strlened(sen, str, idx);
        if (sen == NULL) return NULL;
        assert(del_pnt != NULL);
        const char *del = *del_pnt;
        assert(del != NULL);
        // an empty delimiter would never advance str
        assert(*del != '\0');
        const size_t del_len = strlen(del);
        sen = sentence_add_strlened(sen, del, del_len);
        if (sen == NULL) return NULL;
        // continue right behind the delimiter
        str += idx + del_len;
    }
    return sen;
}
// Demo: split a sample command line on "&&" and "||", print the quoted
// words, and check the result against the expected word list.
int main()
{
    char **sentence = split("ls > file&&foo || bar", (const char*[]){"&&", "||", NULL});
    assert(sentence != NULL);
    sentence_print_quote_words(sentence);
    // char ** does not convert implicitly to const char * const *, hence the cast
    printf("cmp = %d\n", sentence_cmp_const((const char * const *)sentence, (const char*[]){"ls > file", "&&", "foo ", "||", " bar", NULL}));
    sentence_free(sentence);
    return 0;
}

程序将输出:

'ls > file' '&&' 'foo ' '||' ' bar'
cmp = 1

最新更新