我有一个字符串,内容如下(UTF-8):
__$FOO ${FOO} ${FOO:def} ${FOO2:-тест}
以及一个值为test的环境变量FOO。我的C应用程序应该像GNU envsubst
一样工作——用test
替换所有$FOO
或${FOO}
条目——没有什么复杂的。预期结果:
__test test test тест
但是……如何仅使用C来完成此操作?我不能使用exec
或外部(动态)库之类的东西(我的应用程序是静态链接的,以便在docker scratch中使用)。
我从gettext中了解envsubst,但它至少不支持默认值。
我在Go-stephenc/envsub和Rust-stephenc/envsub中找到了具有所有必需功能的库,但也许有人知道我如何在C中做到这一点?我不想发明一些可能已经发明的东西。
// Placeholder from the question: the wanted function takes a template string and returns it
// with the environment variable references expanded. The body is intentionally left
// unimplemented here (it does not return a value yet).
static char *envsubst(char *str) {
// magic
}
由于找不到答案,我决定编写自己的解析器。它的功能比我想要的要少,但这对我的情况来说已经足够了:
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "envsubst.h"
/* Character buffer that tracks how much of its storage is in use. */
struct buffer {
    char *data;       /* storage holding the characters */
    unsigned int len; /* number of characters written so far */
    unsigned int cap; /* number of bytes available in data */
};
/**
 * Allocate an empty buffer with room for `cap` characters.
 *
 * The storage is zero-filled, so `data` is a valid (empty) C string right away, and `len`
 * starts at 0. A request for 0 bytes is rounded up to 1 so that `data[0]` always exists.
 *
 * Returns NULL when memory cannot be allocated. The caller owns the result and must free
 * both `data` and the struct itself.
 */
static struct buffer *newBuf(unsigned int cap) {
    const unsigned int realCap = cap > 0 ? cap : 1;

    struct buffer *b = malloc(sizeof *b);
    if (b == NULL) {
        return NULL;
    }

    /* calloc hands back zeroed memory: every unused byte doubles as a string terminator */
    b->data = calloc(realCap, sizeof *b->data);
    if (b->data == NULL) {
        free(b);
        return NULL;
    }

    b->len = 0;
    b->cap = realCap;
    return b;
}
/**
 * Reset the buffer to the empty string while keeping its allocated storage.
 */
static void emptyBuf(struct buffer *buf) {
    /* zero the whole storage so that no stale characters can leak into the next value */
    memset(buf->data, '\0', buf->cap);
    buf->len = 0;
}
/**
 * Append one character to the buffer, growing the storage when it is nearly full.
 *
 * One spare byte is always kept after the content and is set to NUL, so `buf->data` can be
 * read as a C string after every call. If the storage cannot grow, the character is dropped
 * instead of being written past the end of the allocation.
 */
static void writeInBuf(struct buffer *buf, const char c) {
    enum { GROW_STEP = 64 };

    if (buf->cap <= buf->len + 1) {
        const unsigned int newCap = buf->cap + GROW_STEP;
        if (newCap <= buf->cap) {
            return; /* unsigned wrap-around: the buffer cannot get any bigger */
        }

        /* keep the old pointer until realloc is known to have succeeded */
        char *newData = realloc(buf->data, newCap);
        if (newData == NULL) {
            return; /* out of memory: buf->data is still valid, it is just full */
        }

        memset(newData + buf->cap, '\0', newCap - buf->cap);
        buf->data = newData;
        buf->cap = newCap;
    }

    buf->data[buf->len++] = c;
    buf->data[buf->len] = '\0'; /* len < cap is guaranteed by the check above */
}
static void writeStringInBuf(struct buffer *buf, const char *str) {
for (unsigned int j = 0; str[j] != ' '; j++) {
writeInBuf(buf, str[j]);
}
}
/* Terminate the content written so far so that buf->data can be used as a C string. */
static void terminateBuf(struct buffer *buf) {
    /* writeInBuf always leaves at least one spare byte after the content */
    buf->data[buf->len] = '\0';
}

/* Release a buffer created by newBuf(); NULL is ignored. */
static void freeBuf(struct buffer *buf) {
    if (buf != NULL) {
        free(buf->data);
        free(buf);
    }
}

/**
 * Parse the string and replace patterns in format `${ENV_NAME}` or
 * `${ENV_NAME:-default_value}` with the values from the environment (or with the default
 * value after `:-` when the variable is not set).
 *
 * Details worth knowing:
 *  - `$NAME` without braces is left untouched.
 *  - A variable that is set but empty expands to the empty string; the default is used only
 *    when the variable is not set at all.
 *  - Nested patterns are not expanded: an inner `${` is swallowed and the inner `}` becomes
 *    part of the name/default being collected.
 *  - A pattern that is never closed is dropped from the output.
 *  - Inputs shorter than 4 characters cannot hold `${X}` and are copied unchanged.
 *
 * @param str NUL-terminated template; may be NULL.
 * @return a newly allocated string that the caller must free(), or NULL when `str` is NULL
 *         or memory runs out.
 */
char *envsubst(const char *str) {
    if (str == NULL) {
        return NULL;
    }

    const size_t strLen = strlen(str);
    if (strLen < 4) {
        /* hand back an owned copy so that the caller can always free() the result */
        char *copy = malloc(strLen + 1);
        if (copy != NULL) {
            memcpy(copy, str, strLen + 1);
        }
        return copy;
    }

    enum {
        DATA,
        ENV_NAME,
        ENV_DEFAULT,
    } state = DATA;
    unsigned int nested = 0;
    char *output = NULL;

    struct buffer *result = newBuf((unsigned int) strLen);
    struct buffer *envName = newBuf(32);
    struct buffer *envDef = newBuf(32);
    if (result == NULL || envName == NULL || envDef == NULL) {
        goto cleanup;
    }

    for (size_t i = 0; str[i] != '\0'; i++) {
        const char c = str[i];
        const char next = str[i + 1]; /* at worst this is the terminator */

        if (c == '$' && next == '{') {
            i++; /* skip the '{' as well */
            nested++;
            state = ENV_NAME;
            continue;
        }

        if (state == ENV_NAME && c == ':' && next == '-') {
            i++; /* skip the '-' as well */
            state = ENV_DEFAULT;
            continue;
        }

        if (c == '}' && state != DATA) {
            nested--;
            if (nested == 0) {
                /* the pattern is complete: emit the variable value or the default */
                terminateBuf(envName);
                terminateBuf(envDef);

                const char *value = envName->len > 0 ? getenv(envName->data) : NULL;
                if (value != NULL) {
                    writeStringInBuf(result, value);
                } else if (envDef->len > 0) {
                    writeStringInBuf(result, envDef->data);
                }

                emptyBuf(envName);
                emptyBuf(envDef);
                state = DATA;
                continue;
            }
            /* closing brace of a nested pattern: collected like any other character */
        }

        switch (state) {
            case ENV_NAME:
                writeInBuf(envName, c);
                break;
            case ENV_DEFAULT:
                writeInBuf(envDef, c);
                break;
            case DATA:
            default:
                writeInBuf(result, c);
                break;
        }
    }

    terminateBuf(result);
    output = result->data;
    result->data = NULL; /* ownership moves to the caller */

cleanup:
    freeBuf(envName);
    freeBuf(envDef);
    freeBuf(result);
    return output;
}
测试:
#include <assert.h>
// tests running: `gcc -o ./tmp/subs ./src/envsubst.c && ./tmp/subs`
/* Expand `input` with envsubst() and assert that the result equals `expected`. */
static void expectSubst(const char *input, const char *expected) {
    char *actual = envsubst(input);
    assert(actual != NULL);
    assert(strcmp(actual, expected) == 0);
    /* every input used below is long enough for envsubst() to return a heap buffer */
    free(actual);
}

int main(void) {
    putenv("Test_1=foo");
    putenv("__#Test_2=😎");

    /* an unset variable without a default expands to nothing, so both spaces around it stay */
    expectSubst(
        "__$_UNSET_VAR_ ${_UNSET_VAR_} ${_UNSET_VAR_:-default value 😎}",
        "__$_UNSET_VAR_  default value 😎"
    );

    expectSubst(
        "${__#Test_2} ${__#Test_2:-foo}${_UNSET_VAR_:-def}${__#Test_2}", "😎 😎def😎"
    );

    expectSubst(
        "${Test_1} ${Test_1:-def}${Test_1}", "foo foofoo"
    );

    expectSubst(
        "__$FOO ${bar} $FOO:def ${Test_1:-def} ${Test_1} ${_UNSET_VAR_:-default} bla-bla ${FOO2:-тест}${ABC} ${}${}",
        "__$FOO  $FOO:def foo foo default bla-bla тест "
    );

    /* nested patterns are not expanded */
    expectSubst(
        "${_UNSET_VAR_:-${Test_1}}", ""
    );

    /* a stray closing brace is kept, an unterminated pattern is dropped */
    expectSubst(
        "aaa ${}} ${${} bbb", "aaa } "
    );

    return 0;
}
当您需要shell替换时,您可以通过libc的服务(如system()
或popen()
)从程序中隐式运行shell。这些函数会fork/exec一个shell,调用者的环境由子进程继承(内部通过exec系列调用实现)。
从性能的角度来看,这可能看起来很笨重,但它确实能完成任务:
#include <stdio.h>
/*
 * Expand `str` by letting the shell run `echo <str>` and capturing what it prints.
 *
 * Non reentrant function as it returns a pointer onto a function-local static buffer: the
 * result is overwritten by the next call. Output that does not fit in the buffer is
 * truncated. An empty string is returned when `str` is NULL, when the command does not fit
 * in the buffer, or when the shell cannot be started.
 *
 * SECURITY: `str` is handed to the shell verbatim, so command substitution, `;`, `|` etc. in
 * the input are executed. Only call this with trusted input.
 */
static char *envsubst(char *str) {
    static char cmd[256];

    if (str == NULL) {
        cmd[0] = '\0';
        return cmd;
    }

    int written = snprintf(cmd, sizeof(cmd), "echo %s", str);
    if (written < 0 || (size_t) written >= sizeof(cmd)) {
        /* never run a command that was cut off in the middle */
        cmd[0] = '\0';
        return cmd;
    }

    FILE *f = popen(cmd, "r");
    if (f == NULL) {
        cmd[0] = '\0';
        return cmd;
    }

    /* read at most size - 1 bytes so that the terminator always fits */
    size_t rc = fread(cmd, 1, sizeof(cmd) - 1, f);
    cmd[rc] = '\0';
    pclose(f);
    return cmd;
}
/* Print the expansion of the single command-line argument; do nothing otherwise. */
int main(int ac, char *av[])
{
    /* exactly one argument is expected: the template to expand */
    if (ac != 2) {
        return 0;
    }

    char *expanded = envsubst(av[1]);
    printf("%s", expanded);
    return 0;
}
popen()会调用sh来执行命令。如果/bin/sh
或/usr/bin/sh
指向bash,而不是像dash
这样不理解这种变量替换的深奥shell,那么上面的程序的工作方式如下:
$ ./envp '__$FOO ${FOO} ${FOO:def} ${FOO2:-TECT}'
sh: 1: Bad substitution
$ ls -l /bin/sh
lrwxrwxrwx 1 root root 4 août 20 20:30 /bin/sh -> dash
$ sudo rm /bin/sh
$ sudo ln -s bash /bin/sh
$ ls -l /bin/sh
lrwxrwxrwx 1 root root 4 août 20 20:36 /bin/sh -> bash
$ export FOO=test
$ export FOO2=test2
$ ./envp '__$FOO ${FOO} ${FOO:def} ${FOO2:-TECT}'
__test test test test2
$ unset FOO2
$ ./envp '__$FOO ${FOO} ${FOO:def} ${FOO2:-TECT}'
__test test test TECT
$ unset FOO
$ ./envp '__$FOO ${FOO} ${FOO:def} ${FOO2:-TECT}'
__ TECT
PS:再次阅读您的问题,您提到您不能使用外部库,因为您是静态链接的。因此,我不确定您是否能够使用libc及其popen()/pclose()
服务。。。