我想解析文件名中的所有 "." 和 ".." 引用,即使该文件当前并不存在。
所以,我想做的事情类似于 realpath(3),但有以下不同:
- 文件名可能引用不存在的文件
- 不得解析符号链接(主要是因为路径组件可能不存在)
是否有我可以调用的现有库代码,或者我是否必须编写新代码?
所以,本质上:
a/../b/c.txt
会变成 b/c.txt
a/../../b/c.txt
会变成 ../b/c.txt
a/./b/./c.txt
会变成 a/b/c.txt
如果有人想知道,我这样做不是出于安全目的:我正在编写一个允许执行任意 shell 命令的工具,因此安全性不是问题。我只需要一种方法来得到路径的规范表示,以便对路径进行比较。
这是我30多年前第一次写的代码。 您所追求的代码有两种变体。
该代码按 Stack Overflow 使用的 CC-by-SA 3.0 许可证提供 — 您可以在注明出处的前提下使用它。
/*
@(#)File: $RCSfile: clnpath.c,v $
@(#)Version: $Revision: 2.19 $
@(#)Last changed: $Date: 2017/03/26 06:32:49 $
@(#)Purpose: Clean up pathname (lexical analysis only)
@(#)Author: J Leffler
@(#)Copyright: (C) JLSS 1987-2017
*/
/*TABSTOP=4*/
#include "clnpath.h"
#include "jlss.h"
#include "tokenise.h"
#include <string.h>
/* Capacity of the token array that clnpath2() hands to tokenise() */
#define MAX_PATH_ELEMENTS 64 /* Number of levels of directory */
/* True when the two C strings compare equal under strcmp() */
#define strequal(a,b) (strcmp((a),(b)) == 0)
/* Element count of an actual array (sizeof-based, so not valid for a pointer) */
#define DIM(x) (sizeof(x)/sizeof(*(x)))
#if !defined(lint)
/* Prevent over-aggressive optimizers from eliminating ID string */
/* The extern declaration precedes the definition so the ID string has external linkage */
extern const char jlss_id_clnpath_c[];
const char jlss_id_clnpath_c[] = "@(#)$Id: clnpath.c,v 2.19 2017/03/26 06:32:49 jleffler Exp $";
#endif /* lint */
/*
** clnpath() - clean up a pathname in place (lexical analysis only).
**
** path: modifiable, null-terminated pathname; the result is never longer
**       than the input, so it is rewritten in the same buffer.
**
** Transformations applied:
**   - runs of adjacent slashes collapse to a single slash;
**   - "./" prefixes and "/." components are removed ("." and "/" survive);
**   - a single trailing slash is removed (except for the root "/").
** ".." components are deliberately left alone; see clnpath2().
*/
void clnpath(char *path)
{
    char *src;
    char *dst;
    char c;
    int slash = 0;

    /* Convert multiple adjacent slashes to single slash */
    src = dst = path;
    while ((c = *dst++ = *src++) != '\0')
    {
        if (c == '/')
        {
            slash = 1;
            while (*src == '/')
                src++;
        }
    }

    /* A name with no slash at all has nothing further to clean */
    if (slash == 0)
        return;

    /* Remove "./" from "./xxx" but leave "./" alone. */
    /* Remove "/." from "xxx/." but reduce "/." to "/". */
    /* Reduce "xxx/./yyy" to "xxx/yyy" */
    src = dst = (*path == '/') ? path + 1 : path;
    while (src[0] == '.' && src[1] == '/' && src[2] != '\0')
        src += 2;
    while ((c = *dst++ = *src++) != '\0')
    {
        if (c == '/' && src[0] == '.' && (src[1] == '\0' || src[1] == '/'))
        {
            /* Skip the "." and back up so the next copy overwrites this '/' */
            src++;
            dst--;
        }
    }

    /* "/." and "/./" both mean the root directory */
    if (path[0] == '/' && path[1] == '.' &&
        (path[2] == '\0' || (path[2] == '/' && path[3] == '\0')))
        path[1] = '\0';

    /* Remove trailing slash, if any.  There is at most one! */
    /* dst is pointing one beyond terminating null */
    if ((dst -= 2) > path && *dst == '/')
        *dst++ = '\0';
}
/*
** clnpath2() - clean a pathname in place and also collapse "<name>/.."
** pairs purely lexically.
**
** clnpath2() is not part of the basic clnpath() function because it can
** change the meaning of a path name if there are symbolic links on the
** system.  For example, suppose /usr/tmp is a symbolic link to /var/tmp.
** If the user supplies /usr/tmp/../abcdef as the directory name, clnpath2()
** would transform that to /usr/abcdef, not to /var/abcdef which is what
** the kernel would interpret it as.
**
** path: modifiable, null-terminated pathname, rewritten in the same buffer.
**
** NOTE(review): tokenise() is declared in tokenise.h and is not shown
** here.  This code relies on it returning the number of tokens stored in
** token[], and on an absolute path yielding an empty first token - confirm
** against its documentation.
*/
void clnpath2(char *path)
{
    char *token[MAX_PATH_ELEMENTS];
    int ntok;

    clnpath(path);

    /* Reduce "<name>/.." to "/" */
    if ((ntok = tokenise(path, "/", token, MAX_PATH_ELEMENTS, 0)) > 1)
    {
        for (int i = 0; i < ntok - 1; i++)
        {
            if (!strequal(token[i], "..") && strequal(token[i + 1], ".."))
            {
                /* The empty token stands for the root; "/.." is handled below */
                if (*token[i] == '\0')
                    continue;
                /*
                ** Close the gap left by the "<name>", ".." pair.  Only the
                ** ntok - 2 surviving tokens are moved, so token[ntok] (one
                ** past the valid entries) is never read.
                */
                for (int j = i; j < ntok - 2; j++)
                    token[j] = token[j + 2];
                ntok -= 2;
                i = -1;     /* Restart enclosing for loop */
            }
        }
    }

    /* Reassemble string */
    char *dst = path;
    if (ntok == 0)
    {
        /* Everything cancelled out: the result is the current directory */
        *dst++ = '.';
        *dst = '\0';
    }
    else
    {
        if (token[0][0] == '\0')
        {
            /* Absolute path: ".." components directly under the root are dropped */
            int i;
            for (i = 1; i < ntok && strequal(token[i], ".."); i++)
                ;
            if (i > 1)
            {
                int j;
                for (j = 1; i < ntok; i++)
                    token[j++] = token[i];
                ntok = j;
            }
        }
        if (ntok == 1 && token[0][0] == '\0')
        {
            /* Only the root remains */
            *dst++ = '/';
            *dst = '\0';
        }
        else
        {
            for (int i = 0; i < ntok; i++)
            {
                char *src = token[i];
                while ((*dst++ = *src++) != '\0')
                    ;
                /* Turn the copied terminator into the component separator */
                *(dst - 1) = '/';
            }
            /* The final separator becomes the string terminator */
            *(dst - 1) = '\0';
        }
    }
}
#if defined(TEST)
#include <stdio.h>
#include "phasedtest.h"
/* -- PHASE 1 TESTING -- */
/* -- Phase 1 - Testing clnpath() -- */
/* One phase 1 case: a raw pathname and the text clnpath() should leave behind. */
struct p1_test_case
{
    const char *input;      /* Pathname handed to clnpath() */
    const char *output;     /* Expected cleaned pathname */
};
typedef struct p1_test_case p1_test_case;
/* This stress tests the cleaning, concentrating on the boundaries. */
/* Each entry pairs an input pathname with the output expected from clnpath(). */
static const p1_test_case p1_tests[] =
{
/* Root directory spelled in various ways */
{ "/", "/", },
{ "//", "/", },
{ "///", "/", },
{ "/.", "/", },
{ "/./", "/", },
{ "/./.", "/", },
{ "/././.profile", "/.profile", },
/* Current directory and leading "./" prefixes */
{ "./", ".", },
{ "./.", ".", },
{ "././", ".", },
{ "./././.profile", ".profile", },
{ "abc/.", "abc", },
{ "abc/./def", "abc/def", },
{ "./abc", "abc", },
/* Mixed repeated slashes, "." components and trailing slashes; ".." is kept */
{ "//abcd///./abcd////", "/abcd/abcd", },
{ "//abcd///././../defg///ddd//.", "/abcd/../defg/ddd", },
{ "/abcd/./../././defg/./././ddd", "/abcd/../defg/ddd", },
{ "//abcd//././../defg///ddd//.///", "/abcd/../defg/ddd", },
/* Most of these are minimal interest in phase 1 */
{ "/usr/tmp/clnpath.c", "/usr/tmp/clnpath.c", },
{ "/usr/tmp/", "/usr/tmp", },
{ "/bin/..", "/bin/..", },
{ "bin/..", "bin/..", },
{ "/bin/.", "/bin", },
{ "sub/directory", "sub/directory", },
{ "sub/directory/file", "sub/directory/file", },
{ "/part1/part2/../.././../", "/part1/part2/../../..", },
{ "/.././../usr//.//bin/./cc", "/../../usr/bin/cc", },
};
/*
** p1_tester() - run one phase 1 case through clnpath() and report it.
**
** data: points to a p1_test_case; passed as const void * because the
**       function is stored in the phases[] table alongside its test array.
**
** The input is copied into a local buffer because clnpath() edits its
** argument in place; the entries in p1_tests[] are all far shorter than
** the buffer.
*/
static void p1_tester(const void *data)
{
    const p1_test_case *test = data;
    char buffer[256];

    strcpy(buffer, test->input);
    clnpath(buffer);
    if (strcmp(buffer, test->output) == 0)
        pt_pass("<<%s>> cleans to <<%s>>\n", test->input, buffer);
    else
    {
        pt_fail("<<%s>> - unexpected output from clnpath()\n", test->input);
        pt_info("Wanted <<%s>>\n", test->output);
        pt_info("Actual <<%s>>\n", buffer);
    }
}
/* -- PHASE 2 TESTING -- */
/* -- Phase 2 - Testing clnpath2() -- */
/* One phase 2 case: a pathname and the text clnpath2() should reduce it to. */
struct p2_test_case
{
    const char *input;      /* Pathname handed to clnpath2() */
    const char *output;     /* Expected result after ".." reduction */
};
typedef struct p2_test_case p2_test_case;
/* Each entry pairs an input pathname with the output expected from clnpath2(). */
static const p2_test_case p2_tests[] =
{
/* "<name>/.." pairs cancel; an emptied relative path becomes "." */
{ "/abcd/../defg/ddd", "/defg/ddd" },
{ "/bin/..", "/" },
{ "bin/..", "." },
{ "/usr/bin/..", "/usr" },
{ "/usr/bin/../..", "/" },
{ "usr/bin/../..", "." },
/* Leading ".." is kept for relative paths but dropped directly under the root */
{ "../part/of/../the/way", "../part/the/way" },
{ "/../part/of/../the/way", "/part/the/way" },
{ "part1/part2/../../part3", "part3" },
{ "part1/part2/../../../part3", "../part3" },
{ "/part1/part2/../../../part3", "/part3" },
{ "/part1/part2/../../../", "/" },
{ "/../../usr/bin/cc", "/usr/bin/cc" },
{ "../../usr/bin/cc", "../../usr/bin/cc" },
/* Inputs that also need the clnpath() clean-up ("." components, extra slashes) */
{ "part1/./part2/../../part3", "part3" },
{ "./part1/part2/../../../part3", "../part3" },
{ "/part1/part2/.././../../part3", "/part3" },
{ "/part1/part2/../.././../", "/" },
{ "/.././..//./usr///bin/cc/", "/usr/bin/cc" },
};
/*
** p2_tester() - run one phase 2 case through clnpath2() and report it.
**
** data: points to a p2_test_case; passed as const void * because the
**       function is stored in the phases[] table alongside its test array.
**
** The input is copied into a local buffer because clnpath2() edits its
** argument in place; the entries in p2_tests[] are all far shorter than
** the buffer.
*/
static void p2_tester(const void *data)
{
    const p2_test_case *test = data;
    char buffer[256];

    strcpy(buffer, test->input);
    clnpath2(buffer);
    if (strcmp(buffer, test->output) == 0)
        pt_pass("<<%s>> cleans to <<%s>>\n", test->input, buffer);
    else
    {
        pt_fail("<<%s>> - unexpected output from clnpath2()\n", test->input);
        pt_info("Wanted <<%s>>\n", test->output);
        pt_info("Actual <<%s>>\n", buffer);
    }
}
/* -- Phased Test Infrastructure -- */
/*
** Table of test phases passed to pt_auto_harness() by main().  Each entry
** names a tester function, its test array (via PT_ARRAYINFO), and a title.
** NOTE(review): pt_auto_phase and PT_ARRAYINFO come from phasedtest.h,
** which is not shown here; the meaning of the third field (0) is defined
** there - confirm before changing it.
*/
static pt_auto_phase phases[] =
{
{ p1_tester, PT_ARRAYINFO(p1_tests), 0, "Phase 1 - Testing clnpath()" },
{ p2_tester, PT_ARRAYINFO(p2_tests), 0, "Phase 2 - Testing clnpath2()" },
};
/*
** main() - test driver: hands the phases[] table to the phased test
** harness and returns its result as the exit status.
**
** The block under "#if 0" is an optional interactive mode, disabled by
** default, that reads pathnames from standard input and prints the result
** of clnpath() followed by clnpath2() for each.
*/
int main(int argc, char **argv)
{
#if 0
    /* Interactive testing */
    char buffer[256];

    printf("Enter pathname: ");
    while (fgets(buffer, sizeof(buffer), stdin) != NULL)
    {
        /* strcspn() is safe for an empty line or a final line with no newline */
        buffer[strcspn(buffer, "\n")] = '\0';   /* Zap newline */
        printf("Unclean: <<%s>>\n", buffer);
        clnpath(buffer);
        printf("Clean 1: <<%s>>\n", buffer);
        clnpath2(buffer);
        printf("Clean 2: <<%s>>\n", buffer);
        printf("Enter pathname: ");
    }
    putchar('\n');
#endif /* 0 */
    return(pt_auto_harness(argc, argv, phases, DIM(phases)));
}
#endif /* TEST */
第二个变体使用了函数 tokenise(),上述源代码中并未包含该函数。如果需要,可以提供它。
该代码包括测试示例,尽管它使用了一个名为"阶段测试"的库,我还没有正式发布该库,因为我有一些打包问题需要解决。 如果认为有必要,可以在短时间内提供。