我正在尝试用 nom 构建一个解析器,用来校验 ID 为 UUID 的 URL,例如:
rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912
我创建了以下内容:
extern crate uuid;
use uuid::Uuid;
// Parses the literal "rooms", optionally followed by "/" and 36 characters
// that are converted with `FromStr::from_str`, producing `Option<Uuid>`.
//
// NOTE(review): as the surrounding discussion points out, `opt!` maps a
// failure of the inner parser to `None`, so an input such as "rooms/123" is
// accepted with `None` (leaving "/123" unparsed) instead of being rejected.
named!(room_uuid<&str, Option<Uuid>>,
do_parse!(
tag_s!("rooms") >>
// The whole "/<uuid>" suffix is optional.
id: opt!(complete!(preceded!(
tag_s!("/"),
// Take exactly 36 characters and try to convert them.
map_res!(take_s!(36), FromStr::from_str)
))) >>
(id)
)
);
它几乎可以很好地处理所有情况:
assert_eq!(room_uuid("rooms"), Done("", None));
assert_eq!(room_uuid("rooms/"), Done("/", None));
assert_eq!(room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"), Done("", Some(Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap())));
除非 ID 不是有效的 UUID:
assert!(room_uuid("rooms/123").is_err()); // 这个断言会失败
// room_uuid("rooms/123").to_result() => Ok(None)
据我了解,发生这种情况是因为 `opt!` 会把内部解析器返回的 `Err` 转换成 `None`。
我希望将 ID 作为可选部分,但如果存在,它应该是一个有效的 UUID。
不幸的是,我不明白如何将这两件事结合起来:可选性和严格的格式。
我自己也是最近几周才开始使用 nom,不过我找到了解决这个问题的一种方法。它没有完全写成宏的形式,但只需改动一处行为就能得到正确的结果:当 `/` 后面没有给出 UUID 时,我会把 `/` 吞掉(消耗掉),而不是让它悬在剩余输入里。
#[macro_use]
extern crate nom;
extern crate uuid;
use std::str::FromStr;
use nom::IResult;
use uuid::Uuid;
fn room_uuid(input: &str) -> IResult<&str, Option<Uuid>> {
// Check that it starts with "rooms"
let res = tag_s!(input, "rooms");
let remaining = match res {
IResult::Incomplete(i) => return IResult::Incomplete(i),
IResult::Error(e) => return IResult::Error(e),
IResult::Done(i, _) => i
};
// If a slash is not present, return early
let optional_slash = opt!(remaining, tag_s!("/"));
let remaining = match optional_slash {
IResult::Error(_) |
IResult::Incomplete(_) => return IResult::Done(remaining, None),
IResult::Done(i, _) => i
};
// If something follows a slash, make sure
// it's a valid UUID
if remaining.len() > 0 {
let res = complete!(remaining, map_res!(take_s!(36), FromStr::from_str));
match res {
IResult::Done(i, o) => IResult::Done(i, Some(o)),
IResult::Error(e) => IResult::Error(e),
IResult::Incomplete(n) => IResult::Incomplete(n)
}
} else {
// This branch allows for "rooms/"
IResult::Done(remaining, None)
}
}
/// Exercises `room_uuid` on a bare prefix, a trailing slash, a valid UUID and
/// an invalid identifier.
#[test]
fn match_room_plus_uuid() {
    use nom::IResult::Done;

    let expected_id = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();

    // No identifier at all: the slash (if any) is consumed and `None` is returned.
    assert_eq!(room_uuid("rooms"), Done("", None));
    assert_eq!(room_uuid("rooms/"), Done("", None));

    // A well-formed identifier is parsed into a `Uuid`.
    assert_eq!(
        room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done("", Some(expected_id))
    );

    // A malformed identifier must be rejected.
    assert!(room_uuid("rooms/123").is_err());
}
鉴于解析 URL 并不需要流式接口,您可以改用 `synom`。它曾作为 `syn` crate 的一部分进行维护,但遗憾的是现在已不再维护(它被合并进了 `syn`,并改为只处理 Rust 的 token)。
遗憾的是,`synom` 不提供 `take_s!` 和 `eof!`(后者用于禁止末尾残留"未解析"的 `123`),不过自己实现它们很容易。
使用 `eof!` 也意味着您无法再返回未解析的 "/"(不过我认为这是件好事);另外,嵌套的 `option!` 在最后需要做一次解包(您也可以改为返回 `Option<Option<Uuid>>`,以便检测末尾的 "/")。
操场
#[macro_use]
extern crate synom;
extern crate uuid;
use uuid::Uuid;
/// Takes the first `$length` characters (not bytes) of the string `$i`.
///
/// Produces `synom::IResult::Done(rest, taken)` on success and
/// `synom::IResult::Error` when `$i` holds fewer than `$length` characters.
/// A length of zero succeeds with an empty match and leaves `$i` untouched.
macro_rules! take_s {
    ($i:expr, $length:expr) => {{
        let wanted: usize = $length;
        if wanted == 0 {
            synom::IResult::Done($i, "")
        } else {
            // Skip to the last character that belongs to the match.
            let mut boundaries = $i.char_indices().skip(wanted - 1);
            if boundaries.next().is_none() {
                // Fewer than `wanted` characters available.
                synom::IResult::Error
            } else if let Some((end, _)) = boundaries.next() {
                // `end` is the byte offset of the first character after the match.
                let (taken, rest) = $i.split_at(end);
                synom::IResult::Done(rest, taken)
            } else {
                // The input is exactly `wanted` characters long.
                synom::IResult::Done("", $i)
            }
        }
    }};
}
/// Succeeds with `()` only when the remaining input `$i` is empty; any
/// leftover input yields `synom::IResult::Error`.
macro_rules! eof {
    ($i:expr,) => {{
        if !$i.is_empty() {
            synom::IResult::Error
        } else {
            synom::IResult::Done($i, ())
        }
    }};
}
// Parses "rooms", an optional "/" and an optional 36-character UUID, and
// requires that nothing is left over afterwards. Yields `Option<Uuid>`.
named!(room_uuid -> Option<Uuid>,
do_parse!(
tag!("rooms") >>
// Outer `option!`: the "/" itself may be absent.
id: option!(preceded!(
tag!("/"),
// Inner `option!`: the UUID after the "/" may be absent.
option!(
// Take 36 characters, run `str::parse` on them and accept only `Ok`.
switch!(map!(take_s!(36), str::parse),
Ok(v) => value!(v)
)
)
)) >>
// Reject any unparsed remainder (e.g. the "123" in "rooms/123").
eof!() >>
// Collapse the nested options from the two `option!` layers into one.
(id.unwrap_or(None))
)
);
fn main() {
use synom::IResult::*;
assert_eq!(room_uuid("rooms"), Done("", None));
assert_eq!(room_uuid("rooms/"), Done("", None));
assert_eq!(
room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
Done(
"",
Some(Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap())
)
);
assert_eq!(room_uuid("rooms/123"), Error);
}
好的,我已经让它在 `nom` 下支持扩展的 URL 格式 `api/v1/rooms/UUID/tracks/UUID`。
基本思路与之前相同:检查是否到达输入末尾(`eof`),忽略末尾的 "/",并且永远不要等待不完整(Incomplete)的结果(`alt_complete!` 在这方面做得很好)。
关于您希望得到 `ErrorKind::Verify` 这一点:我认为具体的错误类型其实并不重要,直接忽略它,或者手动把它映射成您想要的任何错误即可。
请留意 `alt_complete!` 的分支顺序:当多个分支有重叠时,应把优先的选项(通常是"更长"的那个)放在前面。
我很喜欢自己写的 `with!` 辅助宏,不过您也可以把它内联展开。
Playground 不支持 `nom`,所以这次没有附上链接。
#[macro_use]
extern crate nom;
extern crate uuid;
use uuid::Uuid;
// Parses a "/" followed by exactly 36 characters that must convert to a
// `Uuid` through `str::parse`; `preceded!` discards the leading "/".
named!(uuid<&str, Uuid>, preceded!(
tag_s!("/"),
map_res!(take_s!(36), str::parse)
));
/// The request kinds recognised under `/api/v1`.
#[derive(Clone, PartialEq, Eq, Debug)]
enum ApiRequest {
/// `/api/v1/rooms`
Rooms,
/// `/api/v1/rooms/<room>`
Room { room: Uuid },
/// `/api/v1/rooms/<room>/tracks`
Tracks { room: Uuid },
/// `/api/v1/rooms/<room>/tracks/<track>`
Track { room: Uuid, track: Uuid },
}
/// shortcut for: `do_parse!(name: expr >> r: otherexpr >> (r))`
///
/// `otherexpr` should use `name`, otherwise you could just use `preceded!`.
///
/// The first parser may be either a macro invocation (`name: some_macro!(..)`)
/// or a plain parser function (`name: some_fn`).
///
/// The expansion deliberately has no trailing `;`: `with!` is used in
/// expression position (e.g. as an `alt_complete!` branch), where a trailing
/// semicolon trips the `semicolon_in_expressions_from_macros` lint.
macro_rules! with {
    // `name: some_macro!(args) >> rest`
    ($i:expr, $var:ident: $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac!($($args)*) >> r: $($rest)* >> (r))
    };
    // `name: some_fn >> rest`
    ($i:expr, $var:ident: $submac:ident >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac >> r: $($rest)* >> (r))
    };
}
// /api/v1/rooms/UUID/tracks/UUID
//
// Parses everything after the "/api/v1" prefix into an `ApiRequest`. The
// alternatives are nested: each level first tries the longer form and falls
// back to a `value!` for the shorter request.
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
alt_complete!(
preceded!(tag_s!("/rooms"), alt_complete!(
// "/rooms/<room>..." — bind the room id for the nested branches.
with!(room: uuid >> alt_complete!(
preceded!(tag_s!("/tracks"), alt_complete!(
// "/tracks/<track>" — bind the track id.
with!(track: uuid >> alt_complete!(
// ... sub track requests?
value!(ApiRequest::Track{room, track})
))
|
// "/tracks" without a track id.
value!(ApiRequest::Tracks{room})
))
// other room requests
|
// Room id only.
value!(ApiRequest::Room{room})
))
|
// "/rooms" without a room id.
value!(ApiRequest::Rooms)
))
// | ... other requests
)
));
// Top-level entry point: parses one of the API versions and then requires
// that, apart from an optional trailing "/", the whole input was consumed.
named!(api<&str, ApiRequest>, terminated!(
alt_complete!(
apiv1
// | ... other versions
// also could wrap in new enum like:
// apiv1 => { ApiRequest::V1 }
// |
// apiv2 => { ApiRequest::V2 }
),
tuple!(
alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
eof!() // make sure full URL was parsed
)
));
fn main() {
use nom::IResult::*;
use nom::ErrorKind;
let room = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();
let track = Uuid::parse_str("83d235e8-03cd-420d-a8c6-6e42440a5573").unwrap();
assert_eq!(api("/api/v1/rooms"), Done("", ApiRequest::Rooms));
assert_eq!(api("/api/v1/rooms/"), Done("", ApiRequest::Rooms));
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
Done("", ApiRequest::Room { room })
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/"),
Done("", ApiRequest::Room { room })
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks"),
Done("", ApiRequest::Tracks { room })
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/"),
Done("", ApiRequest::Tracks { room })
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573"),
Done("", ApiRequest::Track{room, track})
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/"),
Done("", ApiRequest::Track{room, track})
);
assert_eq!(api("/api/v1"), Error(ErrorKind::Alt));
assert_eq!(api("/api/v1/foo"), Error(ErrorKind::Alt));
assert_eq!(api("/api/v1/rooms/123"), Error(ErrorKind::Eof));
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/bar"),
Error(ErrorKind::Eof)
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/123"),
Error(ErrorKind::Eof)
);
assert_eq!(api("/api/v2"), Error(ErrorKind::Alt));
}
您还可以使用更严格的 `alt_full_opt_slash!` 分支方式,它能确保某个分支只有在完整解析了输入之后才算匹配。
这样一来,您就可以用更"扁平"的写法来解析各个备选项(嵌套分支应该仍然可用)。不过这意味着某些 UUID 最终可能会被解析不止一次,而且现在所有错误的类型都是 `Alt`:
/// Similar to alt_complete, but also requires the branch parses until
/// the end of the input (but ignores a trailing "/").
///
/// Implemented as a token muncher: the public arm appends a `| __end`
/// sentinel, the `__impl` arms peel off one `|`-separated branch at a time,
/// `__impl_push` wraps that branch, `__impl_push2` appends it to the
/// accumulated list, and the `__end` arm finally hands the list to
/// `alt_complete!`.
macro_rules! alt_full_opt_slash {
// Append the first wrapped branch (nothing accumulated yet, so no `|`).
(__impl_push2 ($i:expr,) ($($new:tt)*), $($rest:tt)*) => {
alt_full_opt_slash!(__impl ($i, $($new)*), $($rest)*)
};
// Append a further wrapped branch, separated from the previous ones by `|`.
(__impl_push2 ($i:expr, $($result:tt)+) ($($new:tt)*), $($rest:tt)*) => {
alt_full_opt_slash!(__impl ($i, $($result)+ | $($new)*), $($rest)*)
};
// Wrap one branch so that it must be followed by an optional "/" and the
// end of the input.
// NOTE(review): for the `=> { gen }` branch forms the mapping is passed
// inside `terminated!` as well — confirm that this expands as intended.
(__impl_push ($($result:tt)*) ($($new:tt)*), $($rest:tt)*) => {
// modify branch:
alt_full_opt_slash!(__impl_push2 ($($result)*) (
terminated!(
$($new)*,
tuple!(
alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
eof!() // make sure full URL was parsed
)
)
), $($rest)*)
};
// Next branch is a plain parser identifier.
(__impl ($($result:tt)*), $e:ident | $($rest:tt)*) => {
alt_full_opt_slash!(__impl_push ($($result)*) ( $e ), $($rest)*)
};
// Next branch is a macro invocation.
(__impl ($($result:tt)*), $subrule:ident!( $($args:tt)*) | $($rest:tt)*) => {
alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) ), $($rest)*)
};
// Next branch is a macro invocation with a `=> { mapping }`.
(__impl ($($result:tt)*), $subrule:ident!( $($args:tt)* ) => { $gen:expr } | $($rest:tt)*) => {
alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) => { $gen } ), $($rest)*)
};
// Next branch is a parser identifier with a `=> { mapping }`.
(__impl ($($result:tt)*), $e:ident => { $gen:expr } | $($rest:tt)*) => {
alt_full_opt_slash!(__impl_push ($($result)*) ( $e => { $gen } ), $($rest)*)
};
// Sentinel reached: emit the accumulated branches.
(__impl ($i:expr, $($result:tt)*), __end) => {
alt_complete!($i, $($result)*)
};
// Public entry point: start with an empty accumulator and add the sentinel.
($i:expr, $($rest:tt)*) => {{
alt_full_opt_slash!(__impl ($i, ), $($rest)* | __end)
}};
}
// /api/v1/rooms/UUID/tracks/UUID
//
// "Flat" variant: every request shape is spelled out as its own branch, and
// `alt_full_opt_slash!` is used so that a branch only counts as a match when
// it parses the input to the end (ignoring a trailing "/").
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
alt_full_opt_slash!(
// /rooms
do_parse!(
tag_s!("/rooms") >>
(ApiRequest::Rooms)
)
|
// /rooms/<room>
do_parse!(
tag_s!("/rooms") >>
room: uuid >>
(ApiRequest::Room{room})
)
|
// /rooms/<room>/tracks
do_parse!(
tag_s!("/rooms") >>
room: uuid >>
tag_s!("/tracks") >>
(ApiRequest::Tracks{room})
)
|
// /rooms/<room>/tracks/<track>
do_parse!(
tag_s!("/rooms") >>
room: uuid >>
tag_s!("/tracks") >>
track: uuid >>
(ApiRequest::Track{room, track})
)
)
));
// Top-level entry point for the flat variant: it only dispatches between API
// versions and adds no trailing-"/" or end-of-input check of its own.
named!(api<&str, ApiRequest>, alt_complete!(
apiv1
// | ... other versions
// also could wrap in new enum like:
// apiv1 => { ApiRequest::V1 }
// |
// apiv2 => { ApiRequest::V2 }
));