事实上,这应该简单得多。
首先,我不明白为什么船长缺席
完全
相关的
其次,暴露原始输入最好使用
qi::raw[]
而不是跳舞
iter_pos
和笨拙的语义动作。
-
negating a charset
已完成
~
,例如。
~char_(",()")
-
(p|eps)
拼写会更好
-p
-
(lit('(') >> lit(')'))
可能只是
"()"
(毕竟,没有船长,对吧)
-
p >> *(',' >> p)
p % ','
-
鉴于上述情况,
resolve_para
简化为:
resolve_para = '(' >> -(resolve_para_entry % ',') >> ')';
-
resolve_para_entry
我觉得很奇怪。似乎任何嵌套的括号都被简单地吞掉了。为什么不真正解析递归语法以便检测语法错误?
以下是我的看法:
我更愿意将此作为第一步,因为它有助于我思考解析器产品:
namespace Ast {
using ArgList = std::list<std::string>;
struct Resolve {
std::string name;
ArgList arglist;
};
using Resolves = std::vector<Resolve>;
}
qi::rule<It, Ast::Resolves()> start;
qi::rule<It, Ast::Resolve()> resolve;
qi::rule<It, Ast::ArgList()> arglist;
qi::rule<It, std::string()> arg, identifier;
及其定义:
identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
arg = raw [ +('(' >> -arg >> ')' | +~char_(",)(")) ];
arglist = '(' >> -(arg % ',') >> ')';
resolve = identifier >> arglist;
start = *qr::seek[hold[resolve]];
笔记:
Live On Coliru
namespace Ast {
using ArgList = std::list<std::string>;
struct Resolve {
std::string name;
ArgList arglist;
};
using Resolves = std::vector<Resolve>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Resolve, name, arglist)
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
template <typename It>
struct Parser : qi::grammar<It, Ast::Resolves()>
{
Parser() : Parser::base_type(start) {
using namespace qi;
identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
arg = raw [ +('(' >> -arg >> ')' | +~char_(",)(")) ];
arglist = '(' >> -(arg % ',') >> ')';
resolve = identifier >> arglist;
start = *qr::seek[hold[resolve]];
}
private:
qi::rule<It, Ast::Resolves()> start;
qi::rule<It, Ast::Resolve()> resolve;
qi::rule<It, Ast::ArgList()> arglist;
qi::rule<It, std::string()> arg, identifier;
};
int main() {
using It = std::string::const_iterator;
std::string const samples = R"--(
Samples:
sometext(para) â expect para in the string list
sometext(para1,para2) â expect para1 and para2 in string list
sometext(call(a)) â expect call(a) in the string list
sometext(call(a,b)) â here it fails; it seams that the "!lit(',')" wont make the parser step outside
)--";
It f = samples.begin(), l = samples.end();
Ast::Resolves data;
if (parse(f, l, Parser<It>{}, data)) {
std::cout << "Parsed " << data.size() << " resolves\n";
} else {
std::cout << "Parsing failed\n";
}
for (auto& resolve: data) {
std::cout << " - " << resolve.name << "\n (\n";
for (auto& arg : resolve.arglist) {
std::cout << " " << arg << "\n";
}
std::cout << " )\n";
}
}
Parsed 6 resolves
- sometext
(
para
)
- sometext
(
para1
para2
)
- sometext
(
call(a)
)
- call
(
a
)
- call
(
a
b
)
- lit
(
'
'
)
更多想法
最后一个输出显示了当前语法的问题:
lit(',')
显然不应被视为具有两个参数的调用。
最近,我回答了使用参数提取(嵌套)函数调用的问题,这可以更灵活地完成任务:
奖金
奖金版本使用
string_view
并且还显示了所有提取词的精确行/列信息。
注意,它仍然不需要
任何
phoenix或语义动作。相反,它只是定义了要分配给的必要特征
boost::string_view
从迭代器范围。
Live On Coliru
namespace Ast {
using Source = boost::string_view;
using ArgList = std::list<Source>;
struct Resolve {
Source name;
ArgList arglist;
};
using Resolves = std::vector<Resolve>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Resolve, name, arglist)
namespace boost { namespace spirit { namespace traits {
template <typename It>
struct assign_to_attribute_from_iterators<boost::string_view, It, void> {
static void call(It f, It l, boost::string_view& attr) {
attr = boost::string_view { f.base(), size_t(std::distance(f.base(),l.base())) };
}
};
} } }
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
template <typename It>
struct Parser : qi::grammar<It, Ast::Resolves()>
{
Parser() : Parser::base_type(start) {
using namespace qi;
identifier = raw [ char_("a-zA-Z_") >> *char_("a-zA-Z0-9_") ];
arg = raw [ +('(' >> -arg >> ')' | +~char_(",)(")) ];
arglist = '(' >> -(arg % ',') >> ')';
resolve = identifier >> arglist;
start = *qr::seek[hold[resolve]];
}
private:
qi::rule<It, Ast::Resolves()> start;
qi::rule<It, Ast::Resolve()> resolve;
qi::rule<It, Ast::ArgList()> arglist;
qi::rule<It, Ast::Source()> arg, identifier;
};
struct Annotator {
using Ref = boost::string_view;
struct Manip {
Ref fragment, context;
friend std::ostream& operator<<(std::ostream& os, Manip const& m) {
return os << "[" << m.fragment << " at line:" << m.line() << " col:" << m.column() << "]";
}
size_t line() const {
return 1 + std::count(context.begin(), fragment.begin(), '\n');
}
size_t column() const {
return 1 + (fragment.begin() - start_of_line().begin());
}
Ref start_of_line() const {
return context.substr(context.substr(0, fragment.begin()-context.begin()).find_last_of('\n') + 1);
}
};
Ref context;
Manip operator()(Ref what) const { return {what, context}; }
};
int main() {
using It = std::string::const_iterator;
std::string const samples = R"--(Samples:
sometext(para) â expect para in the string list
sometext(para1,para2) â expect para1 and para2 in string list
sometext(call(a)) â expect call(a) in the string list
sometext(call(a,b)) â here it fails; it seams that the "!lit(',')" wont make the parser step outside
)--";
It f = samples.begin(), l = samples.end();
Ast::Resolves data;
if (parse(f, l, Parser<It>{}, data)) {
std::cout << "Parsed " << data.size() << " resolves\n";
} else {
std::cout << "Parsing failed\n";
}
Annotator annotate{samples};
for (auto& resolve: data) {
std::cout << " - " << annotate(resolve.name) << "\n (\n";
for (auto& arg : resolve.arglist) {
std::cout << " " << annotate(arg) << "\n";
}
std::cout << " )\n";
}
}
打印
Parsed 6 resolves
- [sometext at line:3 col:1]
(
[para at line:3 col:10]
)
- [sometext at line:4 col:1]
(
[para1 at line:4 col:10]
[para2 at line:4 col:16]
)
- [sometext at line:5 col:1]
(
[call(a) at line:5 col:10]
)
- [call at line:5 col:34]
(
[a at line:5 col:39]
)
- [call at line:6 col:10]
(
[a at line:6 col:15]
[b at line:6 col:17]
)
- [lit at line:6 col:62]
(
[' at line:6 col:66]
[' at line:6 col:68]
)
¹
Boost Spirit: "Semantic actions are evil"?