代码之家  ›  专栏  ›  技术社区  ›  Claudio La Rosa

boost spirit中嵌套可选结构解析器的实现

  •  -3
  • Claudio La Rosa  · 技术社区  · 3 年前

    我想用 Boost.Spirit 实现一个解析嵌套(可选)结构的解析器,例如 C++ 语言中的“namespace”。

    最简单的方法是什么?

    0 回复  |  直到 3 年前
        1
  •  2
  •   sehe    3 年前

    您可以创建如下递归规则:

    // Recursive rule: the `namespace` keyword followed by a brace-enclosed list
    // of zero or more nested namespaces.
    namespace_ = lexeme["namespace"] >> '{' >> *namespace_ >> '}';
    

    要允许可选的名称标识符,请执行以下操作:

    // Identifier: a letter or underscore followed by any number of alphanumerics
    // or underscores; raw[] hands the matched text to the std::string attribute.
    rule<It, std::string()> identifier_ =     //
        raw[(alpha | '_') >> *(alnum | '_')]; //
    
    // Recursive namespace rule; the unary minus makes the name optional.
    namespace_ =
        lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';
    

    如果还需要跳过(忽略)C++ 风格的注释:

    // Skipper grammar: everything that may be ignored between tokens.
    using Skip = rule<It>;
    Skip skip  = space                           // plain whitespace
        | "//" >> *~char_("\r\n") >> (eol | eoi) // line comment, up to end of line or end of input
        | "/*" >> *(char_ - "*/") >> "*/"        // block comment
        ;
    
    // Declared with the skipper type so that `skip` is applied between its tokens.
    rule<It, Skip> namespace_;
    
    // No skipper in the signature: an identifier is matched as one contiguous token.
    rule<It, std::string()> identifier_ =     //
        raw[(alpha | '_') >> *(alnum | '_')]; //
    
    // Recursive namespace rule with an optional name.
    namespace_ =
        lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';
    

    演示:

    Live On Coliru

    #include <boost/spirit/include/qi.hpp>
    
    /// Validates that `input` consists solely of (possibly nested, optionally
    /// named) `namespace [name] { ... }` blocks, ignoring whitespace and
    /// C++-style comments.
    ///
    /// Nothing is returned. The top-level expression uses expectation points
    /// (`>`), so input that does not match completely makes Spirit throw
    /// `qi::expectation_failure`.
    auto parse(std::string_view input) {
        using namespace boost::spirit::qi;
        using It = std::string_view::const_iterator;
    
        // Skipper: whitespace, `//` line comments and `/* */` block comments.
        using Skip = rule<It>;
        Skip skip  = space                           //
            | "//" >> *~char_("\r\n") >> (eol | eoi) //
            | "/*" >> *(char_ - "*/") >> "*/"        //
            ;
    
        rule<It, Skip> namespace_;
    
        // Declared without a skipper, so an identifier is one contiguous token.
        rule<It, std::string()> identifier_ =     //
            raw[(alpha | '_') >> *(alnum | '_')]; //
    
        // The keyword must end at an identifier boundary; without the
        // not-predicate, `namespacefoo {}` would be read as `namespace foo {}`.
        namespace_ = lexeme["namespace" >> !(alnum | '_')] //
            >> -identifier_ >> '{' >> *namespace_ >> '}';
    
        phrase_parse(begin(input), end(input), eps > *namespace_ > eoi, skip);
    }
    
    // Demo driver: feeds a nested-namespace sample (with a trailing line
    // comment) through parse().
    int main() {
        constexpr std::string_view sample = R"(
            namespace X { namespace Y {
                namespace Z1 {
                }
                namespace Z2 {
                }
            }} // namespace X::Y
        )";
        parse(sample);
    }
    

    附加内容(Bonus)

    为解析结果添加 AST 表示以及调试输出: Live On Coliru http://coliru.stacked-crooked.com/a/58542397b7f751e0

    #include <boost/fusion/adapted.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    
    // AST produced by the parser: a forest of optionally named namespaces.
    namespace Ast {
        using Id = std::string;
    
        // Forward declaration: Namespaces is used as a member of Namespace itself.
        struct Namespace;
        using Namespaces = std::vector<Namespace>;
    
        struct Namespace {
            boost::optional<Id> id;       // empty for an unnamed namespace
            Namespaces          children; // directly nested namespaces
        };
    }
    
    // Expose Ast::Namespace as a Fusion sequence (id, children) so the grammar's
    // synthesized attributes can be stored into it.
    BOOST_FUSION_ADAPT_STRUCT(Ast::Namespace, id, children)
    
    /// Parses `input` as a sequence of (possibly nested, optionally named)
    /// `namespace [name] { ... }` blocks and returns them as Ast::Namespaces.
    /// Whitespace and C++-style comments are skipped.
    ///
    /// The top-level expression uses expectation points (`>`), so input that
    /// does not match completely makes Spirit throw `qi::expectation_failure`.
    auto parse(std::string_view input) {
        using namespace boost::spirit::qi;
        using It = std::string_view::const_iterator;
    
        // Skipper: whitespace, `//` line comments and `/* */` block comments.
        using Skip = rule<It>;
        Skip skip  = space                           //
            | "//" >> *~char_("\r\n") >> (eol | eoi) //
            | "/*" >> *(char_ - "*/") >> "*/"        //
            ;
    
        rule<It, Ast::Namespace(), Skip> namespace_;
    
        // Declared without a skipper, so an identifier is one contiguous token.
        rule<It, Ast::Id()> identifier_ =     //
            raw[(alpha | '_') >> *(alnum | '_')]; //
    
        // The keyword must end at an identifier boundary; without the
        // not-predicate, `namespacefoo {}` would be read as `namespace foo {}`.
        // The predicate exposes no attribute, so the rule still synthesizes
        // (optional name, nested namespaces) for Ast::Namespace.
        namespace_ = lexeme["namespace" >> !(alnum | '_')] //
            >> -identifier_ >> '{' >> *namespace_ >> '}';
    
        Ast::Namespaces parsed;
        phrase_parse(begin(input), end(input), eps > *namespace_ > eoi, skip, parsed);
        return parsed;
    }
    
    namespace Ast {
        void print(std::ostream& os, Namespace const& ns, unsigned indent = 0) {
            os << std::setw(indent) << ""
               << "namespace " << ns.id.value_or("/*anonymous*/") << " {\n";
            for (auto& child : ns.children) {
                print(os, child, indent+2);
            }
            os << std::setw(indent) << "" << "}\n";
        }
    }
    
    // Demo driver: parses a nested-namespace sample (including one unnamed
    // namespace) and prints every top-level namespace to standard output.
    int main() {
        constexpr std::string_view sample = R"(
            namespace X { namespace Y {
                namespace Z1 {
                }
                namespace Z2 {
                }
            }} // namespace X::Y
            namespace { }
        )";
    
        for (auto const& top_level : parse(sample))
            print(std::cout, top_level);
    }
    

    输出:

    namespace X {
      namespace Y {
        namespace Z1 {
        }
        namespace Z2 {
        }
      }
    }
    namespace /*anonymous*/ {
    }
    

    更新

    作为对评论的回应,我给出了一个更复杂的示例。该示例解析的输入中,struct 可以出现在全局作用域或命名空间作用域(实际上也可以嵌套在另一个 struct 内部),例如:

    namespace Math {
        long factorial(int x);
    }
    
    struct GlobalA {
        int foo();
        double bar(string stuff, int i, bool flag);
        struct Nested {
            /* todo implementation */
        };
    };
    
    namespace X { namespace Y {
        struct Mixin{};
        namespace Z1 {
            struct Derived : GlobalA, Mixin {
                void qux();
            };
        }
        namespace Z2 {
        }
    }} // namespace X::Y
    namespace { }
    

    参见 Live On Coliru

    //#define BOOST_SPIRIT_DEBUG
    #include <boost/fusion/adapted.hpp>
    #include <boost/fusion/include/io.hpp>
    #include <boost/optional/optional_io.hpp>
    #include <boost/spirit/include/qi.hpp>
    
    // AST for the extended grammar: namespaces that may contain variable,
    // function and struct declarations as well as further namespaces.
    namespace Ast {
        // Identifier type: a class derived from std::string that inherits its
        // constructors and assignment operators.
        // NOTE(review): presumably a distinct type (rather than an alias) so it
        // is not confused with plain std::string attributes -- confirm.
        struct Id : std::string {
            using std::string::string;
            using std::string::operator=;
        };
    
        struct Namespace;
        using Namespaces = std::vector<Namespace>;
    
        // `type name`, used both for declarations and for function parameters.
        struct VariableDeclaration { Id type, name; };
        using VariableDeclarations = std::vector<VariableDeclaration>;
    
        struct FunctionDeclaration {
            Id return_type, name;
            VariableDeclarations args; // parameter list, possibly empty
        };
    
        // StructDeclaration is still incomplete here and itself contains
        // Declarations (see Members), hence the recursive_wrapper.
        struct StructDeclaration;
        using Declaration = boost::variant< //
            VariableDeclaration,            //
            FunctionDeclaration,            //
            boost::recursive_wrapper<StructDeclaration>>;
    
        using Bases   = std::list<Id>;
        using Members = std::vector<Declaration>;
    
        struct StructDeclaration {
            Id      name;
            Bases   bases;   // base-class names; empty when there is no base list
            Members members; // nested declarations
        };
    
        // Anything that may appear inside a namespace or at global scope.
        using NsMember  = boost::variant<Declaration, Namespace>;
        using NsMembers = std::vector<NsMember>;
    
        struct Namespace {
            boost::optional<Id> id;      // empty for an unnamed namespace
            NsMembers           members;
        };
    
        // A whole program is the list of global-scope members.
        using Program = NsMembers;
    } // namespace Ast
    
    // Expose the AST structs as Fusion sequences so the grammar's synthesized
    // attributes can be stored into them; the listed member order is the order in
    // which sequence elements are assigned.
    BOOST_FUSION_ADAPT_STRUCT(Ast::VariableDeclaration, type,        name)
    BOOST_FUSION_ADAPT_STRUCT(Ast::FunctionDeclaration, return_type, name,    args)
    BOOST_FUSION_ADAPT_STRUCT(Ast::StructDeclaration,   name,        bases,   members)
    BOOST_FUSION_ADAPT_STRUCT(Ast::Namespace,           id,          members)
    
    ///// BEGIN DEBUG OUTPUT FACILITIES
    namespace Ast {
        // Streams a namespace: header line, every member in order, closing brace.
        // Unnamed namespaces are shown with the placeholder `/*anonymous*/`.
        static std::ostream& operator<<(std::ostream& os, Namespace const& ns) {
            os << "namespace " << ns.id.value_or("/*anonymous*/") << " {\n";
            for (auto const& member : ns.members)
                os << member;
            return os << "}\n";
        }
    
        // Streams a function declaration as `ret name(type a, type b);`.
        static std::ostream& operator<<(std::ostream& os, FunctionDeclaration const& fd) {
            os << fd.return_type << " " << fd.name << "(";
    
            // Empty before the first parameter, ", " before every later one.
            char const* separator = "";
            for (auto const& param : fd.args) {
                os << separator << param.type << " " << param.name;
                separator = ", ";
            }
    
            return os << ");";
        }
    
        // Streams a variable declaration as `type name;`.
        static std::ostream& operator<<(std::ostream& os, VariableDeclaration const& vd) {
            return os << vd.type << " " << vd.name << ";";
        }
    
        // Streams a struct: name, optional ` : Base1, Base2` list, then one member
        // per line between braces.
        static std::ostream& operator<<(std::ostream& os, StructDeclaration const& sd) {
            os << "struct " << sd.name;
    
            // " : " introduces the base list, ", " separates further bases; an
            // empty list prints nothing.
            char const* separator = " : ";
            for (auto const& base : sd.bases) {
                os << separator << base;
                separator = ", ";
            }
    
            os << " {\n";
            for (auto const& member : sd.members)
                os << member << "\n";
    
            return os << "};\n";
        }
    }
    ///// END DEBUG OUTPUT FACILITIES
    
    namespace qi = boost::spirit::qi;
    
    /// Spirit.Qi grammar for a small C++-like declaration language: variable,
    /// function and (nested) struct declarations, grouped in optionally named,
    /// arbitrarily nested namespaces. The synthesized attribute is Ast::Program.
    ///
    /// The grammar is declared without a skipper; `start` installs `skipper_`
    /// through skip()[...], so it can be used with plain qi::parse. Because of the
    /// `> eoi` expectation point, input that is not consumed completely makes the
    /// parse throw qi::expectation_failure.
    template <typename It> struct ProgramParser : qi::grammar<It, Ast::Program()> {
        ProgramParser() : ProgramParser::base_type(start) {
            using namespace qi;
    
            keywords_ += "if", "do", "for", "else", "while", "not", "and", "or",
                "xor", "continue", "break", "case", "goto", "struct", "class",
                "enum", "namespace";
    
            // A keyword only counts when it is not the prefix of a longer identifier.
            kw_lexeme = keywords_ >> !(alnum|'_');
    
            // Skipper: whitespace, `//` line comments and `/* */` block comments.
            skipper_ = space                             //
                | "//" >> *~char_("\r\n") >> (eol | eoi) //
                | "/*" >> *(char_ - "*/") >> "*/"        //
                ;
    
            // Identifiers must not be keywords; raw[] exposes the matched text.
            identifier_ = !kw_lexeme >> raw[(alpha | '_') >> *(alnum | '_')];
            vardecl_    = identifier_ >> identifier_;
            fundecl_    = identifier_ >> identifier_ >> //
                '(' >> -(vardecl_ % ',') >> ')' >> ';';
    
            // vardecl_ doubles as the function-parameter rule and therefore has no
            // terminator of its own; as a declaration it must be followed by ';'.
            // Without it `int x;` could not be parsed (the ';' was left behind)
            // while `int x` without a semicolon was accepted.
            decl_ = fundecl_ | (vardecl_ >> ';') | struct_;
    
            // attr() synthesizes an empty base list when no ':' clause is present.
            Ast::Bases const no_bases;
            baselist_ = ':' >> identifier_ % ',' | attr(no_bases);
    
            // Keyword boundary: same "not followed by an identifier character"
            // test as kw_lexeme.
            struct_ =                                                //
                lexeme["struct" >> !(alnum | '_')] >> identifier_    //
                >> baselist_ >> '{'                                  //
                >> *decl_                                            //
                >> '}' >> ';';
    
            nsmember_ = namespace_ | decl_;
    
            // The boundary test is a pure look-ahead. Matching '{' inside the
            // lexeme would consume the brace, so `namespace{}` could never satisfy
            // the '{' that follows the optional name.
            namespace_ = lexeme["namespace" >> !(alnum | '_')] >>
                -identifier_ >> '{' >> *nsmember_ >> '}';
    
            program_  = *nsmember_;
            start     = skip(skipper_.alias())[program_ > eoi];
    
            BOOST_SPIRIT_DEBUG_NODES((start)(program_)(nsmember_)(namespace_)(
                struct_)(decl_)(vardecl_)(fundecl_)(baselist_)(identifier_))
        }
    
      private:
        qi::symbols<char> keywords_;
        qi::rule<It>      kw_lexeme;
    
        // Rules without a skipper: `start` sets the skipper up itself, and
        // `identifier_` must match one contiguous token.
        qi::rule<It, Ast::Program()> start;
        qi::rule<It, Ast::Id()>      identifier_;
    
        using Skip = qi::rule<It>;
        Skip skipper_;
    
        qi::rule<It, Ast::Bases(),               Skip> baselist_;
        qi::rule<It, Ast::Declaration(),         Skip> decl_;
        qi::rule<It, Ast::FunctionDeclaration(), Skip> fundecl_;
        qi::rule<It, Ast::Namespace(),           Skip> namespace_;
        qi::rule<It, Ast::NsMember(),            Skip> nsmember_;
        qi::rule<It, Ast::Program(),             Skip> program_;
        qi::rule<It, Ast::StructDeclaration(),   Skip> struct_;
        qi::rule<It, Ast::VariableDeclaration(), Skip> vardecl_;
    };
    
    // Parses `input` with a function-local static ProgramParser instance and
    // returns the resulting AST.
    Ast::Program parse_program(std::string_view input) {
        using Iterator = std::string_view::const_iterator;
        static ProgramParser<Iterator> const grammar;
    
        Ast::Program result;
        qi::parse(input.begin(), input.end(), grammar, result);
        return result;
    }
    
    // Demo driver: parses a sample mixing namespaces, structs and function
    // declarations, then prints every top-level member on standard output.
    int main() {
        constexpr std::string_view source = R"(
                namespace Math {
                    long factorial(int x);
                }
    
                struct GlobalA {
                    int foo();
                    double bar(string stuff, int i, bool flag);
                    struct Nested {
                        /* todo implementation */
                    };
                };
    
                namespace X { namespace Y {
                    struct Mixin{};
                    namespace Z1 {
                        struct Derived : GlobalA, Mixin {
                            void qux();
                        };
                    }
                    namespace Z2 {
                    }
                }} // namespace X::Y
                namespace { }
            )";
    
        auto const program = parse_program(source);
        for (auto const& member : program)
            std::cout << member << '\n';
    }
    

    输出(未做美化排版):

    namespace Math {
    long factorial(int x);}
    
    struct GlobalA {
    int foo();
    double bar(string stuff, int i, bool flag);
    struct Nested {
    };
    
    };
    
    namespace X {
    namespace Y {
    struct Mixin {
    };
    namespace Z1 {
    struct Derived : GlobalA, Mixin {
    void qux();
    };
    }
    namespace Z2 {
    }
    }
    }
    
    namespace /*anonymous*/ {
    }