boost spirit parsing

foofel

Hey, ich versuche gerade, ein INI-ähnliches Format mit Boost.Spirit zu parsen. Das klappt so weit auch ganz gut, die Grammatik funktioniert, aber ich kriege das automatische Mapping in ein Struct nicht hin; irgendwo fehlt mir da die richtige Denkweise. Erstmal das Format:

[fine]
@cmp1
@cmp2
muh=b

[fail]
@cmp1
a=b
@cmp2

Das struct:

/// Key/value properties of a section, kept sorted by key.
using Pairs = std::map<std::string, std::string>;

/// One section of the INI-like format.
struct Section
{
	/// Section name.
	std::string name;

	/// Required components, in the order they were added.
	std::vector<std::string> requirements;

	/// Properties of this section.
	Pairs properties;
};

Die „fine“-Variante lässt sich ziemlich gut beschreiben, und die kriege ich auch richtig geparst und in das Struct geschrieben (mit einer anderen Grammatik). Die „fail“-Variante wird (mit der gezeigten Grammatik) richtig geparst, aber ich kriege wie gesagt das automatische Mapping nicht hin.

für das mapping hab ich:

// Adapts Section as a Boost.Fusion sequence. The sequence elements are the
// members in the order listed here: name, requirements, properties.
BOOST_FUSION_ADAPT_STRUCT(
	Section, 
	(std::string, name)
	(std::vector<std::string>, requirements)
	(Pairs, properties)
)

Ich weiß, dass das händisch einfacher geht, aber ich versuche mich gerade ein bisschen an Spirit und würde das gerne damit hinbekommen.

Grammatik:

// Qi grammar for a single section: a "[name]" header line followed by any mix
// of "@component" lines and "key=value" lines. The synthesized attribute of
// the grammar is a Section.
template <typename Iterator, typename Skipper>
struct SectionParser : qi::grammar<Iterator, Section(), Skipper>
{
	// Only `start` is declared with the Skipper; the rules below are declared
	// without one.
	qi::rule<Iterator, Section(), Skipper> start;
	qi::rule<Iterator, std::string()> value, ident, name, component;
	qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
		SectionParser() 
		: SectionParser::base_type(start, "section grammar")
	{
		// Identifier: one or more of letters, digits or underscore.
		ident = +qi::char_("a-zA-Z0-9_");
		// "@ident" terminated by end of line or end of input.
		// NOTE(review): qi::char_('@') exposes the matched character as an
		// attribute, so the resulting string presumably starts with '@';
		// qi::lit('@') would drop it -- confirm which is intended.
		component = qi::char_('@') > ident >> (qi::eol | qi::eoi);
		// Everything up to (not including) end of line / end of input; may be empty.
		value = *(qi::char_ - (qi::eol | qi::eoi));
		// "ident=value" terminated by end of line or end of input.
		pair = ident > qi::lit('=') > value >> (qi::eol | qi::eoi);
		// "[ident]" terminated by end of line or end of input.
		name = qi::lit('[') > ident > qi::lit(']') >> (qi::eol | qi::eoi);
		// Header followed by zero or more component/pair lines in any order.
		// NOTE(review): per the surrounding post, the interleaved input parses
		// with this rule but the automatic attribute mapping into the adapted
		// Section does not work here.
		start = name >> *(component | pair);
		BOOST_SPIRIT_DEBUG_NODES((start)(value)(pair)(component)(ident)(name));
	}
};

Der Skipper ist ein qi::blank.

Am Ende soll da halt ein std::vector<Section> rauskommen. Ich hoffe, ihr könnt mir helfen.

foofel

ok habs

Kein BOOST_FUSION_ADAPT_STRUCT mehr. Das Struct ist dasselbe. Die Grammatik ist:

// Qi grammar for a single section that fills the Section attribute through
// semantic actions: a "[name]" header line followed by any mix of
// "@component" lines and "key=value" lines.
template <typename Iterator, typename Skipper>
	struct SectionParser : qi::grammar<Iterator, Section(), Skipper>
	{
		// Only `start` is declared with the Skipper; the rules below are
		// declared without one.
		qi::rule<Iterator, Section(), Skipper> start;
		qi::rule<Iterator, std::string()> value, ident, name, component;
		qi::rule<Iterator, std::pair<std::string, std::string>()> pair;

		SectionParser() 
			: SectionParser::base_type(start, "section grammar")
		{
			// Phoenix actions; qi::_val is the Section being built and qi::_1
			// the attribute of the parser the action is attached to.
			// Appends the parsed component string to Section::requirements.
			auto add_component = phx::push_back(phx::bind(&Section::requirements, qi::_val), qi::_1);
			// Inserts the parsed key/value pair into Section::properties.
			auto add_pair = phx::insert(phx::bind(&Section::properties, qi::_val), qi::_1);
			// Assigns the parsed header string to Section::name.
			auto set_name = phx::assign(phx::bind(&Section::name, qi::_val), qi::_1);

			// Identifier: one or more of letters, digits or underscore.
			ident = +qi::char_("a-zA-Z0-9_");
			// "@ident" terminated by end of line or end of input.
			// NOTE(review): qi::char_('@') exposes the matched character as an
			// attribute, so the stored requirement presumably starts with '@';
			// qi::lit('@') would drop it -- confirm which is intended.
			component = qi::char_('@') > ident >> (qi::eol | qi::eoi);
			// Everything up to (not including) end of line / end of input; may be empty.
			value = *(qi::char_ - (qi::eol | qi::eoi));
			// "ident=value" terminated by end of line or end of input.
			pair = ident > qi::lit('=') > value >> (qi::eol | qi::eoi);
			// "[ident]" terminated by end of line or end of input.
			name = qi::lit('[') > ident > qi::lit(']') >> (qi::eol | qi::eoi);
			// Header, then zero or more component/pair lines in any order; each
			// match is stored via its semantic action.
			start = name[set_name] >> *(component[add_component] | pair[add_pair]);

			// Unlike the rules listed here, `start` is not registered for debugging.
			BOOST_SPIRIT_DEBUG_NODES((value)(pair)(component)(ident)(name));
		}
	};

Columbo

Das kann man sicherlich locker ohne Spirit parsen. Können wir auch eine solche Lösung vorschlagen?

foofel

Wenn du was Schönes hast, sicher. Zusammengefasst müsste man in etwa so etwas parsen:

; comment one
/* comment two */

[entity]
@component
property=one
; comment 

property = two or more
@component2
[@component]
property=three
/*four=five

six=seven*/

[@component2]

[entity2:entity]
property=something else
prop2 =new ; skip me

[entity3]

Ich hoffe, ich habe so mehr oder weniger die meisten Fälle abgedeckt. Wie gesagt: Dass da Abhängigkeiten drin sind, ist fürs Parsen erstmal egal, Hauptsache, die Sections kommen sauber raus.

Columbo

Ich dachte an etwas in der Richtung:

#include <algorithm>
#include <cctype>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>

/// Wraps a single character that the stream is expected to contain next.
struct Char {
	char ch;
};

/// Extracts one character with formatted input and compares it with `ch.ch`.
/// Sets failbit on `is` when a character was read but differs from the
/// expected one. If the extraction itself fails, the stream is returned as is.
std::istream& operator>>(std::istream& is, Char ch) {
	char extracted;
	if (!(is >> extracted))
		return is;

	if (extracted != ch.ch)
		is.setstate(std::ios::failbit);
	return is;
}

/// Returns a copy of `s` without its trailing whitespace.
/// Whitespace is whatever std::isspace reports for the current C locale.
/// A string consisting only of whitespace yields an empty string.
std::string rtrim(std::string const& s) {
	// The predicate takes unsigned char on purpose: passing a negative char
	// value to std::isspace is undefined behaviour.
	const auto lastKept = std::find_if(s.rbegin(), s.rend(),
		[](unsigned char c) { return !std::isspace(c); });

	// base() of the reverse iterator points one past the last kept character.
	return std::string(s.begin(), lastKept.base());
}

/// Comment markers used by stripComments.
struct CommentCharTraits {
	/// startOL: marker that starts a comment running to the end of the line.
	/// openML / closeML: markers that open and close a multi-line comment.
	std::string startOL, openML, closeML;
};

/// Characters that structure a section, as used by read().
struct SectionCharTraits {
	char openNameBracket;    ///< precedes the section name
	char closedNameBracket;  ///< terminates the section name
	char reqIntroducer;      ///< first character of a requirement line
	char keyValueSeparator;  ///< separates key and value of a property
};

/// Returns a copy of the NUL-terminated string `str` with all comments removed.
///
/// - A single-line comment runs from `traits.startOL` up to, but not
///   including, the next '\n'. The newline is kept, so the text before the
///   comment does not merge with the following line.
/// - A multi-line comment runs from `traits.openML` through the first
///   `traits.closeML` found after the opening marker. If it is never closed,
///   everything from the opening marker on is dropped.
/// - An empty `startOL` or `openML` disables that kind of comment.
std::string stripComments(char const* str,
                          CommentCharTraits traits = {";", "/*", "*/"})
{
	const std::size_t len = std::strlen(str);
	char const* const end = str + len;

	// strncmp stops at the terminating NUL, so a marker longer than the
	// remaining input is never compared past the end of the buffer.
	const auto startsWith = [](char const* pos, std::string const& marker) {
		return !marker.empty()
		    && std::strncmp(pos, marker.c_str(), marker.size()) == 0;
	};

	std::string res;
	res.reserve(len);

	// The cursor is advanced explicitly in each branch: after skipping a
	// comment it already points at the next character to examine.
	while (str != end) {
		if (startsWith(str, traits.startOL)) {
			str = std::find(str, end, '\n');
		}
		else if (startsWith(str, traits.openML)) {
			// Search behind the whole opening marker so that e.g. "/*/" is
			// not mistaken for a complete comment.
			char const* const close =
				std::strstr(str + traits.openML.size(), traits.closeML.c_str());
			if (!close)
				return res;
			str = close + traits.closeML.size();
		}
		else {
			res.push_back(*str);
			++str;
		}
	}

	return res;
}

/// Key/value properties of a section, kept sorted by key.
using Pairs = std::map<std::string, std::string>;

/// One section of the INI-like format.
struct Section
{
	/// Section name.
	std::string name;

	/// Required components, in the order they were added.
	std::vector<std::string> requirements;

	/// Properties of this section.
	Pairs properties;
};

/// Reads one section from `is` into `s`.
///
/// Expected input: `traits.openNameBracket`, the section name, then
/// `traits.closedNameBracket`, followed by any number of lines that are
/// either requirements (starting with `traits.reqIntroducer`) or properties
/// (`key <keyValueSeparator> value`). Leading whitespace of every line is
/// skipped. Trailing whitespace of requirement names, keys and values is
/// removed with rtrim.
///
/// Reading stops
///  - in front of the next `traits.openNameBracket`, which is left in the
///    stream so the next section can be read with another call,
///  - at end of input, or
///  - at a property line without a separator, in which case failbit is set.
///
/// @param is      stream positioned at the start of a section header
/// @param s       section to fill; requirements are appended, properties
///                with an existing key are overwritten
/// @param traits  the structural characters of the format
void read( std::istream& is, Section& s, SectionCharTraits traits )
{
	typedef std::istream::traits_type StreamTraits;

	// Header: opening bracket, then everything up to the closing bracket.
	is >> Char{traits.openNameBracket};
	std::getline(is, s.name, traits.closedNameBracket);

	while (is >> std::ws) {
		// Keep the int_type result so end of input is detected before the
		// value is narrowed to char.
		const StreamTraits::int_type next = is.peek();
		if (StreamTraits::eq_int_type(next, StreamTraits::eof()))
			break;
		const char peek = StreamTraits::to_char_type(next);

		std::string line;
		if (peek == traits.reqIntroducer) {
			// ignore() drops the introducer; the rest of the line is the name.
			if (!std::getline(is.ignore(), line))
				break;
			s.requirements.push_back(rtrim(line));
		}
		else if (peek == traits.openNameBracket)
			break;
		else {
			// Read the whole line first so that a line without a separator
			// cannot swallow the following lines or section headers.
			if (!std::getline(is, line))
				break;
			const std::string::size_type sep = line.find(traits.keyValueSeparator);
			if (sep == std::string::npos) {
				is.setstate(std::ios::failbit);
				break;
			}
			s.properties[rtrim(line.substr(0, sep))] = rtrim(line.substr(sep + 1));
		}
	}
}

Nicht ausführlich getestet, wird wahrscheinlich ein paar Macken haben, vor allem wenn es um Fehlererkennung geht.
Könnte dann so verwendet werden:

#include <sstream>

int main() {
	std::istringstream stream(stripComments(
		R"( ; comment one
		/* comment two */

		[entity]
		@component
		property=one
		; comment

		property = two or more
		@component2
		[@component]
		property=three
		/*four=five

		six=seven*/

		[@component2]

		[entity2:entity]
		property=something else
		prop2 =new ; skip me

		[entity3])"));

	Section s;
	read(stream, s, {'[', ']', '@', '='});

	std::cout << s.name << '\n';
	for (auto&& r : s.requirements)
		std::cout << r << '\n';
	for (auto&& p : s.properties)
		std::cout << p.first << " = " << p.second << '\n';
}

Ich würde jedoch bei Spirit bleiben, der Code ist komplizierter geworden als gedacht.

Columbo

Kann es vom mobilen Gerät nicht testen, aber /*/ dürfte fälschlicherweise als Kommentar erkannt werden.

foofel

Hab auch noch mal ein bisschen weitergebastelt, aktuell: https://ideone.com/LllC26

Ich habe mal versucht, das Ganze über die automatische Generierung laufen zu lassen. Das funktioniert wie gesagt ziemlich gut, wenn sich die "@comp"- und "prop=val"-Zeilen nicht vermischen, aber ich habe noch immer keine Ahnung, wie ich das anstelle, wenn die nicht blockweise geordnet in der Datei stehen...

Es muss doch noch Spirit-Profis hier geben.