(A)(BC)(DEF)
のような文字列から括弧の中にある文字列を取り出したい、というのが動機。
Ruby
# Returns every non-empty run of characters in +s+ that is immediately
# preceded by +left+ and immediately followed by +right+, in order of
# appearance, e.g. tokenize('(A)(BC)(DEF)', '(', ')') #=> ["A", "BC", "DEF"].
#
# The delimiters are regexp-escaped, so metacharacters such as "(" or "["
# are matched literally. +right+ is also interpolated into a negated
# character class, so a multi-character +right+ excludes each of its
# characters individually from a token.
def tokenize(s, left, right)
  opening = Regexp.escape(left)
  closing = Regexp.escape(right)
  # Look-behind / look-ahead keep the delimiters out of the matched text.
  s.scan(/(?<=#{opening})[^#{closing}]+(?=#{closing})/)
end
pry で試す。
pry(main)> tokenize('(A)(BC)(DEF)', '(', ')') => ["A", "BC", "DEF"]
Haskell
-- | Split a list made of consecutive @left ... right@ groups into the
-- contents of each group, e.g.
--
-- > tokenize "(A)(BC)(DEF)" '(' ')' == ["A","BC","DEF"]
--
-- Scanning stops (and the tokens collected so far are returned) at the
-- first position that does not begin a complete group: the end of the
-- input, an element other than @left@, or a @left@ that has no matching
-- @right@. The function is therefore total; it never fails with a
-- pattern-match error on malformed input.
tokenize :: Eq a => [a] -> a -> a -> [[a]]
tokenize (s:ss) left right
  | s == left
  , (t, _:r) <- break (== right) ss  -- t: group body, r: input after the closing delimiter
  = t : tokenize r left right
tokenize _ _ _ = []
ghci で試す。
*Main> tokenize "(A)(BC)(DEF)" '(' ')' ["A","BC","DEF"]
Prolog
% tokenize(+Input, +Left, +Right, -Tokens)
%
% Input is a list consisting of consecutive groups "Left ... Right".
% Tokens is the list of the group contents, each itself a list of the
% elements found between the delimiters.

% An exhausted input contains no further tokens.
tokenize([], _, _, []).
% Otherwise the input must open with Left; Token is what precedes a
% following Right, and the remainder is tokenized recursively.
tokenize(Input, Left, Right, [Token|Tokens]) :-
    Input = [Left|AfterLeft],
    append(Token, [Right|Remainder], AfterLeft),
    tokenize(Remainder, Left, Right, Tokens),
    % Commit to the first split for which the remainder also tokenizes.
    !.
gprolog で試す。
| ?- tokenize("(A)(BC)(DEF)", 0'(, 0'), TS). TS = [[65],[66,67],[68,69,70]]
C++
#include <string>
#include <iterator>
#include <algorithm>

/// Extracts the text enclosed by consecutive `left ... right` groups.
///
/// Starting at `first`, the input is expected to be a run of groups such as
/// "(A)(BC)(DEF)". The characters between each pair of delimiters are
/// appended to `container` as a std::string; an empty group such as "()"
/// yields an empty string.
///
/// Scanning stops at the first position that does not begin a complete
/// group; tokens collected up to that point stay in `container`.
///
/// @param first      Start of the character range to scan.
/// @param last       End of the character range to scan.
/// @param left       Opening delimiter.
/// @param right      Closing delimiter.
/// @param container  Receives the tokens through std::back_inserter.
/// @return `last` if the whole input was consumed; the offending position if
///         a group did not start with `left`; or, for a `left` without a
///         matching `right`, the position just after that `left`.
///
/// The return type is `iterator_t` (not a fixed std::string iterator) so the
/// function works with any character iterator, e.g. `const char*`.
template<typename iterator_t, typename container_t>
iterator_t tokenize(iterator_t first, iterator_t last, char left, char right, container_t& container)
{
    auto sink = std::back_inserter(container);
    iterator_t i = first;
    while(i != last && *i == left) {
        ++i;  // step over the opening delimiter
        iterator_t j = std::find_if(i, last, [right](char c) { return c == right; });
        if(j == last) {
            break;  // unterminated group: nothing is stored for it
        }
        *sink++ = std::string(i, j);
        i = j;
        ++i;  // resume just past the closing delimiter
    }
    return i;
}
#include <vector> #include <iostream> #include <iterator> int main(int, char* []) { std::string s = "(A)(BC)(DEF)"; std::vector<std::string> ss; tokenize(s.begin(), s.end(), '(', ')', ss); std::copy(ss.begin(), ss.end(), std::ostream_iterator<std::string>(std::cout, "\n")); return 0; }
$ g++ --std=c++11 -o sample_c++ sample_c++.cpp $ ./sample_c++ A BC DEF
Boost Spirit でパーサを書いたら仰々しくなったので、何か間違えている気がしてならない。
#include <string> #include <iterator> #include <boost/spirit/include/qi.hpp> template<typename contailer_t> struct Storer { mutable std::back_insert_iterator<contailer_t> i; Storer(contailer_t& container) : i(back_inserter(container)) {} template<typename attribute_t, typename context_t> void operator () (attribute_t& attribute, context_t) const { *i = std::string(attribute.begin(), attribute.end()); } }; template<typename contailer_t> Storer<contailer_t> store(contailer_t& container) { return Storer<contailer_t>(container); } template<typename contailer_t> void tokenize(std::string s, contailer_t& container, char left, char right) { using boost::spirit::qi::char_; using boost::spirit::qi::phrase_parse; phrase_parse( s.begin(), s.end(), *(char_(left) >> (*~char_(right))[store(container)] >> char_(right)), boost::spirit::ascii::space ); }