エンジニアのソフトウェア的愛情

または私は如何にして心配するのを止めてプログラムを・愛する・ようになったか

S式パーサ

無駄にでかくなってしまいました。
半端ですがさらしておきます。

できること: S式をパースしてオブジェクトにします。オブジェクトは出力できます。
(,),.,空白以外は全部識別子に使える文字扱いです。引用符とか関係ありません。

sexp_parser.h

#ifndef SEXP_PARSER_H
#define SEXP_PARSER_H

#include <iosfwd>
#include <string>

// Iterator type used to walk the input text: a read-only iterator over std::string.
typedef std::string::const_iterator iterator;
// Character type of the input text (the element type of std::string).
typedef std::string::value_type     value_type;

// Abstract base class of every S-expression node (Atom, Pair, Nil).
class Sexp
{
public:
    // Virtual so that a derived node can be destroyed through a Sexp*.
    virtual ~Sexp() {}

    // Writes a textual representation of this node to `out`.
    virtual void write(std::ostream& out) const = 0;
};

// Leaf node holding the text of one atom.
class Atom : public Sexp
{
public:
    // Copies the characters in [begin, end) as the atom's text.
    Atom(iterator begin, iterator end);

    // Writes the atom's text to `out`.
    void write(std::ostream& out) const;

private:
    std::string s_; // the atom's text
};

// Cons cell: a node made of two child S-expressions (car and cdr).
//
// A Pair owns both children: they are deleted in ~Pair(). Because of that
// ownership the class is deliberately non-copyable; an implicit copy would
// share the raw pointers and delete each child twice.
class Pair : public Sexp
{
public:
    // Takes ownership of `car` and `cdr`.
    Pair(Sexp* car, Sexp* cdr);
    // Deletes both children.
    ~Pair();

    // Writes both children to `out`.
    void write(std::ostream& out) const;

private:
    // Declared but intentionally not defined (C++98 non-copyable idiom).
    Pair(const Pair&);
    Pair& operator = (const Pair&);

    Sexp* car_; // first element, owned
    Sexp* cdr_; // second element, owned
};

// Node representing the empty list; it carries no data.
class Nil : public Sexp
{
public:
    // Writes the fixed textual form of the empty list to `out`.
    void write(std::ostream& out) const;
};

// Stream insertion for any S-expression node; returns `out` to allow chaining.
std::ostream& operator << (std::ostream& out, const Sexp& sexp);

// Parser that turns S-expression text into a tree of Sexp nodes.
class Parser
{
public:
    Parser();

    // Parses one S-expression from the text starting at `begin`.
    // Returns a newly allocated node, or 0 when nothing could be parsed.
    // NOTE(review): the result is heap-allocated and the test driver deletes
    // it, so the caller presumably owns the returned node -- confirm.
    Sexp* parse(iterator begin, iterator end);

private:
    // Advances `i` past any whitespace characters.
    void skipWhitespace(iterator& i) const;
    // Tells whether `c` may be part of an atom.
    bool isAtomChar(value_type c) const;

    // Readers: each returns a newly allocated node and leaves `i` just past
    // the consumed text, or returns 0 when the text at `i` does not match.
    Sexp* readAtom(iterator& i); // a run of atom characters
    Sexp* readPair(iterator& i); // "(" sexp "." sexp ")"
    Sexp* readList(iterator& i); // bracketed list, possibly empty or dotted
    Sexp* readSexp(iterator& i); // any S-expression

    // Reads the elements of a list, i.e. the text between the brackets.
    Sexp* readRawList(iterator& i);

    // Input range most recently handed to parse().
    iterator begin_;
    iterator end_;
};

#endif//SEXP_PARSER_H

sexp_parser.cpp

#include "sexp_parser.h"

#include <iostream>
#include <cctype>

// Characters that carry special meaning for the parser. They live in an
// unnamed namespace so that they stay local to this translation unit.
namespace
{
    const char eos          = '\0'; // terminator; never part of an atom
    const char dot          = '.';  // separates car and cdr in a dotted pair
    const char leftBracket  = '(';  // opens a pair or list
    const char rightBracket = ')';  // closes a pair or list
}

// Stream insertion for S-expressions: forwards to the node's virtual write()
// and returns the stream so that insertions can be chained.
std::ostream& operator << (std::ostream& out, const Sexp& sexp)
{
    sexp.write(out);
    return out;
}

// Builds an atom whose text is a copy of the characters in [begin, end).
Atom::Atom(iterator begin, iterator end) : Sexp(), s_(begin, end)
{
}

// Prints the atom as its text wrapped in an "[ ATOM:... ]" tag.
void Atom::write(std::ostream& out) const
{
    out << "[ ATOM:";
    out << s_;
    out << " ]";
}

// Stores the two children. The pair takes ownership of both pointers:
// ~Pair() deletes them.
Pair::Pair(Sexp* car, Sexp* cdr) : Sexp(), car_(car), cdr_(cdr)
{
}

// Releases both owned children (which in turn release their own children).
Pair::~Pair()
{
    delete car_;
    delete cdr_;
}

void Pair::write(std::ostream& out) const
{
    out << "[ PAIR:" << *car_ << "." << *cdr_ << " ]";
}

// Prints the fixed tag that stands for the empty list.
void Nil::write(std::ostream& out) const
{
    out << "[ NIL ]";
}

// Nothing to set up: begin_ and end_ are assigned on every call to parse().
Parser::Parser()
{
}

// Parses the text in [begin, end) as exactly one S-expression.
//
// Returns a newly allocated node that the caller must delete, or 0 on a
// parse error. Input that is followed by anything other than whitespace
// (for example "a b" or "(a))") is a parse error: silently dropping the
// rest of the input would hide malformed text from the caller.
Sexp* Parser::parse(iterator begin, iterator end)
{
    begin_ = begin;
    end_   = end;

    iterator i = begin_;

    Sexp* sexp = readSexp(i);
    if(sexp == 0)
    {
        return 0;
    }

    // Skip trailing whitespace without ever dereferencing end_. The cast
    // keeps std::isspace defined for characters with a negative value.
    while((i < end_) && std::isspace(static_cast<unsigned char>(*i)))
    {
        ++i;
    }

    // Anything left inside the range is unparsed garbage. An explicit
    // terminator character is still accepted as the end of the input.
    if((i < end_) && (*i != eos))
    {
        delete sexp;
        return 0;
    }

    return sexp;
}

// Advances `i` past any whitespace, stopping at the end of the input.
//
// The end_ check comes first so that the end iterator is never dereferenced.
// The cast to unsigned char is required because std::isspace is undefined
// for negative values, which plain char yields for non-ASCII bytes.
void Parser::skipWhitespace(iterator& i) const
{
    while((i != end_) && std::isspace(static_cast<unsigned char>(*i)))
    {
        ++i;
    }
}

// Tells whether `c` may be part of an atom: every character except the
// terminator, the dot, the two brackets and whitespace.
bool Parser::isAtomChar(value_type c) const
{
    if((c == eos) || (c == dot) || (c == leftBracket) || (c == rightBracket))
    {
        return false;
    }

    // std::isspace (from <cctype>) is only defined for values representable
    // as unsigned char, so bytes of multibyte text must be converted first.
    return std::isspace(static_cast<unsigned char>(c)) == 0;
}

// Reads one atom: skips leading whitespace, then consumes the longest run
// of atom characters.
//
// Returns a newly allocated Atom and leaves `i` just past it. Returns 0
// when no atom character is found; `i` then stays at the first
// non-whitespace character.
Sexp* Parser::readAtom(iterator& i)
{
    skipWhitespace(i);

    const iterator begin(i);
    // Check for the end of the input before dereferencing the iterator.
    while((i != end_) && isAtomChar(*i))
    {
        ++i;
    }

    if(begin == i)
    {
        return 0;
    }

    return new Atom(begin, i);
}

// Reads a dotted pair of the form "(" sexp "." sexp ")".
//
// Returns a newly allocated Pair and leaves `i` just past the closing
// bracket. On any mismatch it frees what was built so far, rewinds `i` to
// the first non-whitespace character and returns 0. Every dereference is
// guarded by an end_ check so the end iterator is never read.
Sexp* Parser::readPair(iterator& i)
{
    skipWhitespace(i);

    const iterator begin(i);
    if((i == end_) || (*i != leftBracket))
    {
        return 0;
    }
    ++i;

    skipWhitespace(i);

    Sexp* car = readSexp(i);
    if(car == 0)
    {
        i = begin;
        return 0;
    }

    skipWhitespace(i);

    if((i == end_) || (*i != dot))
    {
        delete car;
        i = begin;
        return 0;
    }
    ++i;

    Sexp* cdr = readSexp(i);
    if(cdr == 0)
    {
        delete car;
        i = begin;
        return 0;
    }

    skipWhitespace(i);

    if((i == end_) || (*i != rightBracket))
    {
        delete car;
        delete cdr;
        i = begin;
        return 0;
    }
    ++i;

    // The new pair takes ownership of both children.
    return new Pair(car, cdr);
}

// Reads a bracketed list: "(" followed by the elements read by
// readRawList() and a closing ")".
//
// Returns the chain built by readRawList(), or a new Nil for "()", and
// leaves `i` just past the closing bracket. On a mismatch it frees the
// elements read so far, rewinds `i` to the first non-whitespace character
// and returns 0. Every dereference is guarded by an end_ check.
Sexp* Parser::readList(iterator& i)
{
    skipWhitespace(i);

    const iterator begin(i);
    if((i == end_) || (*i != leftBracket))
    {
        return 0;
    }
    ++i;

    // 0 here means that no element could be read (possibly an empty list).
    Sexp* sexp = readRawList(i);

    skipWhitespace(i);

    if((i == end_) || (*i != rightBracket))
    {
        delete sexp; // harmless when no element was read
        i = begin;
        return 0;
    }
    ++i;

    if(sexp == 0)
    {
        return new Nil;
    }

    return sexp;
}

// Reads the elements of a list (the text between the brackets) and builds
// the corresponding chain of Pairs.
//
// The chain ends in Nil for a proper list, or in the expression that
// follows a "." for a dotted list. Returns 0 when no element can be read,
// or when a "." is not followed by an expression; in the latter case `i`
// is rewound to where this call started.
Sexp* Parser::readRawList(iterator& i)
{
    const iterator begin(i);

    Sexp* car = readSexp(i);
    if(car == 0)
    {
        return 0;
    }

    // Try to read the remaining elements recursively.
    Sexp* cdr = readRawList(i);
    if(cdr == 0)
    {
        skipWhitespace(i);

        // Check for the end of the input before dereferencing the iterator.
        if((i == end_) || (*i != dot))
        {
            // No more elements and no dot: `car` is the last element.
            cdr = new Nil;
        }
        else
        {
            ++i;

            skipWhitespace(i);
            cdr = readSexp(i);
            if(cdr == 0)
            {
                // A dot must be followed by an expression.
                delete car;
                i = begin;
                return 0;
            }
        }
    }

    return new Pair(car, cdr);
}

// Reads any S-expression: an atom, or a bracketed form.
//
// Returns a newly allocated node and leaves `i` just past it, or restores
// `i` and returns 0 when nothing matches.
//
// Bracketed forms are read by readList() alone. It also accepts the dotted
// pair "(a . b)" and builds the same Pair that readPair() would. Trying
// readPair() first and falling back to readList() parsed every nested
// element twice per nesting level, which made the running time exponential
// in the nesting depth.
Sexp* Parser::readSexp(iterator& i)
{
    const iterator begin(i);

    Sexp* sexp = readAtom(i);
    if(sexp == 0)
    {
        sexp = readList(i);
    }

    if(sexp == 0)
    {
        i = begin;
    }

    return sexp;
}

sexp_parserTest.cpp

#include <string>
#include <iostream>
#include <sstream>

#include "sexp_parser.h"

int main(int argc, char* argv[])
{
    for(int i = 1; i < argc; ++i)
    {
        std::string source(argv[i]);
        Parser      parser;

        Sexp* sexp = parser.parse(source.begin(), source.end());

        if(sexp != 0)
        {
            std::cout << *sexp << std::endl;
            delete sexp;
        }
        else
        {
            std::cout << "parse error : " << argv[i] << std::endl;
        }
    }

    return 0;
}